├── checkpoints
└── README.md
├── models
├── __init__.py
├── net_config.py
├── utils.py
├── msgc_densenet.py
├── msgc_mobilenetv2.py
└── msgc_resnet.py
├── LICENSE
├── .gitignore
├── README.md
├── scripts.sh
├── utils.py
├── main_dist.py
└── logs
├── msgc_resnet18_att_log.txt
├── msgc_condensenet_noatt_log.txt
└── msgc_resnet18_noatt_log.txt
/checkpoints/README.md:
--------------------------------------------------------------------------------
1 |
2 | Please download the checkpoints into this folder.
3 |
4 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .msgc_mobilenetv2 import msgc_mobilenetv2
3 | from .msgc_resnet import msgc_resnet18, msgc_resnet50
4 | from .msgc_densenet import msgc_condensenet
5 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This software is provided for research purposes only; for commercial use, please contact the authors first.
2 |
3 | Copyright (c) 2023 Zhuo Su
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # result files
104 | .DS_Store
105 | *.swp
106 | matlab*
107 | zhuo_*
108 |
--------------------------------------------------------------------------------
/models/net_config.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import unicode_literals
3 | from __future__ import print_function
4 | from __future__ import division
5 |
class Config_densenet:
    """Hyper-parameter container for the MSGC DenseNet/CondenseNet model."""
    def __init__(self, args):

        # Fixed network topology: bottleneck expansion factor and the
        # per-stage layer counts / growth rates.
        self.bottleneck = 4
        self.stages = [4, 6, 8, 10, 8]
        self.growth = [8, 16, 32, 64, 128]

        # Options forwarded from the command line.
        self.heads = args.heads
        self.num_classes = args.num_classes
        # Gumbel-gate temperature and initial gate bias.
        self.eps = 2.0 / 3
        self.bias = 2.0
        self.attention = args.attention
        # Spatial resolution implied by the dataset name (None if unknown).
        self.input_size = ((224, 224) if 'imagenet' in args.data
                           else (32, 32) if 'cifar' in args.data
                           else None)
        self.data = args.data
24 |
class Config_mobilenetv2:
    """Hyper-parameter container for the MSGC MobileNetV2 model."""
    def __init__(self, args):

        # Options forwarded from the command line.
        self.heads = args.heads
        self.num_classes = args.num_classes
        self.width_mult = args.width_mul
        # Gumbel-gate temperature and initial gate bias.
        self.eps = 2.0 / 3
        self.bias = 2.0
        self.attention = args.attention
        # Spatial resolution implied by the dataset name (None if unknown).
        self.input_size = ((224, 224) if 'imagenet' in args.data
                           else (32, 32) if 'cifar' in args.data
                           else None)
        self.data = args.data
40 |
class Config_resnet:
    """Hyper-parameter container for the MSGC ResNet models."""
    def __init__(self, args):

        # Options forwarded from the command line.
        self.heads = args.heads
        self.num_classes = args.num_classes
        # Gumbel-gate temperature and initial gate bias.
        self.eps = 2.0 / 3
        self.bias = 2.0
        self.attention = args.attention
        # Spatial resolution implied by the dataset name (None if unknown).
        self.input_size = ((224, 224) if 'imagenet' in args.data
                           else (32, 32) if 'cifar' in args.data
                           else None)
        self.data = args.data
55 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Middle Spectrum Grouped Convolution (MSGC)
2 |
3 | This repository contains the PyTorch implementation for
4 | [Boosting Convolutional Neural Networks with Middle Spectrum Grouped Convolution](https://arxiv.org/abs/2304.06305)
5 | by
6 | [Zhuo Su](https://zhuogege1943.com/homepage/),
7 | [Jiehua Zhang](https://scholar.google.com/citations?user=UIbAv3wAAAAJ&hl=en&oi=sra),
8 | Tianpeng Liu,
9 | Zhen Liu,
10 | Shuanghui Zhang,
11 | [Matti Pietikäinen](https://en.wikipedia.org/wiki/Matti_Pietik%C3%A4inen_(academic)),
12 | and [Li Liu](http://lilyliliu.com/) (corresponding author).
13 |
14 |
15 | ## Running environment
16 |
17 | Ubuntu 18.04 system + cuda 11.1 and cudnn 8.2.1 + Pytorch 1.9 + python 3.9
18 |
19 | *Other versions may also work~ :)*
20 |
21 |
22 | ## Performance
23 |
24 | The performances of MSGC equipped models (on ImageNet) are listed below. The checkpoints of our trained models can be downloaded at [link to our trained models](https://github.com/hellozhuo/msgc/releases/download/v1.0/trained_models_imagenet.zip). For evaluation,
25 | please unzip the checkpoints to folder [checkpoints](checkpoints).
26 | The evaluation scripts to reproduce the following results can be found in [scripts.sh](scripts.sh).
27 |
28 | | Model | Attention | Top-1 (%) | Top-5 (%) | MAC | Training script | Training log |
29 | |-------|-------|-------|-------|-----|-------------|-------------|
30 | | ResNet-18 | - | 69.76 | 89.08 | 1817 M | - | - |
31 | | ResNet-18 + MSGC | ✗ | 70.30 | 89.27 | 883 M | [script](https://github.com/hellozhuo/msgc/blob/092f46e4e115bfdcbc73546c309267996fa86dd2/scripts.sh#L32) | [log](logs/msgc_resnet18_noatt_log.txt) |
32 | | ResNet-18 + MSGC | ✓ | 71.51 | 90.21 | 885 M | [script](https://github.com/hellozhuo/msgc/blob/092f46e4e115bfdcbc73546c309267996fa86dd2/scripts.sh#L35) | [log](logs/msgc_resnet18_att_log.txt) |
33 | | ResNet-18 + MSGC | ✗ | 72.33 | 90.53 | 1630 M | [script](https://github.com/hellozhuo/msgc/blob/092f46e4e115bfdcbc73546c309267996fa86dd2/scripts.sh#L53) | [log](logs/msgc_resnet18_noatt_tau0_9_log.txt) |
34 | | | | | | | |
35 | | ResNet-50 | - | 76.13 | 92.86 | 4099 M | - | - |
36 | | ResNet-50 + MSGC | ✗ | 77.20 | 93.37 | 1886 M | [script](https://github.com/hellozhuo/msgc/blob/092f46e4e115bfdcbc73546c309267996fa86dd2/scripts.sh#L38) | [log](logs/msgc_resnet50_noatt_log.txt) |
37 | | ResNet-50 + MSGC | ✓ | 76.76 | 92.99 | 1892 M | [script](https://github.com/hellozhuo/msgc/blob/092f46e4e115bfdcbc73546c309267996fa86dd2/scripts.sh#L41) | [log](logs/msgc_resnet50_att_log.txt) |
38 | | | | | | | |
39 | | MobileNetV2 | - | 71.88 | 90.27 | 307 M | - | - |
40 | | MobileNetV2 + MSGC | ✗ | 72.10 | 90.41 | 198 M | [script](https://github.com/hellozhuo/msgc/blob/092f46e4e115bfdcbc73546c309267996fa86dd2/scripts.sh#L44) | [log](logs/msgc_mobilenetv2_noatt_log.txt) |
41 | | MobileNetV2 + MSGC | ✓ | 72.59 | 90.82 | 197 M | [script](https://github.com/hellozhuo/msgc/blob/092f46e4e115bfdcbc73546c309267996fa86dd2/scripts.sh#L47) | [log](logs/msgc_mobilenetv2_att_log.txt) |
42 | | | | | | | |
43 | | CondenseNet | - | 73.80 | 91.70 | 529 M | - | - |
44 | | CondenseNet + MSGC | ✗ | 74.81 | 92.17 | 523 M | [script](https://github.com/hellozhuo/msgc/blob/092f46e4e115bfdcbc73546c309267996fa86dd2/scripts.sh#L50) | [log](logs/msgc_condensenet_noatt_log.txt) |
45 |
46 | ## Training
47 |
48 | An example script for training on two gpus is:
49 | ```bash
50 | CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch \
51 | --master_port=12345 \
52 | --nproc_per_node=2 \
53 | main_dist.py \
54 | --model msgc_resnet18 \
55 | --attention \
56 | -j 16 \
57 | --data imagenet \
58 | --datadir /to/imagenet/dataset \
59 | --savedir ./results \
60 | --resume \
61 | --target 0.5
62 | ```
63 |
64 | The above script trains the MSGC equipped ResNet-18 architecture with a target MAC reduction of 50%.
65 | Other training scripts can be seen in [scripts.sh](scripts.sh).
66 |
67 | For more detailed illustration of the training setup, please refer to [main\_dist.py](main_dist.py), or run:
68 | ```bash
69 | python main_dist.py -h
70 | ```
71 |
72 | ## Acknowledgement
73 |
74 | The coding is inspired by:
75 |
76 | - [Pixel Difference Convolution](https://github.com/zhuoinoulu/pidinet)
77 | - [Dynamic Grouped Convolution](https://github.com/hellozhuo/dgc)
78 | - [Detectron2](https://github.com/facebookresearch/detectron2)
79 |
80 |
--------------------------------------------------------------------------------
/models/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import unicode_literals
3 | from __future__ import print_function
4 | from __future__ import division
5 |
6 | import math
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 |
def conv2d_out_dim(dim, kernel_size, padding=0, stride=1, dilation=1, ceil_mode=False):
    """Spatial output size of a conv/pool layer (same formula as the
    torch.nn.Conv2d documentation); `ceil_mode` selects ceil vs floor."""
    effective_kernel = dilation * (kernel_size - 1) + 1
    span = (dim + 2 * padding - effective_kernel) / stride + 1
    rounder = math.ceil if ceil_mode else math.floor
    return int(rounder(span))
16 |
class GumbelSoftmax(nn.Module):
    '''
    Binary Gumbel-Softmax gate.

    Training: logistic (difference-of-Gumbels) noise is added to the logits,
    a temperature-scaled sigmoid relaxes them, and hard 0/1 outputs are
    produced with a straight-through estimator.  Evaluation: deterministic
    threshold of the logits at zero.
    '''
    def __init__(self, eps=1):
        super(GumbelSoftmax, self).__init__()
        self.eps = eps  # temperature of the sigmoid relaxation
        self.sigmoid = nn.Sigmoid()

    def gumbel_sample(self, template_tensor, eps=1e-8):
        # log(U) - log(1-U) with U ~ Uniform(0,1): logistic noise, i.e. the
        # difference of two independent Gumbel samples.
        uniform = template_tensor.clone().uniform_()
        return torch.log(uniform + eps) - torch.log(1 - uniform + eps)

    def gumbel_softmax(self, logits):
        """Draw a soft sample from the binary Gumbel-Softmax distribution."""
        noisy_logits = logits + self.gumbel_sample(logits.data)
        soft_samples = self.sigmoid(noisy_logits / self.eps)
        return soft_samples, noisy_logits

    def forward(self, logits):
        if not self.training:
            # Deterministic hard gate at evaluation time.
            return (logits >= 0).float()
        soft, _ = self.gumbel_softmax(logits)
        # Straight-through: hard 0/1 forward, gradients flow through `soft`.
        return ((soft >= 0.5).float() - soft).detach() + soft
46 |
class MaskGen(nn.Module):
    '''
    Predicts per-head binary channel masks from a feature map.

    A squeeze-excite style 1x1 bottleneck produces `outplanes * heads`
    logits, which a Gumbel-Softmax gate binarizes (hard threshold in eval
    mode, straight-through sampling in training).
    '''
    def __init__(self, inplanes, outplanes, heads=4, eps=0.66667, bias=-1, squeeze_rate=4, pool=False):
        super(MaskGen, self).__init__()
        self.bottleneck = inplanes // squeeze_rate
        self.inplanes, self.outplanes, self.heads = inplanes, outplanes, heads

        # Logit predictor: optional global pooling, then 1x1 bottleneck.
        self.avg_pool = nn.AdaptiveAvgPool2d(1) if pool else None
        self.conv = nn.Sequential(
            nn.Conv2d(inplanes, self.bottleneck, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(self.bottleneck),
            nn.ReLU(inplace=True),
            nn.Conv2d(self.bottleneck, outplanes * heads, kernel_size=1, stride=1, bias=bias >= 0),
        )
        # A positive initial bias keeps the gates mostly open early on.
        if bias >= 0:
            nn.init.constant_(self.conv[3].bias, bias)
        # Binarizing gate.
        self.gate = GumbelSoftmax(eps=eps)

        # Analytic cost of the mask generator itself.
        self.flops = (inplanes + 1) * self.bottleneck + self.bottleneck * outplanes * heads

    def forward(self, x):
        batch = x.size(0)
        context = x if self.avg_pool is None else self.avg_pool(x)
        logits = self.conv(context).view(batch, self.heads, self.outplanes)
        # [N, heads, C_out], hard 0/1 in eval mode.
        return self.gate(logits)
84 |
class AttGen(nn.Module):
    '''
    Predicts per-head soft channel attention.

    The final 1x1 conv is initialised with zero weight and unit bias, so the
    attention starts as the identity (all ones) and is learned from there.
    '''
    def __init__(self, inplanes, outplanes, heads=4, squeeze_rate=4, pool=False):
        super(AttGen, self).__init__()
        self.bottleneck = inplanes // squeeze_rate
        self.inplanes, self.outplanes, self.heads = inplanes, outplanes, heads

        # Attention predictor: optional global pooling, then 1x1 bottleneck.
        self.avg_pool = nn.AdaptiveAvgPool2d(1) if pool else None
        self.conv = nn.Sequential(
            nn.Conv2d(inplanes, self.bottleneck, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(self.bottleneck),
            nn.ReLU(inplace=True),
            nn.Conv2d(self.bottleneck, outplanes * heads, kernel_size=1, stride=1, bias=True),
        )
        # Zero weight + unit bias => initial attention is exactly 1 everywhere.
        nn.init.constant_(self.conv[3].weight, 0)
        nn.init.constant_(self.conv[3].bias, 1)

        # Analytic cost of the attention generator itself.
        self.flops = (inplanes + 1) * self.bottleneck + self.bottleneck * outplanes * heads

    def forward(self, x):
        batch = x.size(0)
        context = x if self.avg_pool is None else self.avg_pool(x)
        # [N, heads, C_out]
        return self.conv(context).view(batch, self.heads, self.outplanes)
118 |
--------------------------------------------------------------------------------
/scripts.sh:
--------------------------------------------------------------------------------
1 |
2 | ## Evaluation
3 |
4 | # ResNet-18 with MSGC w/o attention (Top-1 = 70.3%, MAC = 883 M)
5 | #CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=1 main_dist.py --model msgc_resnet18 --data imagenet --datadir /to/imagenet/dataset --savedir ./results --evaluate checkpoints/msgc_resnet18_noatt.pth
6 |
7 | # ResNet-18 with MSGC w/ attention (Top-1 = 71.5%, MAC = 885 M)
8 | #CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=1 main_dist.py --model msgc_resnet18 --attention --data imagenet --datadir /to/imagenet/dataset --savedir ./results --evaluate checkpoints/msgc_resnet18_att.pth
9 |
10 | # ResNet-50 with MSGC w/o attention (Top-1 = 77.2%, MAC = 1886 M)
11 | #CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=1 main_dist.py --model msgc_resnet50 --data imagenet --datadir /to/imagenet/dataset --savedir ./results --evaluate checkpoints/msgc_resnet50_noatt.pth
12 |
13 | # ResNet-50 with MSGC w/ attention (Top-1 = 76.8%, MAC = 1892 M)
14 | #CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=1 main_dist.py --model msgc_resnet50 --attention --data imagenet --datadir /to/imagenet/dataset --savedir ./results --evaluate checkpoints/msgc_resnet50_att.pth
15 |
16 | # MobileNetV2 with MSGC w/o attention (Top-1 = 72.1%, MAC = 198 M)
17 | #CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=1 main_dist.py --model msgc_mobilenetv2 --data imagenet --datadir /to/imagenet/dataset --savedir ./results --evaluate checkpoints/msgc_mobilenetv2_noatt.pth
18 |
19 | # MobileNetV2 with MSGC w/ attention (Top-1 = 72.6%, MAC = 197 M)
20 | #CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=1 main_dist.py --model msgc_mobilenetv2 --attention --data imagenet --datadir /to/imagenet/dataset --savedir ./results --evaluate checkpoints/msgc_mobilenetv2_att.pth
21 |
22 | # CondenseNet with MSGC w/o attention (Top-1 = 74.8%, MAC = 523 M)
23 | #CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=1 main_dist.py --model msgc_condensenet --data imagenet --datadir /to/imagenet/dataset --savedir ./results --evaluate checkpoints/msgc_condensenet.pth
24 |
25 | # ResNet-18 with MSGC w/o attention, \tau_{end} = 0.9 (Top-1 = 72.3%, MAC = 1631 M)
26 | #CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=1 main_dist.py --model msgc_resnet18 --data imagenet --datadir /to/imagenet/dataset --savedir ./results --evaluate checkpoints/msgc_resnet18_noatt_tau0.9.pth
27 |
28 |
29 | ## Training
30 |
31 | # ResNet-18 with MSGC w/o attention
32 | #CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=2 main_dist.py --model msgc_resnet18 -j 16 --data imagenet --datadir /to/imagenet/dataset --savedir ./results --resume --target 0.49
33 |
34 | # ResNet-18 with MSGC w/ attention
35 | #CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=2 main_dist.py --model msgc_resnet18 --attention -j 16 --data imagenet --datadir /to/imagenet/dataset --savedir ./results --resume --target 0.488
36 |
37 | # ResNet-50 with MSGC w/o attention
38 | #CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=2 main_dist.py --model msgc_resnet50 -j 16 --data imagenet --datadir /to/imagenet/dataset --savedir ./results --resume --target 0.457
39 |
40 | # ResNet-50 with MSGC w/ attention
41 | #CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=2 main_dist.py --model msgc_resnet50 --attention -j 16 --data imagenet --datadir /to/imagenet/dataset --savedir ./results --resume --target 0.457
42 |
43 | # MobileNetV2 with MSGC w/o attention
44 | #CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=2 main_dist.py --model msgc_mobilenetv2 -j 16 --data imagenet --datadir /to/imagenet/dataset --savedir ./results --epochs 150 --resume --target 0.65
45 |
46 | # MobileNetV2 with MSGC w/ attention
47 | #CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=2 main_dist.py --model msgc_mobilenetv2 --attention -j 16 --data imagenet --datadir /to/imagenet/dataset --savedir ./results --epochs 150 --resume --target 0.628
48 |
49 | # CondenseNet with MSGC w/o attention
50 | #CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=2 main_dist.py --model msgc_condensenet -j 16 --data imagenet --datadir /to/imagenet/dataset --savedir ./results --resume --target 0.258
51 |
52 | # ResNet-18 with MSGC w/o attention, \tau_{end} = 0.9
53 | #CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --master_port=12345 --nproc_per_node=2 main_dist.py --model msgc_resnet18 -j 16 --data imagenet --datadir /to/imagenet/dataset --savedir ./results --resume --target 0.9
54 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import unicode_literals
3 | from __future__ import print_function
4 | from __future__ import division
5 |
6 | import os
7 | import shutil
8 | import math
9 | import time
10 |
11 | import torch
12 | import torch.nn as nn
13 |
14 |
15 | ######################################
16 | # basic functions #
17 | ######################################
18 |
19 |
class CrossEntropyLabelSmooth(nn.Module):
    """
    Cross-entropy loss with label smoothing.

    The one-hot target is blended with the uniform distribution:
    (1 - epsilon) * one_hot + epsilon / num_classes.  With epsilon = 0 this
    reduces to the standard (batch-mean) cross-entropy.
    """
    def __init__(self, num_classes, epsilon):
        super(CrossEntropyLabelSmooth, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inputs, targets):
        log_probs = self.logsoftmax(inputs)
        one_hot = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        smoothed = one_hot * (1 - self.epsilon) + self.epsilon / self.num_classes
        return (-smoothed * log_probs).mean(0).sum()
36 |
37 |
def load_checkpoint(args, running_file):
    """Locate and load a checkpoint onto the CPU.

    Resolution order: args.evaluate > args.resume_from > the path recorded
    in <savedir>/save_models/latest.txt.  Status lines are printed and also
    appended to `running_file`.  Returns the loaded state, or None if no
    checkpoint file exists at the resolved path.
    """
    model_dir = os.path.join(args.savedir, 'save_models')
    latest_filename = os.path.join(model_dir, 'latest.txt')

    model_filename = ''
    if args.evaluate is not None:
        model_filename = args.evaluate
    elif args.resume_from is not None:
        model_filename = args.resume_from
    elif os.path.exists(latest_filename):
        with open(latest_filename, 'r') as fin:
            model_filename = fin.readlines()[0].strip()

    loadinfo = "=> loading checkpoint from '{}'".format(model_filename)
    print(loadinfo)

    if os.path.exists(model_filename):
        state = torch.load(model_filename, map_location='cpu')
        loadinfo2 = "=> loaded checkpoint '{}' successfully".format(model_filename)
    else:
        state = None
        loadinfo2 = "no checkpoint loaded"
    print(loadinfo2)

    running_file.write('%s\n%s\n' % (loadinfo, loadinfo2))
    running_file.flush()

    return state
65 |
66 |
def save_checkpoint(state, epoch, root, is_best, saveID, keep_freq=10):
    """Save `state` as checkpoint_<epoch>.pth.tar under <root>/save_models.

    Also points latest.txt at the new file, copies it to model_best.pth.tar
    when `is_best`, and deletes the previous checkpoint `saveID` unless it
    falls on a keep_freq boundary.  Returns `epoch` (the caller's new saveID).
    """
    model_dir = os.path.join(root, 'save_models')
    model_filename = os.path.join(model_dir, 'checkpoint_%03d.pth.tar' % epoch)
    latest_filename = os.path.join(model_dir, 'latest.txt')

    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Write the new checkpoint and record it as the latest one.
    torch.save(state, model_filename)
    with open(latest_filename, 'w') as fout:
        fout.write(model_filename)
    print("=> saved checkpoint '{}'".format(model_filename))

    # Keep a copy of the best model so far.
    if is_best:
        shutil.copyfile(model_filename, os.path.join(model_dir, 'model_best.pth.tar'))

    # Drop the previous checkpoint unless it is a keep_freq-th snapshot.
    if saveID is not None and (saveID + 1) % keep_freq != 0:
        stale_filename = os.path.join(model_dir, 'checkpoint_%03d.pth.tar' % saveID)
        if os.path.exists(stale_filename):
            os.remove(stale_filename)
            print('=> removed checkpoint %s' % stale_filename)

    print('##########Time##########', time.strftime('%Y-%m-%d %H:%M:%S'))
    return epoch
98 |
99 |
class AverageMeter(object):
    """Tracks the current value, running sum, count, and average.

    accum='mean': update() receives per-sample means (`val`) with weight `n`.
    accum='sum' : update() receives pre-summed values covering `n` samples.
    """
    def __init__(self, accum='mean'):
        self.reset()
        self.accum = accum

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        if self.accum == 'sum':
            self.sum += val
            self.val = val / n
        elif self.accum == 'mean':
            self.val = val
            self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        # Convenience percentage views of the same statistics.
        self.avg100 = self.sum / self.count * 100
        self.val100 = self.val * 100
123 |
def adjust_learning_rate(optimizer, epoch, args, method='cosine'):
    """Set each param group's lr for this epoch; returns 'lr1-lr2-...'.

    'cosine' anneals from 1 to 0 over args.epochs; 'multistep' multiplies by
    each gamma whose step has been reached.  A linear warm-up overrides the
    schedule during the first args.warm_epoch epochs.
    """
    if method == 'cosine':
        lr_multi = 0.5 * (1 + math.cos(math.pi * float(epoch) / float(args.epochs)))
    elif method == 'multistep':
        lr_multi = 1.0
        for epoch_step, lr_gamma in zip(args.lr_steps, args.lr_gammas):
            if epoch >= epoch_step:
                lr_multi = lr_multi * lr_gamma
    if epoch < args.warm_epoch:
        lr_multi = (epoch + 1) / args.warm_epoch

    rates = []
    for i, param_group in enumerate(optimizer.param_groups):
        param_group['lr'] = args.lr_list[i] * lr_multi
        rates.append('{:.6f}'.format(param_group['lr']))
    return '-'.join(rates)
142 |
143 |
def accuracy(output, target, topk=(1,)):
    """
    Number of correctly classified samples within each top-k.

    Returns raw correct counts (not percentages); callers divide by the
    batch size themselves.
    """
    maxk = max(topk)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()  # [maxk, batch]
    hits = pred.eq(target.view(1, -1).expand_as(pred))

    # For each k, count the targets appearing among the k best predictions.
    return [hits[:k].contiguous().view(-1).float().sum(0) for k in topk]
161 |
162 |
163 |
164 | ######################################
165 | # debug functions #
166 | ######################################
167 |
def change_checkpoint(state):
    """
    Rename legacy state-dict keys: 'binary_conv' -> 'bconv', 'bn1' -> 'bn'.

    Keys matching neither pattern are kept as-is; values are untouched.
    """
    renamed = dict()
    for key, value in state.items():
        if 'binary_conv' in key:
            new_key = key.replace('binary_conv', 'bconv')
        elif 'bn1' in key:
            new_key = key.replace('bn1', 'bn')
        else:
            new_key = key
        renamed[new_key] = value
    return renamed
181 |
def visualize(checkpoint, img_dir):
    """Plot a histogram of all 'bconv ... weights' tensors in a checkpoint.

    The figure is saved as <img_dir>/img_epoch_XXX.png, named after the
    checkpoint's epoch.  Requires matplotlib (imported lazily, debug only).
    """
    from matplotlib import pyplot as plt
    import numpy as np

    state = checkpoint['state_dict']
    epoch = checkpoint['epoch']
    os.makedirs(img_dir, exist_ok=True)
    img_file = os.path.join(img_dir, 'img_epoch_%03d.png' % epoch)
    print('processing %s' % img_file)

    # Flatten every matching weight tensor into one 1-D array.
    weights = [v.data.view(-1) for k, v in state.items()
               if 'bconv' in k and 'weights' in k]
    flat = torch.cat(weights).cpu().numpy()

    plt.hist(flat, list(np.linspace(-1.5, 1.5, 200)))
    plt.savefig(img_file)
    plt.close()

    print('done')
206 |
207 |
208 |
--------------------------------------------------------------------------------
/models/msgc_densenet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import unicode_literals
3 | from __future__ import print_function
4 | from __future__ import division
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | from torchvision.models.utils import load_state_dict_from_url
10 | import math
11 |
12 | from .utils import MaskGen, AttGen, conv2d_out_dim
13 | from .net_config import Config_densenet
14 |
15 | class _Conv(nn.Sequential):
16 | def __init__(self, in_channels, out_channels, kernel_size=1,
17 | stride=1, padding=0, groups=1):
18 | super(_Conv, self).__init__()
19 | self.add_module('norm', nn.BatchNorm2d(in_channels))
20 | self.add_module('relu', nn.ReLU(inplace=True))
21 | self.add_module('conv', nn.Conv2d(in_channels * groups, out_channels,
22 | kernel_size=kernel_size,
23 | stride=stride,
24 | padding=padding, bias=False,
25 | groups=groups))
26 |
27 | class _DyDenseLayer(nn.Module):
28 | def __init__(self, in_channels, growth_rate, bottleneck, input_size, config):
29 | super(_DyDenseLayer, self).__init__()
30 |
31 | h, w = input_size
32 | self.attention = config.attention
33 |
34 | hidden_dim = bottleneck * growth_rate
35 |
36 | ## 1x1 conv i --> b*k
37 | self.conv_1 = _Conv(in_channels, hidden_dim)
38 |
39 | ## 3x3 conv b*k --> k
40 | self.conv_2 = _Conv(hidden_dim, growth_rate,
41 | kernel_size=3, padding=1, groups=config.heads)
42 | self.flops_dgc1 = in_channels * hidden_dim * h * w
43 | self.flops_conv1_relu = in_channels * h * w
44 | if self.attention:
45 | self.flops_dgc2 = hidden_dim * (9 * growth_rate + config.heads + 1) * h * w # +1: for relu
46 | self.flops_original_extra = config.heads * hidden_dim * h * w
47 | else:
48 | self.flops_dgc2 = hidden_dim * (9 * growth_rate + 1) * h * w
49 | self.flops_original_extra = 0
50 |
51 | self.flops_dgc = self.flops_dgc1 + self.flops_dgc2
52 |
53 | squeeze_rate = 8 if in_channels >= 200 else 4
54 | self.avg_pool = nn.AdaptiveAvgPool2d(1)
55 | ## mask generator
56 | self.maskgen1 = MaskGen(in_channels, in_channels, 1, config.eps, config.bias, squeeze_rate)
57 | self.maskgen2 = MaskGen(in_channels, hidden_dim, config.heads, config.eps, config.bias, squeeze_rate)
58 | self.flops_mask = self.maskgen1.flops + self.maskgen2.flops + h * w * in_channels
59 |
60 | ## attention generator
61 | self.flops_att = 0
62 | if self.attention:
63 | self.attgen2 = AttGen(in_channels, hidden_dim, config.heads, squeeze_rate)
64 | self.flops_att = self.attgen2.flops
65 |
66 | def get_others(self, mask1, mask2, others):
67 | flops_dgc_, bonus_ = others
68 | flops_dgc1 = mask1.mean() * self.flops_dgc1
69 | flops_dgc2 = mask2.mean() * self.flops_dgc2
70 |
71 | mask2_d = mask2.detach()
72 | heads = mask2_d.size(1)
73 | mask_bonus = 1.0 - mask2_d[:, 0, :]
74 | for i in range(heads):
75 | mask_bonus = mask_bonus * (1.0 - mask2_d[:, i, :])
76 | bonus = mask_bonus.mean() * flops_dgc1.detach()
77 |
78 | flops_dgc = flops_dgc_ + flops_dgc1 + flops_dgc2
79 | bonus = bonus_ + bonus
80 |
81 | return flops_dgc, bonus
82 |
83 | def forward(self, x_others):
84 | x, others = x_others
85 | x_ = x
86 |
87 | x = self.conv_1.relu(self.conv_1.norm(x))
88 |
89 | x_pool = self.avg_pool(x)
90 | mask1 = self.maskgen1(x_pool)
91 | mask2 = self.maskgen2(x_pool)
92 | in_channels = mask1.size(2)
93 | b, heads, hidden_dim = mask2.size()
94 | if self.attention:
95 | att2 = self.attgen2(x_pool)
96 |
97 | x = self.conv_1.conv(x * mask1.view(b, in_channels, 1, 1))
98 | x = self.conv_2.relu(self.conv_2.norm(x))
99 |
100 | xcat = []
101 | for i in range(heads):
102 | if self.attention:
103 | xmask = x * mask2[:, i, :].view(b, hidden_dim, 1, 1) * att2[:, i, :].view(b, hidden_dim, 1, 1)
104 | else:
105 | xmask = x * mask2[:, i, :].view(b, hidden_dim, 1, 1)
106 | xcat.append(xmask)
107 | x = torch.cat(xcat, dim=1)
108 | x = self.conv_2.conv(x)
109 | x = torch.cat([x_, x], 1)
110 |
111 | flops_dgc, bonus = self.get_others(mask1, mask2, others)
112 | return x, [flops_dgc, bonus]
113 |
114 |
class _DenseBlock(nn.Sequential):
    """Stack of `num_layers` _DyDenseLayer modules with dense connectivity;
    aggregates the per-layer FLOP statistics for the whole block."""
    def __init__(self, num_layers, in_channels, growth_rate,
                 bottleneck=4, input_size=None, config=None):
        super(_DenseBlock, self).__init__()
        assert config is not None, 'config should not be None'
        assert input_size is not None, 'input size should not be None'
        self.flops_dgc = 0
        self.flops_mask = 0
        self.flops_conv1_relu = 0
        self.flops_original_extra = 0
        for idx in range(num_layers):
            # Each layer sees all previously produced features.
            layer = _DyDenseLayer(in_channels + idx * growth_rate, growth_rate,
                                  bottleneck, input_size, config)
            self.add_module('denselayer_%d' % (idx + 1), layer)
            # Running totals over all layers in the block.
            self.flops_dgc += layer.flops_dgc
            self.flops_mask += layer.flops_mask + layer.flops_att
            self.flops_conv1_relu += layer.flops_conv1_relu
            self.flops_original_extra += layer.flops_original_extra
133 |
134 | class _Transition(nn.Module):
135 | def __init__(self, in_channels):
136 | super(_Transition, self).__init__()
137 | self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
138 |
139 | def forward(self, x_others):
140 | x, others = x_others
141 | x = self.pool(x)
142 | return x, others
143 |
class InitConv(nn.Conv2d):
    """Stem convolution that threads the (features, flop-accumulator) tuple
    used throughout the MSGC network."""

    def forward(self, x_others):
        feats, others = x_others
        return super(InitConv, self).forward(feats), others
150 |
class DyDenseNet(nn.Module):
    """CondenseNet-style dense network with dynamic grouped convolutions.

    Builds `init_conv` -> dense blocks (with pooling transitions) ->
    BN/ReLU/avg-pool -> linear classifier, while accumulating analytic
    FLOPs counters used by the sparsity loss during training.
    """

    def __init__(self, config):

        super(DyDenseNet, self).__init__()

        num_classes = config.num_classes
        self.stages = config.stages   # layers per dense block
        self.growth = config.growth   # growth rate per dense block
        assert len(self.stages) == len(self.growth)

        # CIFAR inputs keep full resolution in the stem; ImageNet halves it.
        if 'cifar' in config.data:
            self.init_stride = 1
            self.pool_size = 8
        else:
            self.init_stride = 2
            self.pool_size = 7
        h, w = config.input_size

        self.features = nn.Sequential()
        ### Initial nChannels should be 3
        self.num_features = 2 * self.growth[0]
        ### Dense-block 1 (224x224)
        self.features.add_module('init_conv', InitConv(3, self.num_features,
                                                       kernel_size=3,
                                                       stride=self.init_stride,
                                                       padding=1,
                                                       bias=False))
        h = conv2d_out_dim(h, 3, 1, self.init_stride)
        w = conv2d_out_dim(w, 3, 1, self.init_stride)
        # 3x3 conv cost: 9 * in_channels * out_channels * H * W
        self.flops_init = 9 * 3 * self.num_features * h * w
        print('Init Conv: h {}, w {}, flops {}'.format(h, w, self.flops_init))

        # Accumulated by add_block() below.
        self.flops_dgc = 0
        self.flops_mask = 0
        self.flops_original_extra = 0
        self.flops_pool = 0
        self.flops_block_relu = 0

        self.features_last = nn.Sequential()
        for i in range(len(self.stages)):
            ### Dense-block i
            h, w = self.add_block(i, config, (h, w))
        ### Linear layer
        self.classifier = nn.Linear(self.num_features, num_classes)

        self.flops_classifier = self.num_features * num_classes

        ### initialize
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # He initialization (fan-out).
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()
        # Attention generators start as identity-like (weight 0, bias 1).
        for m in self.modules():
            if isinstance(m, AttGen):
                nn.init.constant_(m.conv[3].weight, 0)
                nn.init.constant_(m.conv[3].bias, 1)

    def add_block(self, i, config, input_size):
        """Append dense block `i` (and its transition/final pooling).

        Returns the (h, w) spatial size after the block, and updates the
        model-level FLOPs counters.
        """
        ### Check if ith is the last one
        h, w = input_size
        last = (i == len(self.stages) - 1)
        block = _DenseBlock(
            num_layers=self.stages[i],
            in_channels=self.num_features,
            growth_rate=self.growth[i],
            input_size=(h, w),
            config=config
        )
        self.features.add_module('denseblock_%d' % (i + 1), block)
        self.num_features += self.stages[i] * self.growth[i]
        self.flops_dgc += block.flops_dgc
        self.flops_mask += block.flops_mask
        self.flops_block_relu += block.flops_conv1_relu
        self.flops_original_extra += block.flops_original_extra
        print('Block: h {}, w {}, flops {}'.format(h, w, block.flops_dgc))
        if not last:
            trans = _Transition(in_channels=self.num_features)
            self.features.add_module('transition_%d' % (i + 1), trans)
            h = conv2d_out_dim(h, 2, 0, 2)
            w = conv2d_out_dim(w, 2, 0, 2)
            # 2x2 average pool: 4 multiply-adds per output element.
            flops_pool = 4 * self.num_features * h * w
            self.flops_pool += flops_pool
            print('Pool: h {}, w {}, flops {}'.format(h, w, flops_pool))
        else:
            # Final BN + ReLU + global average pool before the classifier.
            self.features_last.add_module('norm_last',
                                          nn.BatchNorm2d(self.num_features))
            self.features_last.add_module('relu_last',
                                          nn.ReLU(inplace=True))
            self.features_last.add_module('pool_last',
                                          nn.AvgPool2d(self.pool_size))
            flops_pool = self.pool_size ** 2 * self.num_features * 2 # relu and pool
            self.flops_pool += flops_pool
            print('AvgPool: h {}, w {}, flops {}'.format(h, w, flops_pool))

        return (h, w)

    def get_flops(self):
        """Return (original, best-possible, static-main, dgc, mask) FLOPs.

        `flops_possible` assumes the dynamic grouped convs prune to 1/4
        of their dense cost plus the mask-generation overhead.
        """
        flops_main = self.flops_init + self.flops_pool + self.flops_classifier + self.flops_block_relu
        flops = flops_main + self.flops_dgc - self.flops_original_extra
        flops_possible = flops_main + self.flops_dgc * 0.25 + self.flops_mask
        return flops, flops_possible, flops_main, self.flops_dgc, self.flops_mask

    def forward(self, x):
        """Return (logits, [dgc_flops, bonus]) for the batch.

        The [0, 0] pair seeds the per-batch dynamic FLOPs accounting
        that each layer adds to as the pair threads through `features`.
        """
        x = [x, [0, 0]]
        features, others = self.features(x)
        features = self.features_last(features)

        out = features.view(features.size(0), -1)
        out = self.classifier(out)
        return out, others
266 |
def msgc_condensenet(args):
    """Build the MSGC CondenseNet model.

    Unless `args.scratch` is set, initializes from the released
    pretrained DenseNet-74 weights.
    """
    model = DyDenseNet(Config_densenet(args))
    if not args.scratch:
        url = 'https://github.com/hellozhuo/msgc/releases/download/v1.0/pretrained_densenet74.pth'
        pretrained_dict = load_state_dict_from_url(url, progress=True)
        # Merge downloaded weights into the model's own state dict so any
        # parameters absent from the checkpoint keep their fresh values.
        merged = model.state_dict()
        merged.update(pretrained_dict)
        model.load_state_dict(merged)
        print('load pretrained model successfully')

    return model
279 |
--------------------------------------------------------------------------------
/models/msgc_mobilenetv2.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import unicode_literals
3 | from __future__ import print_function
4 | from __future__ import division
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | from torchvision.models.utils import load_state_dict_from_url
10 | import math
11 |
12 | from .utils import MaskGen, AttGen, conv2d_out_dim
13 | from .net_config import Config_mobilenetv2
14 |
15 | def _make_divisible(v, divisor, min_value=None):
16 | """
17 | This function is taken from the original tf repo.
18 | It ensures that all layers have a channel number that is divisible by 8
19 | It can be seen here:
20 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
21 | :param v:
22 | :param divisor:
23 | :param min_value:
24 | :return:
25 | """
26 | if min_value is None:
27 | min_value = divisor
28 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
29 | # Make sure that round down does not go down by more than 10%.
30 | if new_v < 0.9 * v:
31 | new_v += divisor
32 | return new_v
33 |
class ConvBNReLU_1st(nn.Sequential):
    """Conv + BatchNorm + ReLU6 that passes the bookkeeping pair through."""

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        # 'same'-style padding for odd kernel sizes.
        padding = (kernel_size - 1) // 2
        super(ConvBNReLU_1st, self).__init__(
            nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding,
                      groups=groups, bias=False),
            nn.BatchNorm2d(out_planes),
            nn.ReLU6(inplace=True))

    def forward(self, x_others):
        # Unpack the (tensor, bookkeeping) pair, run the stack on the tensor
        # only, and forward the bookkeeping untouched.
        features, others = x_others
        out = super(ConvBNReLU_1st, self).forward(features)
        return out, others
46 |
47 |
class ConvBNReLU(nn.Sequential):
    """Plain Conv + BatchNorm + ReLU6 stack (operates on tensors directly)."""

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        # 'same'-style padding for odd kernel sizes.
        pad = (kernel_size - 1) // 2
        stack = (
            nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad,
                      groups=groups, bias=False),
            nn.BatchNorm2d(out_planes),
            nn.ReLU6(inplace=True),
        )
        super(ConvBNReLU, self).__init__(*stack)
56 |
class DyInvertedResidual(nn.Module):
    """MobileNetV2 inverted residual with a dynamic grouped 1x1 projection.

    Layout: optional pointwise expansion -> 3x3 depthwise -> per-head
    channel masking (optionally scaled by attention) -> grouped pointwise
    projection + BN. Analytic FLOPs counters are computed at build time
    for the training-time sparsity loss.
    """

    def __init__(self, inp, oup, stride, expand_ratio, input_size, config):
        super(DyInvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]
        h, w = input_size
        self.attention = config.attention

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        # NOTE(review): `layers` is assigned but never used below.
        layers = []
        self.conv = nn.ModuleList()
        # Cost of the global average pooling feeding the mask generator.
        self.flops_mask = inp * h * w
        ## pw
        self.flops_pw = 0
        if expand_ratio != 1:
            self.conv.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
            self.flops_pw = inp * hidden_dim * h * w + hidden_dim * h * w
        ## dw
        self.conv.append(ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim))
        h = conv2d_out_dim(h, 3, 1, stride)
        w = conv2d_out_dim(w, 3, 1, stride)
        self.output_size = (h, w)
        self.flops_dw = 9 * hidden_dim * h * w + hidden_dim * h * w

        ## dynamic group conv with bn
        self.conv.append(nn.Conv2d(config.heads * hidden_dim, oup, 1, 1, 0, groups=config.heads, bias=False))
        self.conv.append(nn.BatchNorm2d(oup))
        if self.attention:
            self.flops_dgc = hidden_dim * (oup + config.heads) * h * w # the 2nd term is for att
            self.flops_original_extra = config.heads * hidden_dim * h * w
        else:
            self.flops_dgc = hidden_dim * oup * h * w
            self.flops_original_extra = 0

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        ## mask generator
        self.maskgen = MaskGen(inp, hidden_dim, config.heads, config.eps, config.bias)
        self.flops_mask += self.maskgen.flops

        ## attention generator
        self.flops_att = 0
        if self.attention:
            self.attgen = AttGen(inp, hidden_dim, config.heads)
            self.flops_att = self.attgen.flops

    def get_others(self, mask, others):
        """Accumulate this block's dynamic FLOPs and pruning bonus.

        `others` is the running [flops_dgc, bonus] pair; the bonus counts
        pw/dw cost of channels masked out by *every* head (their product
        of (1 - mask) terms is nonzero).
        """
        flops_dgc_, bonus_ = others
        flops_dgc = mask.mean() * self.flops_dgc
        # Detach: the bonus is reporting-only and must not receive gradients.
        mask_d = mask.detach()
        mask_bonus = 1.0 - mask_d[:, 0, :]
        for i in range(1, mask_d.size(1)):
            mask_bonus = mask_bonus * (1.0 - mask_d[:, i, :]) # b, hidden_dim
        bonus = mask_bonus.mean() * (self.flops_pw + self.flops_dw)
        return flops_dgc_ + flops_dgc, bonus_ + bonus

    def forward(self, x_others):
        """Run the block on a (tensor, [flops, bonus]) pair."""
        x, others = x_others
        x_ = x  # kept for the residual connection

        # Masks (and attention) are generated from globally pooled input.
        x_pool = self.avg_pool(x)
        mask = self.maskgen(x_pool) # b, heads, hidden_dim
        b, heads, hidden_dim = mask.size()

        if self.attention:
            att = self.attgen(x_pool)

        #x = self.conv(x) # b, hidden_dim, h, w
        # Everything before the grouped conv + BN (i.e. pw expansion + dw).
        for layer in self.conv[:-2]:
            x = layer(x)

        # Replicate the hidden features once per head, each with its own
        # (optionally attention-scaled) channel mask.
        xcat = []
        for i in range(heads):
            if self.attention:
                xmask = x * mask[:, i, :].view(b, hidden_dim, 1, 1) * att[:, i, :].view(b, hidden_dim, 1, 1)
            else:
                xmask = x * mask[:, i, :].view(b, hidden_dim, 1, 1)
            xcat.append(xmask)
        x = torch.cat(xcat, dim=1) # b, heads*hidden_dim, h, w
        # Grouped 1x1 projection followed by BN.
        for layer in self.conv[-2:]:
            x = layer(x)

        if self.use_res_connect:
            x = x_ + x

        flops_dgc, bonus = self.get_others(mask, others)

        return x, [flops_dgc, bonus]
146 |
class DyMobileNetV2(nn.Module):
    """MobileNetV2 backbone built from DyInvertedResidual blocks.

    Tracks analytic FLOPs for the static layers (first conv, pw/dw
    convolutions, last conv + classifier) and for the dynamic grouped
    convolutions and mask/attention generators.
    """

    def __init__(self,
                 config,
                 inverted_residual_setting=None,
                 round_nearest=8,
                 block=None):
        """
        MobileNet V2 main class
        Args:
            width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
            inverted_residual_setting: Network structure
            round_nearest (int): Round the number of channels in each layer to be a multiple of this number
            Set to 1 to turn off rounding
            block: Module specifying inverted residual building block for mobilenet
        """
        super(DyMobileNetV2, self).__init__()
        width_mult = config.width_mult

        if block is None:
            block = DyInvertedResidual
        input_channel = 32
        last_channel = 1280
        h, w = config.input_size

        if inverted_residual_setting is None:
            # CIFAR variant uses fewer stride-2 stages (smaller inputs).
            if "cifar" in config.data:
                inverted_residual_setting = [
                    # t, c, n, s
                    [1, 16, 1, 1],
                    [6, 24, 2, 1],
                    [6, 32, 3, 2],
                    [6, 64, 4, 1],
                    [6, 96, 3, 2],
                    [6, 160, 3, 1],
                    [6, 320, 1, 1],
                ]
                init_stride = 1
            else:
                inverted_residual_setting = [
                    # t, c, n, s
                    [1, 16, 1, 1],
                    [6, 24, 2, 2],
                    [6, 32, 3, 2],
                    [6, 64, 4, 2],
                    [6, 96, 3, 1],
                    [6, 160, 3, 2],
                    [6, 320, 1, 1],
                ]
                init_stride = 2

        # only check the first element, assuming user knows t,c,n,s are required
        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
            raise ValueError("inverted_residual_setting should be non-empty "
                             "or a 4-element list, got {}".format(inverted_residual_setting))

        # building first layer
        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
        self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
        features = [ConvBNReLU_1st(3, input_channel, stride=init_stride)]
        h = conv2d_out_dim(h, 3, 1, init_stride)
        w = conv2d_out_dim(w, 3, 1, init_stride)
        # 3x3 conv + BN/ReLU cost.
        self.flops_1st = 3 * input_channel * 9 * h * w + input_channel * h * w
        print('Conv 1st: h {}, w {}, flops {}'.format(h, w, self.flops_1st))
        # building inverted residual blocks
        self.flops_pwdw = 0
        self.flops_dgc = 0
        self.flops_mask = 0
        self.flops_original_extra = 0
        for k, [t, c, n, s] in enumerate(inverted_residual_setting):
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                # Only the first block of each stage downsamples.
                stride = s if i == 0 else 1
                the_block = block(input_channel, output_channel, stride, expand_ratio=t, input_size=(h, w), config=config)
                features.append(the_block)
                input_channel = output_channel
                h, w = the_block.output_size
                flops_pwdw = the_block.flops_pw + the_block.flops_dw
                print('Block {}-{}: h {}, w {}, pwdw flops {}, dgc flops {}'.format(
                    k, i, h, w, flops_pwdw, the_block.flops_dgc))
                self.flops_pwdw += flops_pwdw
                self.flops_dgc += the_block.flops_dgc
                self.flops_mask += the_block.flops_mask + the_block.flops_att
                self.flops_original_extra += the_block.flops_original_extra

        # building last several layers
        features.append(ConvBNReLU_1st(input_channel, self.last_channel, kernel_size=1))
        # make it nn.Sequential
        self.features = nn.Sequential(*features)

        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0),
            nn.Linear(self.last_channel, config.num_classes),
        )
        self.flops_last = h * w * input_channel * self.last_channel + self.last_channel * h * w +\
                self.last_channel * config.num_classes + self.last_channel * h * w
        print('Conv last and classifier: h {}, w {}, flops {}'.format(h, w, self.flops_last))

        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                #if m.bias is not None:
                #    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.out_features == config.num_classes:
                    nn.init.zeros_(m.bias)
        # Attention generators start as identity-like (weight 0, bias 1).
        for m in self.modules():
            if isinstance(m, AttGen):
                nn.init.constant_(m.conv[3].weight, 0)
                nn.init.constant_(m.conv[3].bias, 1)

    def get_flops(self):
        """Return (original, best-possible, static-main, dgc, mask) FLOPs.

        `flops_possible` assumes dynamic grouped convs prune to 1/4 of
        their dense cost plus mask/attention-generation overhead.
        """
        flops_main = self.flops_1st + self.flops_pwdw + self.flops_last
        flops = flops_main + self.flops_dgc - self.flops_original_extra
        flops_possible = flops_main + self.flops_dgc * 0.25 + self.flops_mask
        return flops, flops_possible, flops_main, self.flops_dgc, self.flops_mask


    def forward(self, x):
        """Return (logits, [dgc_flops, bonus]); the [0, 0] pair seeds the
        per-batch dynamic FLOPs accounting threaded through `features`."""
        x_others = [x, [0, 0]]
        x, others = self.features(x_others)
        # Global average pool over the spatial dimensions.
        x = x.mean([2, 3])
        x = self.classifier(x)
        return x, others
276 |
def msgc_mobilenetv2(args):
    """Build the MSGC MobileNetV2 model.

    Unless `args.scratch` is set, initializes from the torchvision
    ImageNet-pretrained MobileNetV2 weights.
    """
    model = DyMobileNetV2(Config_mobilenetv2(args))
    if not args.scratch:
        url = 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth'
        pretrained_dict = load_state_dict_from_url(url, progress=True)
        # Merge downloaded weights into the model's own state dict so any
        # parameters absent from the checkpoint keep their fresh values.
        merged = model.state_dict()
        merged.update(pretrained_dict)
        model.load_state_dict(merged)
        print('load pretrained model successfully')

    return model
289 |
--------------------------------------------------------------------------------
/main_dist.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import unicode_literals
3 | from __future__ import print_function
4 | from __future__ import division
5 |
6 | import argparse
7 | import os
8 | import time
9 | import models
10 | from utils import *
11 |
12 | import torch
13 | import torch.nn as nn
14 | import torch.backends.cudnn as cudnn
15 | import torch.optim
16 | import torchvision.transforms as transforms
17 | import torchvision.datasets as datasets
18 |
19 | import torch.nn.parallel
20 | import torch.distributed as dist
21 |
# Command-line interface and per-process distributed setup. This module is
# intended to be launched once per GPU (torch.distributed launcher supplies
# --local_rank); the NCCL process group is initialized at import time.
parser = argparse.ArgumentParser(description='PyTorch Middle Spectrum Grouped Convolution')

### dirs
parser.add_argument('--data', type=str, default='imagenet',
                    help='name of dataset', choices=['imagenet'])
parser.add_argument('--datadir', type=str, default='../data',
                    help='dir to the dataset or the validation set')
parser.add_argument('--savedir', type=str, default='results/savedir',
                    help='path to save result and checkpoint')

### model
parser.add_argument('--model', type=str, default='dgc_densenet86',
                    help='model to train the dataset')
parser.add_argument('--heads', type=int, default=4,
                    help='number of heads')
parser.add_argument('--width-mul', type=float, default=1.0,
                    help='width mutiplier for mobilenetv2')
parser.add_argument('--attention', action='store_true',
                    help='use attention in model')
parser.add_argument('--scratch', action='store_true',
                    help='load pretrained model from pytorch repository')
parser.add_argument('--checkinfo', action='store_true',
                    help='only check the information of model')

### training
parser.add_argument('--epochs', type=int, default=120,
                    help='number of total epochs to run')
parser.add_argument('--warm-epoch', default=0, type=int,
                    help='epoch number to warm up')
parser.add_argument('-b', '--batch-size', type=int, default=128,
                    help='mini-batch size')
parser.add_argument('--btest', type=int, default=100,
                    help='mini-batch size for testing')
parser.add_argument('--opt', type=str, default='sgd',
                    help='optimizer [adam, sgd]')
parser.add_argument('--momentum', type=float, default=0.9,
                    help='momentum (default: 0.9)')
parser.add_argument('--lr', type=float, default=0.075,
                    help='initial learning rate for mask weights')
parser.add_argument('--lr-mul', type=float, default=0.2,
                    help='initial learning rate scale for pretrained weights')
parser.add_argument('--lr-type', type=str, default='cosine',
                    help='learning rate strategy [cosine, multistep]')
parser.add_argument('--lr-steps', type=str, default=None,
                    help='steps for multistep learning rate')
parser.add_argument('--wd', type=float, default=1e-4,
                    help='weight decay for all weights')
parser.add_argument('--label-smooth', type=float, default=0.1,
                    help='label smoothing')
parser.add_argument('--lmd', type=float, default=30,
                    help='lambda for calculating disperity loss')
parser.add_argument('--target', type=float, default=0.5,
                    help='target flops rate for DGC convolutions')
parser.add_argument('--pstart', type=float, default=0,
                    help='start pruning progress')
parser.add_argument('--pstop', type=float, default=0.5,
                    help='stop pruning progress')
parser.add_argument('--seed', type=int, default=None,
                    help='random seed (default: None)')
parser.add_argument('--print-freq', type=int, default=10,
                    help='print frequency (default: 10)')

# gpu and cpu
parser.add_argument('--gpu', type=str, default='',
                    help='gpus available')
parser.add_argument('--nocudnnbm', action='store_true',
                    help='set cudnn benchmark to False')
parser.add_argument('-j', '--workers', type=int, default=4,
                    help='number of data loading workers')

### checkpoint
parser.add_argument('--save-freq', type=int, default=5,
                    help='save frequency (default: 10)')
parser.add_argument('--resume', action='store_true',
                    help='use latest checkpoint if have any')
parser.add_argument('--resume-from', type=str, default=None,
                    help='give a checkpoint path for resuming')
parser.add_argument('--evaluate', type=str, default=None,
                    help="full path to checkpoint to be evaluated or 'best'")

# Injected by the distributed launcher; identifies this process's GPU.
parser.add_argument('--local_rank', type=int)

args = parser.parse_args()

# One process per GPU: join the NCCL group and pin this process to its device.
torch.distributed.init_process_group(backend='nccl')
torch.cuda.set_device(args.local_rank)
device = torch.device('cuda', args.local_rank)

# Best top-1 validation accuracy seen so far (updated globally in main()).
best_prec1 = 0
111 |
def main(running_file):
    """Full training/evaluation driver for one distributed process.

    Builds the model from `args`, sets up the parameter-group optimizer,
    data loaders and (optionally) resumes from a checkpoint, then runs the
    train/validate loop, logging to `running_file` and `args.log_file`.
    """

    global args, best_prec1

    ### Refine args
    if args.seed is None:
        args.seed = int(time.time())
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # '--lr-steps 30-60-90' style string -> [30, 60, 90] with 0.1 decay each.
    if args.lr_steps is not None:
        args.lr_steps = list(map(int, args.lr_steps.split('-')))
        args.lr_gammas = [0.1 for _ in args.lr_steps]

    # NOTE(review): --btest defaults to 100, so this branch looks dead — confirm.
    if args.btest is None:
        args.btest = args.batch_size

    # NOTE(review): --data is restricted to choices=['imagenet'] above, so the
    # cifar branches appear unreachable from the CLI — confirm intent.
    if args.data == 'cifar10':
        args.num_classes = 10
        R = 32
    elif args.data == 'cifar100':
        args.num_classes = 100
        R = 32
    elif 'imagenet' in args.data:
        args.num_classes = 1000
        R = 224
    else:
        raise ValueError('unrecognized data')

    ### Create model
    model = getattr(models, args.model)(args)

    flops_ori, flops_possible, flops_main, flops_dgc, flops_mask = model.get_flops()
    flops_target = args.target * flops_ori
    # Stash FLOPs constants on args so train()/validate() can compute the
    # per-batch dynamic cost.
    args.flops_ori, args.flops_main, args.flops_dgc, args.flops_mask = \
            flops_ori, flops_main, flops_dgc, flops_mask
    print(args)
    flopsinfo = 'Flops of {}: original {} M, target {} M, possible {} M, dgc {} M, mask {} M\n'.format(
            args.model, flops_ori / 1e6, flops_target / 1e6, flops_possible / 1e6, flops_dgc / 1e6, flops_mask / 1e6)
    print(flopsinfo)

    # --checkinfo: report model statistics only, no training.
    if args.checkinfo:
        running_file.write(flopsinfo)
        running_file.flush()
        return

    ### Define optimizer
    param_dict = dict(model.named_parameters())

    p_conv = []
    pname_conv = []

    p_mask = []
    pname_mask = []

    p_bn = []
    pname_bn = []

    # Collect BN affine parameter names so they get no weight decay.
    BN_name_pool = []
    for m_name, m in model.named_modules():
        if isinstance(m, nn.BatchNorm2d):
            BN_name_pool.append(m_name + '.weight')
            BN_name_pool.append(m_name + '.bias')

    # Three parameter groups: mask generators train at full lr without decay;
    # BN at scaled lr without decay; everything else at scaled lr with decay.
    for key, value in param_dict.items():
        if 'mask' in key:
            pname_mask.append(key)
            p_mask.append(value)
        elif key in BN_name_pool:
            pname_bn.append(key)
            p_bn.append(value)
        else:
            pname_conv.append(key)
            p_conv.append(value)
    params = [{'params': p_mask, 'lr': args.lr, 'weight_decay': 0.},
              {'params': p_bn, 'lr': args.lr * args.lr_mul, 'weight_decay': 0.},
              {'params': p_conv, 'lr': args.lr * args.lr_mul, 'weight_decay': args.wd}]
    args.lr_list = [g['lr'] for g in params]
    optimizer = torch.optim.SGD(params, momentum=args.momentum, nesterov=True)

    ### Transfer to cuda devices
    model.to(device)
    model = torch.nn.parallel.DistributedDataParallel(model,
            device_ids=[args.local_rank], output_device=args.local_rank)
    print('cuda is used, with %d gpu devices' % torch.cuda.device_count())

    ### Define loss function (criterion) and optimizer
    # Label smoothing is used for training, plain CE for validation.
    criterion = nn.CrossEntropyLoss()
    criterion_smooth = CrossEntropyLabelSmooth(args.num_classes, args.label_smooth)

    cudnn.benchmark = not args.nocudnnbm

    ### Data loading
    traindir = os.path.join(args.datadir, 'train')
    valdir = os.path.join(args.datadir, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_set = datasets.ImageFolder(traindir, transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))

    val_set = datasets.ImageFolder(valdir, transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]))

    # The sampler shards the training set across processes; shuffle=False
    # because DistributedSampler does the shuffling.
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_set)

    train_loader = torch.utils.data.DataLoader(
            train_set,
            batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
            val_set,
            batch_size=args.btest, shuffle=False,
            num_workers=args.workers, pin_memory=True)

    ### Optionally resume from a checkpoint
    args.start_epoch = 0
    if args.resume or (args.resume_from is not None) or (args.evaluate is not None):
        checkpoint = load_checkpoint(args, running_file)
        if checkpoint is not None:
            model.load_state_dict(checkpoint['state_dict'])
            ## Evaluate directly if required
            if args.evaluate is not None:
                validate(val_loader, model, criterion, args)
                print('##########Time########## %s' % (time.strftime('%Y-%m-%d %H:%M:%S')))
                return
            args.start_epoch = checkpoint['epoch'] + 1
            best_prec1 = checkpoint['best_prec1']
            optimizer.load_state_dict(checkpoint['optimizer'])


    ### Train
    saveID = None
    print('current best: {}'.format(best_prec1))
    with open(args.log_file, 'a') as f:
        f.write('Flops of {}: original {} M, target {} M, possible {} M, main {} M, dgc {} M, mask {} M\n'.format(
            args.model, flops_ori/1e6, flops_target/1e6, flops_possible/1e6, flops_main/1e6, flops_dgc/1e6, flops_mask/1e6))

    for epoch in range(args.start_epoch, args.epochs):

        # Keep a checkpoint every epoch near the end of training.
        if epoch >= args.epochs - 5:
            args.save_freq = 1

        # Re-seed the sampler so each epoch gets a different shard shuffle.
        train_sampler.set_epoch(epoch)

        # adjust learning rate and progress
        lr_str = adjust_learning_rate(optimizer, epoch, args, method=args.lr_type)

        # train
        tr_prec1, tr_prec5, loss, tr_flops, tr_dgc, tr_bonus = \
                train(train_loader, model, criterion_smooth, optimizer, epoch,
                      running_file, lr_str, args)

        val_prec1, val_prec5, val_flops, val_dgc, val_bonus = \
                validate(val_loader, model, criterion, args)

        is_best = val_prec1 >= best_prec1
        best_prec1 = max(val_prec1, best_prec1)

        log = ("Epoch %03d/%03d: (%.4f %.4f) | %.4f M (%.4f -%.4f)" + \
                " || train (%.4f %.4f) | %.4f M (%.4f -%.4f)| loss %.4f" + \
                " || lr %s | Time %s\n") \
                % (epoch, args.epochs, val_prec1, val_prec5, val_flops, val_dgc, val_bonus, \
                tr_prec1, tr_prec5, tr_flops, tr_dgc, tr_bonus, loss, \
                lr_str, time.strftime('%Y-%m-%d %H:%M:%S'))
        with open(args.log_file, 'a') as f:
            f.write(log)

        # Only rank 0 writes checkpoints to avoid concurrent writers.
        if args.local_rank == 0:
            print('checkpoint saving in local rank 0')
            running_file.write('checkpoint saving in local rank 0\n')
            running_file.flush()
            saveID = save_checkpoint({
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, epoch, args.savedir, is_best,
            saveID, keep_freq=args.save_freq)

    return
301 |
302 |
def train(train_loader, model, criterion, optimizer, epoch,
          running_file, running_lr, args):
    """Train one epoch; returns (top1, top5, loss, flops, dgc, bonus) averages.

    The classification loss is augmented with a sparsity penalty that pushes
    the model's dynamic FLOPs ratio toward a target that is annealed linearly
    from 1.0 down to `args.target` between training progress `args.pstart`
    and `args.pstop`.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()

    losses = AverageMeter()
    losses_sparse = AverageMeter()
    dgces = AverageMeter()
    bonuses = AverageMeter()
    flopses = AverageMeter()

    top1 = AverageMeter('sum')
    top5 = AverageMeter('sum')

    ## Switch to train mode
    model.train()

    running_file.write('\n%s\n' % str(args))
    running_file.flush()

    # Field widths for zero-padded iteration/epoch counters in the log line.
    wD = len(str(len(train_loader)))
    wE = len(str(args.epochs))

    end = time.time()
    for i, (input, label) in enumerate(train_loader):

        ## Calculate progress
        # progress in [0, 1) over the whole training run, not just this epoch.
        progress = float(epoch * len(train_loader) + i) / (args.epochs * len(train_loader))
        start, stop = args.pstart, args.pstop
        # Linear anneal of the FLOPs target from 1.0 to args.target; clamped
        # at args.target once progress passes `stop`.
        # NOTE(review): divides by (stop - start) — assumes pstop > pstart.
        target = (progress - start) / (stop - start) * (args.target - 1) + 1
        target = max(target, args.target) if progress > start else 1.0

        ## Measure data loading time
        data_time.update(time.time() - end)

        input = input.cuda(non_blocking=True)
        label = label.cuda(non_blocking=True)

        ## Compute output
        output, [dgc, bonus] = model(input)

        loss = criterion(output, label)
        # Per-batch dynamic cost: static part + dynamic grouped convs +
        # mask generation - fully-pruned-channel bonus.
        flops = args.flops_main + dgc + args.flops_mask - bonus
        # Penalize only when the current FLOPs ratio exceeds the target.
        if flops.item() / args.flops_ori >= target:
            #loss_sparse = args.lmd * (flops / args.flops_ori - target) ** 2
            loss_sparse = args.lmd * (flops / args.flops_ori - target)
            losses_sparse.update(loss_sparse.item(), input.size(0))
        else:
            loss_sparse = 0
            losses_sparse.update(0, input.size(0))

        ## Measure accuracy and record losses

        prec1, prec5 = accuracy(output.data, label, topk=(1, 5))

        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))
        flopses.update(flops.item()/1e6, input.size(0))
        dgces.update(dgc.item()/1e6, input.size(0))
        bonuses.update(bonus.item()/1e6, input.size(0))

        losses.update(loss.item(), input.size(0))

        ## Compute gradient and do SGD step
        loss = loss + loss_sparse
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        ## Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        ## Record
        if i % args.print_freq == 0:
            runinfo = str(('GPU %d Epoch: [{0:0%dd}/{1:0%dd}][{2:0%dd}/{3:0%dd}] | ' \
                    % (args.local_rank, wE, wE, wD, wD) + \
                    'Time {batch_time.val:.3f} | ' + \
                    'Data {data_time.val:.3f} | ' + \
                    'Loss ({loss.val:.4f} {loss_sparse.val:.4f}) | ' + \
                    'Flops ({flops.val:.4f} M {dgc.val:.4f} M {bonus.val:.4f} M) | ' + \
                    'Prec@1 {top1.val100:.3f} | ' + \
                    'Prec@5 {top5.val100:.3f} | ' + \
                    'lr {lr}').format(
                        epoch, args.epochs, i, len(train_loader),
                        batch_time=batch_time, data_time=data_time,
                        loss=losses, loss_sparse=losses_sparse,
                        flops=flopses, dgc=dgces, bonus=bonuses,
                        top1=top1, top5=top5, lr=running_lr))
            print(runinfo)
            # Persist a subset of the printed lines, only on rank 0.
            if i % (args.print_freq * 20) == 0 and args.local_rank == 0:
                running_file.write('%s\n' % runinfo)
                running_file.flush()

    return top1.avg100, top5.avg100, losses.avg, flopses.avg, dgces.avg, bonuses.avg
398 |
399 |
def validate(val_loader, model, criterion, args):
    """Evaluate on `val_loader`; returns (top1, top5, flops, dgc, bonus) averages.

    NOTE(review): `criterion` is accepted but not used in this function —
    kept for call-signature symmetry with train(); confirm before removing.
    """
    batch_time = AverageMeter()

    dgces = AverageMeter()
    bonuses = AverageMeter()
    flopses = AverageMeter()

    top1 = AverageMeter('sum')
    top5 = AverageMeter('sum')

    ## Switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (input, label) in enumerate(val_loader):
        # No gradients needed during evaluation.
        with torch.no_grad():
            label = label.cuda()
            input = input.cuda()

            ## Compute output
            output, [dgc, bonus] = model(input)

            ## Measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, label, topk=(1, 5))
            # Per-batch dynamic cost, same formula as in train().
            flops = args.flops_main + dgc + args.flops_mask - bonus

            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))
            flopses.update(flops.item()/1e6, input.size(0))
            dgces.update(dgc.item()/1e6, input.size(0))
            bonuses.update(bonus.item()/1e6, input.size(0))

            ## Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            ## Record
            if i % args.print_freq == 0:
                print(('Test: [{0}/{1}]\t' + \
                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' + \
                        'Flops ({flops.val:.4f} M {dgc.val:.4f} M {bonus.val:.4f} M) | ' + \
                        'Prec@1 {top1.val100:.3f} ({top1.avg100:.3f})\t' + \
                        'Prec@5 {top5.val100:.3f} ({top5.avg100:.3f})').format(
                            i, len(val_loader), batch_time=batch_time,
                            flops=flopses, dgc=dgces, bonus=bonuses,
                            top1=top1, top5=top5))

    print(' * Prec@1 {top1.avg100:.3f} | Prec@5 {top5.avg100:.3f} | '
          'Flops {flops.avg:.4f} M'.format(
              top1=top1, top5=top5, flops=flopses))

    return top1.avg100, top5.avg100, flopses.avg, dgces.avg, bonuses.avg
452 |
453 |
if __name__ == '__main__':

    # Prepare the output directory and the two log destinations:
    #  - args.log_file: persistent per-model log, appended across runs;
    #  - running_file: per-run file whose name carries a timestamp, so
    #    repeated or concurrent runs do not clobber each other's output.
    os.makedirs(args.savedir, exist_ok=True)
    args.log_file = os.path.join(args.savedir, '%s_log.txt' % args.model)
    running_file = os.path.join(args.savedir, '%s_running-%s.txt' % (args.model, time.strftime('%Y-%m-%d-%H-%M-%S')))

    # main() receives the open running-log handle and writes progress to it.
    with open(running_file, 'w') as f:
        main(f)
462 |
--------------------------------------------------------------------------------
/models/msgc_resnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import Tensor
3 | import torch.nn as nn
4 | from torchvision.models.utils import load_state_dict_from_url
5 | from typing import Type, Any, Callable, Union, List, Optional
6 |
7 | from .utils import MaskGen, AttGen, conv2d_out_dim
8 | from .net_config import Config_resnet
9 |
# torchvision ImageNet checkpoints used to warm-start the MSGC models
# (consumed by msgc_resnet18 / msgc_resnet50 below; 'resnet34' is listed
# but has no corresponding builder in this file).
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-f37072fd.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-b627a593.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-0676ba61.pth',
}
15 |
16 |
def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
    """Bias-free 3x3 convolution; padding tracks dilation so the spatial
    size is preserved when stride == 1."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        groups=groups,
        bias=False,
    )
21 |
22 |
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """Bias-free pointwise (1x1) convolution."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
26 |
27 |
class DyBasicBlock(nn.Module):
    """ResNet BasicBlock with dynamic multi-head channel gating (MSGC).

    Per-channel soft masks (``MaskGen``) and, when ``config.attention`` is
    set, per-channel attention weights (``AttGen``) are computed from the
    globally average-pooled input and multiplied into the features before
    each convolution.  ``conv2`` is a grouped convolution over
    ``config.heads`` differently-masked copies of the intermediate feature
    map, concatenated along the channel axis.

    FLOP bookkeeping set up in ``__init__``:
      * ``flops_dgc1`` / ``flops_dgc2``: full (unmasked) cost of conv1/conv2,
        including the mask/attention element-wise multiplies;
      * ``flops_mask`` / ``flops_att``: fixed overhead of the mask/attention
        generators (plus the shared avg-pool read of the input);
      * ``flops_original_extra``: the attention-multiply cost that a plain
        (unmasked) network would also pay, used by ``ResNet.get_flops``.
    ``forward`` threads a running ``[flops_dgc, bonus]`` accumulator pair
    through the network alongside the activations.
    """

    # BasicBlock keeps the channel count: out_channels == planes * expansion.
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        input_size: tuple = (None, None),
        config: Any = None
    ) -> None:
        super(DyBasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        self.downsample = downsample
        self.stride = stride
        self.attention = config.attention

        h, w = input_size
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        if self.attention:
            # Cost of multiplying the attention weights into conv1's input.
            self.flops_dgc1 = inplanes * h * w # this term is for att
            self.flops_original_extra = inplanes * h * w
        else:
            self.flops_dgc1 = 0
            self.flops_original_extra = 0
        # Avg-pool read of the input that feeds the mask generators.
        self.flops_mask = h * w * inplanes

        h = conv2d_out_dim(h, 3, 1, stride)
        w = conv2d_out_dim(w, 3, 1, stride)
        self.output_size = (h, w)
        # 3x3 conv MACs plus the per-element mask multiply on its input.
        self.flops_dgc1 += 9 * inplanes * planes * h * w + planes * h * w

        # dynamic conv: grouped over `heads` masked copies of the feature map
        self.conv2 = conv3x3(planes * config.heads, planes, groups=config.heads)
        self.bn2 = norm_layer(planes)
        if self.attention:
            self.flops_dgc2 = (9 * planes + 1) * planes * h * w \
                + config.heads * planes * h * w # the 2nd term is for att
            self.flops_original_extra += config.heads * planes * h * w
        else:
            self.flops_dgc2 = (9 * planes + 1) * planes * h * w

        # Total dynamic (maskable) cost of this block.
        self.flops_dgc = self.flops_dgc1 + self.flops_dgc2

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # mask generator
        self.maskgen1 = MaskGen(inplanes, inplanes, 1, config.eps, config.bias)
        self.maskgen2 = MaskGen(inplanes, planes, config.heads, config.eps, config.bias)
        self.flops_mask += self.maskgen1.flops + self.maskgen2.flops

        # attention generator
        self.flops_att = 0
        if self.attention:
            self.attgen1 = AttGen(inplanes, inplanes, 1)
            self.attgen2 = AttGen(inplanes, planes, config.heads)
            self.flops_att = self.attgen1.flops + self.attgen2.flops

    def get_others(self, mask1, mask2, others):
        """Accumulate this block's expected FLOPs and gating bonus.

        Args:
            mask1: (b, 1, inplanes) soft mask on conv1's input channels.
            mask2: (b, heads, planes) soft masks on conv2's input copies.
            others: running ``[flops_dgc, bonus]`` pair from earlier blocks.

        Returns:
            Updated ``(flops_dgc, bonus)`` pair.
        """
        flops_dgc_, bonus_ = others
        # Expected cost = mean keep-fraction of the mask x full conv cost.
        flops_dgc1 = mask1.mean() * self.flops_dgc1
        flops_dgc2 = mask2.mean() * self.flops_dgc2

        mask_d = mask2.detach()
        # A channel of conv1's output that is gated off by *every* head is
        # never needed, so its share of conv1's cost is credited back as a
        # bonus.  Both factors are detached, so the bonus carries no gradient.
        mask_bonus = 1.0 - mask_d[:, 0, :]
        for i in range(1, mask_d.size(1)):
            mask_bonus = mask_bonus * (1.0 - mask_d[:, i, :]) # b, planes
        bonus = mask_bonus.mean() * flops_dgc1.detach()
        return flops_dgc_ + flops_dgc1 + flops_dgc2, bonus_ + bonus

    def forward(self, x_others):
        """Forward pass.

        Args:
            x_others: tuple ``(x, [flops_dgc, bonus])`` — feature map plus the
                running FLOP/bonus accumulators from earlier blocks.

        Returns:
            ``(out, [flops_dgc, bonus])`` with this block's contribution added.
        """
        x, others = x_others
        identity = x

        # Masks (and attention) are derived from a global pooled summary of x.
        x_pool = self.avg_pool(x)
        mask1 = self.maskgen1(x_pool) # b, 1, inplanes
        mask2 = self.maskgen2(x_pool) # b, heads, planes
        _, _, inplanes = mask1.size()
        b, heads, planes = mask2.size()

        if self.attention:
            att1 = self.attgen1(x_pool) # b, 1, inplanes
            att2 = self.attgen2(x_pool) # b, heads, planes
            out = self.conv1(x * mask1.view(b, inplanes, 1, 1) * att1.view(b, inplanes, 1, 1))
        else:
            out = self.conv1(x * mask1.view(b, inplanes, 1, 1))

        out = self.bn1(out)
        out = self.relu(out)

        # One differently-masked copy of `out` per head, concatenated along
        # channels and consumed by the grouped conv2.
        outcat = []
        for i in range(heads):
            if self.attention:
                outmask = out * mask2[:, i, :].view(b, planes, 1, 1) * att2[:, i, :].view(b, planes, 1, 1)
            else:
                outmask = out * mask2[:, i, :].view(b, planes, 1, 1)
            outcat.append(outmask)
        out = torch.cat(outcat, dim=1) # b, heads*planes, h, w

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        flops_dgc, bonus = self.get_others(mask1, mask2, others)

        return out, [flops_dgc, bonus]
151 |
152 |
class DyBottleneck(nn.Module):
    """ResNet Bottleneck with dynamic multi-head channel gating (MSGC).

    Three mask generators gate, respectively: conv1's input channels
    (``maskgen1``), conv2's per-head input copies (``maskgen2``), and
    conv3's input channels (``maskgen3``).  Attention (when enabled) is
    applied only to conv2's input copies.  FLOP bookkeeping mirrors
    ``DyBasicBlock``: ``forward`` threads a running ``[flops_dgc, bonus]``
    accumulator pair through the network alongside the activations.
    """

    # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
    # while original implementation places the stride at the first 1x1 convolution(self.conv1)
    # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
    # This variant is also known as ResNet V1.5 and improves accuracy according to
    # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

    expansion: int = 4

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        input_size: tuple = (None, None),
        config: Any = None
    ) -> None:
        super(DyBottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        self.downsample = downsample
        self.stride = stride
        self.attention = config.attention

        h, w = input_size
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        # 1x1 conv MACs plus the mask multiply on its input.
        self.flops_dgc1 = inplanes * width * h * w + width * h * w
        if self.attention:
            # Cost of multiplying the attention weights into conv2's inputs.
            self.flops_dgc2 = config.heads * width * h * w
            self.flops_original_extra = config.heads * width * h * w
        else:
            self.flops_dgc2 = 0
            self.flops_original_extra = 0
        self.flops_mask = h * w * inplanes # for the avgpool in the MaskGen

        # Grouped 3x3 conv over `heads` masked copies of the feature map.
        self.conv2 = conv3x3(width * config.heads, width, stride, groups=config.heads)
        self.bn2 = norm_layer(width)
        h = conv2d_out_dim(h, 3, 1, stride)
        w = conv2d_out_dim(w, 3, 1, stride)
        self.output_size = (h, w)
        self.flops_dgc2 += 9 * width * width * h * w + width * h * w

        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.flops_dgc3 = (width + 1) * planes * self.expansion * h * w

        # Total dynamic (maskable) cost of this block.
        self.flops_dgc = self.flops_dgc1 + self.flops_dgc2 + self.flops_dgc3

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # mask generator (last arg 8: presumably a reduction ratio — see MaskGen)
        self.maskgen1 = MaskGen(inplanes, inplanes, 1, config.eps, config.bias, 8)
        self.maskgen2 = MaskGen(inplanes, width, config.heads, config.eps, config.bias, 8)
        self.maskgen3 = MaskGen(inplanes, width, 1, config.eps, config.bias, 8)
        self.flops_mask += self.maskgen1.flops + self.maskgen2.flops + self.maskgen3.flops

        # attention generator
        self.flops_att = 0
        if self.attention:
            self.attgen2 = AttGen(inplanes, width, config.heads, 8)
            self.flops_att = self.attgen2.flops

        self.relu = nn.ReLU(inplace=True)

    def get_others(self, mask1, mask2, mask3, others):
        """Accumulate this block's expected FLOPs and gating bonuses.

        Args:
            mask1: (b, 1, inplanes) soft mask on conv1's input channels.
            mask2: (b, heads, width) soft masks on conv2's input copies.
            mask3: (b, 1, width) soft mask on conv3's input channels.
            others: running ``[flops_dgc, bonus]`` pair from earlier blocks.

        Returns:
            Updated ``(flops_dgc, bonus)`` pair.
        """
        flops_dgc_, bonus_ = others
        # Expected cost = mean keep-fraction of the mask x full conv cost.
        flops_dgc1 = mask1.mean() * self.flops_dgc1
        flops_dgc2 = mask2.mean() * self.flops_dgc2
        flops_dgc3 = mask3.mean() * self.flops_dgc3

        mask2_d = mask2.detach()
        heads = mask2_d.size(1)
        # bonus2: conv1-output channels gated off by *every* head never need
        # to be produced, so their share of conv1's cost is credited back.
        # All factors are detached, so the bonuses carry no gradient.
        mask2_bonus = 1.0 - mask2_d[:, 0, :]
        for i in range(1, heads):
            mask2_bonus = mask2_bonus * (1.0 - mask2_d[:, i, :])
        bonus2 = mask2_bonus.mean() * flops_dgc1.detach()

        mask3_d = mask3.detach()
        mask3_bonus = 1.0 - mask3_d[:, 0, :]
        # bonus3: conv2 is grouped, so head i produces the contiguous channel
        # slice [i*head_width, (i+1)*head_width) of conv3's input.  The share
        # of head i's conv2 cost whose outputs mask3 gates off is credited
        # back, weighted by how much of head i's input mask2 kept.
        head_width = mask3_d.size(2) // heads
        bonus3 = 0.0
        for i in range(heads):
            start_ = i * head_width
            end_ = start_ + head_width
            mask3_bonus_head = mask3_bonus[:, start_: end_]
            bonus3 += mask2_d[:, i, :].mean() * self.flops_dgc2 / heads * mask3_bonus_head.mean()

        flops_dgc = flops_dgc_ + flops_dgc1 + flops_dgc2 + flops_dgc3
        bonus = bonus_ + bonus2 + bonus3
        return flops_dgc, bonus

    def forward(self, x_others):
        """Forward pass.

        Args:
            x_others: tuple ``(x, [flops_dgc, bonus])`` — feature map plus the
                running FLOP/bonus accumulators from earlier blocks.

        Returns:
            ``(out, [flops_dgc, bonus])`` with this block's contribution added.
        """
        x, others = x_others
        identity = x

        # Masks (and attention) are derived from a global pooled summary of x.
        x_pool = self.avg_pool(x)
        mask1 = self.maskgen1(x_pool)
        mask2 = self.maskgen2(x_pool)
        mask3 = self.maskgen3(x_pool)
        inplanes = mask1.size(2)
        b, heads, width = mask2.size()

        if self.attention:
            att2 = self.attgen2(x_pool)

        out = self.conv1(x * mask1.view(b, inplanes, 1, 1))
        out = self.bn1(out)
        out = self.relu(out)

        # One differently-masked copy of `out` per head, concatenated along
        # channels and consumed by the grouped conv2.
        outcat = []
        for i in range(heads):
            if self.attention:
                outmask = out * mask2[:, i, :].view(b, width, 1, 1) * att2[:, i, :].view(b, width, 1, 1)
            else:
                outmask = out * mask2[:, i, :].view(b, width, 1, 1)
            outcat.append(outmask)
        out = torch.cat(outcat, dim=1)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out * mask3.view(b, width, 1, 1))
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        flops_dgc, bonus = self.get_others(mask1, mask2, mask3, others)

        return out, [flops_dgc, bonus]
293 |
294 |
class ResNet(nn.Module):
    """ResNet backbone assembled from dynamic MSGC blocks.

    Besides building the usual stem / four stages / classifier, the
    constructor tracks the spatial size (h, w) through every layer and
    accumulates file-wide FLOP statistics (``flops_conv1``, ``flops_dgc``,
    ``flops_mask``, ``flops_downsample``, ``flops_fc``,
    ``flops_original_extra``) that ``get_flops`` summarizes.  ``forward``
    returns ``(logits, [flops_dgc, bonus])`` — the per-sample dynamic FLOP
    estimate and gating bonus threaded through the blocks.
    """

    def __init__(
        self,
        block: Type[Union[DyBasicBlock, DyBottleneck]],
        layers: List[int],
        config,
        zero_init_residual: bool = False,
        groups: int = 1,
        width_per_group: int = 64,
        replace_stride_with_dilation: Optional[List[bool]] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer
        self.config = config

        self.inplanes = 64
        self.dilation = 1
        h, w = config.input_size
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        # Stem: 7x7/2 conv -> BN -> ReLU -> 3x3/2 maxpool; its (static) cost
        # is accumulated into flops_conv1.
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        h = conv2d_out_dim(h, 7, 3, 2)
        w = conv2d_out_dim(w, 7, 3, 2)
        self.flops_conv1 = 49 * 3 * self.inplanes * h * w
        print('Conv 1st: h {}, w {}, flops {}'.format(h, w, self.flops_conv1))
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.flops_conv1 += self.inplanes * h * w
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        h = conv2d_out_dim(h, 3, 1, 2)
        w = conv2d_out_dim(w, 3, 1, 2)
        # Max-pool contribution (approximate per-output cost factor of 4).
        self.flops_conv1 += 4 * self.inplanes * h * w
        # Running totals filled in by _make_layer as the stages are built.
        self.flops_dgc = 0
        self.flops_mask = 0
        self.flops_original_extra = 0
        self.flops_downsample = 0
        self.layer1, (h, w) = self._make_layer(block, 64, layers[0], input_size=(h, w))
        self.layer2, (h, w) = self._make_layer(block, 128, layers[1], stride=2, input_size=(h, w),
                                               dilate=replace_stride_with_dilation[0])
        self.layer3, (h, w) = self._make_layer(block, 256, layers[2], stride=2, input_size=(h, w),
                                               dilate=replace_stride_with_dilation[1])
        self.layer4, (h, w) = self._make_layer(block, 512, layers[3], stride=2, input_size=(h, w),
                                               dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, config.num_classes)
        # Final avg-pool (h*w reads per channel) plus the linear layer.
        self.flops_fc = 512 * block.expansion * (h * w + config.num_classes)
        print('classifier: flops {}'.format(self.flops_fc))

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
        # Initialize each AttGen's final layer (conv[3]) to weight 0 / bias 1
        # so attention starts out constant — presumably a neutral (identity)
        # scaling; confirm against AttGen's definition in models/utils.py.
        for m in self.modules():
            if isinstance(m, AttGen):
                nn.init.constant_(m.conv[3].weight, 0)
                nn.init.constant_(m.conv[3].bias, 1)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, DyBottleneck):
                    nn.init.constant_(m.bn3.weight, 0)  # type: ignore[arg-type]
                elif isinstance(m, DyBasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)  # type: ignore[arg-type]

    def _make_layer(self, block: Type[Union[DyBasicBlock, DyBottleneck]], planes: int, blocks: int,
                    stride: int = 1, input_size: tuple = (None, None), dilate: bool = False) -> tuple:
        """Build one ResNet stage of `blocks` dynamic blocks.

        Also accumulates the stage's FLOP statistics onto the model-level
        counters and tracks the output spatial size.

        Returns:
            ``(nn.Sequential, (h, w))`` — the stage and its output size.
        """
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        h, w = input_size
        print('input h and w: {} {}'.format(h, w))
        if dilate:
            self.dilation *= stride
            stride = 1
        # 1x1 projection shortcut when shape changes; its cost is static.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )
            hd = conv2d_out_dim(h, 1, 0, stride)
            wd = conv2d_out_dim(w, 1, 0, stride)
            self.flops_downsample += self.inplanes * planes * block.expansion * hd * wd

        layers = []
        # First block of the stage carries the stride/downsample.
        the_block = block(self.inplanes, planes, stride, downsample, self.groups,
                          self.base_width, previous_dilation, norm_layer,
                          input_size=(h, w), config=self.config)
        layers.append(the_block)
        h, w = the_block.output_size
        print('Block: h {}, w {}, flops {}'.format(h, w, the_block.flops_dgc))
        self.flops_dgc += the_block.flops_dgc
        self.flops_mask += the_block.flops_mask + the_block.flops_att
        self.flops_original_extra += the_block.flops_original_extra
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            the_block = block(self.inplanes, planes, groups=self.groups,
                              base_width=self.base_width, dilation=self.dilation,
                              norm_layer=norm_layer, input_size=(h, w), config=self.config)
            layers.append(the_block)
            h, w = the_block.output_size
            print('Block: h {}, w {}, flops {}'.format(h, w, the_block.flops_dgc))
            self.flops_dgc += the_block.flops_dgc
            self.flops_mask += the_block.flops_mask + the_block.flops_att
            self.flops_original_extra += the_block.flops_original_extra

        return nn.Sequential(*layers), (h, w)

    def get_flops(self) -> tuple:
        """Summarize static FLOP counters.

        Returns:
            ``(flops, flops_possible, flops_main, flops_dgc, flops_mask)``:
            flops          — cost of the equivalent unmasked network
                             (dgc minus the attention-only extras);
            flops_possible — cost if masks kept 25% of channels, plus the
                             mask-generator overhead;
            flops_main     — static stem + shortcut + classifier cost.
        """
        flops_main = self.flops_conv1 + self.flops_downsample + self.flops_fc
        flops = flops_main + self.flops_dgc - self.flops_original_extra
        flops_possible = flops_main + self.flops_dgc * 0.25 + self.flops_mask
        return flops, flops_possible, flops_main, self.flops_dgc, self.flops_mask

    def _forward_impl(self, x: Tensor) -> tuple:
        # See note [TorchScript super()]
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # Seed the [flops_dgc, bonus] accumulators that the dynamic blocks
        # thread through the four stages.
        x = [x, [0, 0]]
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x, others = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x, others

    def forward(self, x: Tensor) -> tuple:
        """Return ``(logits, [flops_dgc, bonus])`` for a batch of images."""
        return self._forward_impl(x)
446 |
447 |
def msgc_resnet18(args):
    r"""MSGC ResNet-18 built from :class:`DyBasicBlock` ([2, 2, 2, 2]).

    Unless ``args.scratch`` is set, the model is warm-started from the
    torchvision ImageNet-pretrained ResNet-18 checkpoint.

    Args:
        args: namespace with at least ``scratch``, plus whatever
            ``Config_resnet`` reads (num_classes, input_size, heads, ...).

    Returns:
        The constructed (optionally pretrained-initialized) ResNet model.

    Reference:
        "Deep Residual Learning for Image Recognition",
        https://arxiv.org/abs/1512.03385
    """
    config = Config_resnet(args)
    model = ResNet(DyBasicBlock, [2, 2, 2, 2], config)
    if not args.scratch:
        pretrained_dict = load_state_dict_from_url(model_urls['resnet18'], progress=True)
        model_dict = model.state_dict()
        # Only copy tensors whose name AND shape match the MSGC model.  A
        # blind update() would inject e.g. the checkpoint's fc weight of
        # shape [1000, 512] and make load_state_dict raise a size-mismatch
        # error whenever config.num_classes != 1000.
        compatible = {k: v for k, v in pretrained_dict.items()
                      if k in model_dict and v.shape == model_dict[k].shape}
        skipped = sorted(set(pretrained_dict) - set(compatible))
        if skipped:
            print('skipped incompatible pretrained keys: %s' % skipped)
        model_dict.update(compatible)
        model.load_state_dict(model_dict)
        print('load pretrained model successfully')

    return model
462 |
def msgc_resnet50(args):
    r"""MSGC ResNet-50 built from :class:`DyBottleneck` ([3, 4, 6, 3]).

    Unless ``args.scratch`` is set, the model is warm-started from the
    torchvision ImageNet-pretrained ResNet-50 checkpoint.

    Args:
        args: namespace with at least ``scratch``, plus whatever
            ``Config_resnet`` reads (num_classes, input_size, heads, ...).

    Returns:
        The constructed (optionally pretrained-initialized) ResNet model.

    Reference:
        "Deep Residual Learning for Image Recognition",
        https://arxiv.org/abs/1512.03385
    """
    config = Config_resnet(args)
    model = ResNet(DyBottleneck, [3, 4, 6, 3], config)
    if not args.scratch:
        pretrained_dict = load_state_dict_from_url(model_urls['resnet50'], progress=True)
        model_dict = model.state_dict()
        # Only copy tensors whose name AND shape match the MSGC model.  A
        # blind update() would inject e.g. the checkpoint's fc weight of
        # shape [1000, 2048] and make load_state_dict raise a size-mismatch
        # error whenever config.num_classes != 1000.
        compatible = {k: v for k, v in pretrained_dict.items()
                      if k in model_dict and v.shape == model_dict[k].shape}
        skipped = sorted(set(pretrained_dict) - set(compatible))
        if skipped:
            print('skipped incompatible pretrained keys: %s' % skipped)
        model_dict.update(compatible)
        model.load_state_dict(model_dict)
        print('load pretrained model successfully')

    return model
477 |
--------------------------------------------------------------------------------
/logs/msgc_resnet18_att_log.txt:
--------------------------------------------------------------------------------
1 | Flops of dyresnet18: original 1814.073344 M, target 885.267791872 M, possible 559.554176 M, main 137.793536 M, dgc 1680.218624 M, mask 1.705984 M
2 | Flops of dyresnet18: original 1814.073344 M, target 885.267791872 M, possible 559.554176 M, main 137.793536 M, dgc 1680.218624 M, mask 1.705984 M
3 | Epoch 000/120: (62.2060 84.6920) | 1819.2853 M (1679.7858 -0.0000) || train (51.2702 74.6463) | 1700.1066 M (1560.6701 -0.0630)| loss 3.0891 || lr 0.075000-0.015000-0.015000 | Time 2021-08-08 13:05:50
4 | Epoch 000/120: (62.2060 84.6920) | 1819.2853 M (1679.7858 -0.0000) || train (51.1922 74.5891) | 1700.1142 M (1560.6776 -0.0629)| loss 3.0898 || lr 0.075000-0.015000-0.015000 | Time 2021-08-08 13:05:50
5 | Epoch 001/120: (63.3660 85.3500) | 1818.9956 M (1679.4961 -0.0000) || train (57.6789 80.0064) | 1763.3548 M (1623.8657 -0.0104)| loss 2.7848 || lr 0.074987-0.014997-0.014997 | Time 2021-08-08 13:22:17
6 | Epoch 001/120: (63.3660 85.3500) | 1818.9956 M (1679.4961 -0.0000) || train (57.7545 79.9492) | 1763.3571 M (1623.8677 -0.0101)| loss 2.7868 || lr 0.074987-0.014997-0.014997 | Time 2021-08-08 13:22:17
7 | Epoch 002/120: (63.7620 85.6480) | 1801.9855 M (1662.4859 -0.0000) || train (59.3271 81.1725) | 1774.4157 M (1634.9202 -0.0040)| loss 2.7064 || lr 0.074949-0.014990-0.014990 | Time 2021-08-08 13:38:47
8 | Epoch 002/120: (63.7620 85.6480) | 1801.9855 M (1662.4859 -0.0000) || train (59.2985 81.0603) | 1774.4172 M (1634.9216 -0.0039)| loss 2.7096 || lr 0.074949-0.014990-0.014990 | Time 2021-08-08 13:38:47
9 | Epoch 003/120: (63.8880 85.6280) | 1782.9961 M (1643.4966 -0.0000) || train (59.9895 81.5632) | 1759.0335 M (1619.5568 -0.0228)| loss 2.6764 || lr 0.074884-0.014977-0.014977 | Time 2021-08-08 13:55:10
10 | Epoch 003/120: (63.8880 85.6280) | 1782.9961 M (1643.4966 -0.0000) || train (59.9525 81.6062) | 1759.0299 M (1619.5531 -0.0228)| loss 2.6747 || lr 0.074884-0.014977-0.014977 | Time 2021-08-08 13:55:10
11 | Epoch 004/120: (64.1560 85.8240) | 1759.8184 M (1620.3583 -0.0394) || train (60.3245 81.8481) | 1743.5075 M (1604.1323 -0.1244)| loss 2.6545 || lr 0.074795-0.014959-0.014959 | Time 2021-08-08 14:11:42
12 | Epoch 004/120: (64.1560 85.8240) | 1759.8184 M (1620.3583 -0.0394) || train (60.4402 81.9719) | 1743.5051 M (1604.1296 -0.1240)| loss 2.6489 || lr 0.074795-0.014959-0.014959 | Time 2021-08-08 14:11:42
13 | Epoch 005/120: (64.3920 86.0720) | 1739.3336 M (1600.3085 -0.4744) || train (60.6898 82.1135) | 1727.9819 M (1588.8678 -0.3854)| loss 2.6338 || lr 0.074679-0.014936-0.014936 | Time 2021-08-08 14:28:26
14 | Epoch 005/120: (64.3920 86.0720) | 1739.3336 M (1600.3085 -0.4744) || train (60.7994 82.1941) | 1727.9829 M (1588.8686 -0.3852)| loss 2.6295 || lr 0.074679-0.014936-0.014936 | Time 2021-08-08 14:28:26
15 | Epoch 006/120: (64.5380 86.0940) | 1714.6611 M (1576.0079 -0.8463) || train (61.0446 82.3728) | 1712.4972 M (1573.7182 -0.7205)| loss 2.6170 || lr 0.074538-0.014908-0.014908 | Time 2021-08-08 14:44:48
16 | Epoch 006/120: (64.5380 86.0940) | 1714.6611 M (1576.0079 -0.8463) || train (60.8935 82.2904) | 1712.5065 M (1573.7262 -0.7193)| loss 2.6222 || lr 0.074538-0.014908-0.014908 | Time 2021-08-08 14:44:48
17 | Epoch 007/120: (63.2080 85.0300) | 1698.2536 M (1560.0106 -1.2565) || train (61.1723 82.4765) | 1697.1408 M (1558.7784 -1.1371)| loss 2.6071 || lr 0.074372-0.014874-0.014874 | Time 2021-08-08 15:01:17
18 | Epoch 007/120: (63.2080 85.0300) | 1698.2536 M (1560.0106 -1.2565) || train (61.1709 82.4729) | 1697.1481 M (1558.7852 -1.1366)| loss 2.6076 || lr 0.074372-0.014874-0.014874 | Time 2021-08-08 15:01:17
19 | Epoch 008/120: (64.6200 85.9400) | 1680.2317 M (1542.5513 -1.8191) || train (61.4157 82.6327) | 1681.7205 M (1543.8586 -1.6376)| loss 2.5963 || lr 0.074181-0.014836-0.014836 | Time 2021-08-08 15:17:44
20 | Epoch 008/120: (64.6200 85.9400) | 1680.2317 M (1542.5513 -1.8191) || train (61.5084 82.5953) | 1681.7223 M (1543.8591 -1.6363)| loss 2.5956 || lr 0.074181-0.014836-0.014836 | Time 2021-08-08 15:17:44
21 | Epoch 009/120: (65.1020 86.3540) | 1664.6664 M (1527.1620 -1.9951) || train (61.6252 82.7749) | 1666.2813 M (1528.9493 -2.1676)| loss 2.5846 || lr 0.073964-0.014793-0.014793 | Time 2021-08-08 15:34:13
22 | Epoch 009/120: (65.1020 86.3540) | 1664.6664 M (1527.1620 -1.9951) || train (61.6205 82.7604) | 1666.2901 M (1528.9565 -2.1659)| loss 2.5859 || lr 0.073964-0.014793-0.014793 | Time 2021-08-08 15:34:13
23 | Epoch 010/120: (64.9740 86.3320) | 1648.4245 M (1511.9521 -3.0271) || train (61.6230 82.7706) | 1650.8810 M (1514.0855 -2.7041)| loss 2.5836 || lr 0.073722-0.014744-0.014744 | Time 2021-08-08 15:50:33
24 | Epoch 010/120: (64.9740 86.3320) | 1648.4245 M (1511.9521 -3.0271) || train (61.7783 82.8592) | 1650.8800 M (1514.0858 -2.7053)| loss 2.5774 || lr 0.073722-0.014744-0.014744 | Time 2021-08-08 15:50:33
25 | Epoch 011/120: (64.5320 86.2800) | 1632.1802 M (1496.9049 -4.2242) || train (61.9959 83.0408) | 1635.4548 M (1499.5581 -3.6028)| loss 2.5662 || lr 0.073456-0.014691-0.014691 | Time 2021-08-08 16:06:54
26 | Epoch 011/120: (64.5320 86.2800) | 1632.1802 M (1496.9049 -4.2242) || train (61.7516 82.8730) | 1635.4552 M (1499.5565 -3.6009)| loss 2.5743 || lr 0.073456-0.014691-0.014691 | Time 2021-08-08 16:06:54
27 | Epoch 012/120: (65.1240 86.3800) | 1616.6573 M (1483.3297 -6.1719) || train (61.9667 82.9684) | 1619.9711 M (1485.3442 -4.8726)| loss 2.5663 || lr 0.073165-0.014633-0.014633 | Time 2021-08-08 16:23:17
28 | Epoch 012/120: (65.1240 86.3800) | 1616.6573 M (1483.3297 -6.1719) || train (61.9597 83.0648) | 1619.9781 M (1485.3497 -4.8711)| loss 2.5641 || lr 0.073165-0.014633-0.014633 | Time 2021-08-08 16:23:17
29 | Epoch 013/120: (64.9220 86.4160) | 1600.7194 M (1468.7933 -7.5735) || train (61.9341 83.0486) | 1604.4786 M (1471.2526 -6.2735)| loss 2.5630 || lr 0.072849-0.014570-0.014570 | Time 2021-08-08 16:39:39
30 | Epoch 013/120: (64.9220 86.4160) | 1600.7194 M (1468.7933 -7.5735) || train (62.0471 83.1188) | 1604.4816 M (1471.2550 -6.2729)| loss 2.5583 || lr 0.072849-0.014570-0.014570 | Time 2021-08-08 16:39:39
31 | Epoch 014/120: (65.1760 86.6960) | 1583.9503 M (1452.3235 -7.8727) || train (62.1503 83.1783) | 1588.9641 M (1456.2920 -6.8274)| loss 2.5527 || lr 0.072509-0.014502-0.014502 | Time 2021-08-08 16:56:02
32 | Epoch 014/120: (65.1760 86.6980) | 1583.9503 M (1452.3235 -7.8727) || train (62.1299 83.1729) | 1588.9584 M (1456.2882 -6.8293)| loss 2.5545 || lr 0.072509-0.014502-0.014502 | Time 2021-08-08 16:56:02
33 | Epoch 015/120: (65.5060 86.6820) | 1568.4010 M (1437.2344 -8.3330) || train (62.1463 83.2189) | 1573.5152 M (1441.5106 -7.4950)| loss 2.5502 || lr 0.072145-0.014429-0.014429 | Time 2021-08-08 17:12:26
34 | Epoch 015/120: (65.5060 86.6820) | 1568.4010 M (1437.2344 -8.3330) || train (62.1317 83.1917) | 1573.5156 M (1441.5119 -7.4958)| loss 2.5522 || lr 0.072145-0.014429-0.014429 | Time 2021-08-08 17:12:26
35 | Epoch 016/120: (64.8260 86.4460) | 1551.9804 M (1422.7006 -10.2197) || train (62.3553 83.3258) | 1558.0476 M (1427.1396 -8.5915)| loss 2.5424 || lr 0.071758-0.014352-0.014352 | Time 2021-08-08 17:28:50
36 | Epoch 016/120: (64.8260 86.4460) | 1551.9804 M (1422.7006 -10.2197) || train (62.2377 83.1818) | 1558.0501 M (1427.1427 -8.5921)| loss 2.5491 || lr 0.071758-0.014352-0.014352 | Time 2021-08-08 17:28:50
37 | Epoch 017/120: (65.2540 86.6440) | 1536.3320 M (1408.7100 -11.8775) || train (62.3444 83.2888) | 1542.6303 M (1413.2806 -10.1498)| loss 2.5433 || lr 0.071347-0.014269-0.014269 | Time 2021-08-08 17:45:04
38 | Epoch 017/120: (65.2540 86.6440) | 1536.3320 M (1408.7100 -11.8775) || train (62.3576 83.4523) | 1542.6233 M (1413.2763 -10.1526)| loss 2.5370 || lr 0.071347-0.014269-0.014269 | Time 2021-08-08 17:45:04
39 | Epoch 018/120: (65.0140 86.5360) | 1519.9232 M (1394.1269 -13.7032) || train (62.3779 83.3393) | 1527.1655 M (1399.5340 -11.8681)| loss 2.5399 || lr 0.070913-0.014183-0.014183 | Time 2021-08-08 18:01:37
40 | Epoch 018/120: (65.0140 86.5360) | 1519.9232 M (1394.1268 -13.7032) || train (62.4475 83.4098) | 1527.1689 M (1399.5366 -11.8672)| loss 2.5375 || lr 0.070913-0.014183-0.014183 | Time 2021-08-08 18:01:37
41 | Epoch 019/120: (64.9940 86.5000) | 1503.6865 M (1380.2061 -16.0191) || train (62.4326 83.4287) | 1511.7126 M (1385.9570 -13.7439)| loss 2.5344 || lr 0.070456-0.014091-0.014091 | Time 2021-08-08 18:18:06
42 | Epoch 019/120: (64.9940 86.5000) | 1503.6865 M (1380.2061 -16.0191) || train (62.5762 83.4943) | 1511.7084 M (1385.9540 -13.7452)| loss 2.5305 || lr 0.070456-0.014091-0.014091 | Time 2021-08-08 18:18:06
43 | Epoch 020/120: (65.6100 86.5960) | 1489.2198 M (1366.6618 -16.9414) || train (62.5473 83.4293) | 1496.2486 M (1371.9997 -15.2507)| loss 2.5330 || lr 0.069976-0.013995-0.013995 | Time 2021-08-08 18:34:48
44 | Epoch 020/120: (65.6100 86.5960) | 1489.2198 M (1366.6617 -16.9414) || train (62.5837 83.4560) | 1496.2463 M (1371.9982 -15.2515)| loss 2.5305 || lr 0.069976-0.013995-0.013995 | Time 2021-08-08 18:34:48
45 | Epoch 021/120: (65.7440 86.7500) | 1472.6098 M (1350.4333 -17.3230) || train (62.5526 83.5208) | 1480.7751 M (1357.2739 -15.9984)| loss 2.5291 || lr 0.069474-0.013895-0.013895 | Time 2021-08-08 18:51:38
46 | Epoch 021/120: (65.7440 86.7500) | 1472.6098 M (1350.4333 -17.3230) || train (62.5771 83.4521) | 1480.7768 M (1357.2756 -15.9983)| loss 2.5313 || lr 0.069474-0.013895-0.013895 | Time 2021-08-08 18:51:38
47 | Epoch 022/120: (65.2580 86.5260) | 1456.0788 M (1334.7658 -18.1866) || train (62.5445 83.4506) | 1465.2974 M (1342.6614 -16.8636)| loss 2.5312 || lr 0.068950-0.013790-0.013790 | Time 2021-08-08 19:08:20
48 | Epoch 022/120: (65.2580 86.5280) | 1456.0788 M (1334.7658 -18.1866) || train (62.7154 83.5252) | 1465.2985 M (1342.6616 -16.8626)| loss 2.5251 || lr 0.068950-0.013790-0.013790 | Time 2021-08-08 19:08:20
49 | Epoch 023/120: (65.4000 86.7220) | 1440.8966 M (1321.5711 -20.1740) || train (62.7857 83.6540) | 1449.8606 M (1328.3976 -18.0365)| loss 2.5207 || lr 0.068405-0.013681-0.013681 | Time 2021-08-08 19:24:55
50 | Epoch 023/120: (65.4000 86.7220) | 1440.8966 M (1321.5711 -20.1740) || train (62.7231 83.5776) | 1449.8553 M (1328.3961 -18.0403)| loss 2.5230 || lr 0.068405-0.013681-0.013681 | Time 2021-08-08 19:24:55
51 | Epoch 024/120: (65.6160 86.8860) | 1426.0058 M (1308.7025 -22.1961) || train (62.7563 83.5600) | 1434.3923 M (1314.9705 -20.0777)| loss 2.5222 || lr 0.067838-0.013568-0.013568 | Time 2021-08-08 19:41:36
52 | Epoch 024/120: (65.6160 86.8860) | 1426.0058 M (1308.7025 -22.1961) || train (62.7947 83.6338) | 1434.3934 M (1314.9719 -20.0780)| loss 2.5196 || lr 0.067838-0.013568-0.013568 | Time 2021-08-08 19:41:36
53 | Epoch 025/120: (65.4340 86.6420) | 1410.1949 M (1295.2063 -24.5109) || train (62.7371 83.6618) | 1418.9143 M (1301.8832 -22.4684)| loss 2.5195 || lr 0.067251-0.013450-0.013450 | Time 2021-08-08 19:58:30
54 | Epoch 025/120: (65.4340 86.6420) | 1410.1949 M (1295.2063 -24.5109) || train (62.7826 83.6089) | 1418.9172 M (1301.8864 -22.4687)| loss 2.5189 || lr 0.067251-0.013450-0.013450 | Time 2021-08-08 19:58:30
55 | Epoch 026/120: (65.2480 86.6760) | 1395.5504 M (1282.7426 -26.6917) || train (62.8664 83.6321) | 1403.4911 M (1288.7807 -24.7892)| loss 2.5170 || lr 0.066643-0.013329-0.013329 | Time 2021-08-08 20:18:41
56 | Epoch 026/120: (65.2480 86.6760) | 1395.5504 M (1282.7426 -26.6917) || train (62.8010 83.6346) | 1403.4891 M (1288.7785 -24.7888)| loss 2.5179 || lr 0.066643-0.013329-0.013329 | Time 2021-08-08 20:18:41
57 | Epoch 027/120: (64.9300 86.7320) | 1380.6097 M (1269.6626 -28.5524) || train (62.7627 83.6396) | 1387.9858 M (1275.3153 -26.8289)| loss 2.5182 || lr 0.066015-0.013203-0.013203 | Time 2021-08-08 20:39:18
58 | Epoch 027/120: (64.9300 86.7320) | 1380.6097 M (1269.6625 -28.5524) || train (62.9185 83.6747) | 1387.9863 M (1275.3165 -26.8297)| loss 2.5152 || lr 0.066015-0.013203-0.013203 | Time 2021-08-08 20:39:18
59 | Epoch 028/120: (65.4720 86.7560) | 1365.1545 M (1255.8299 -30.1749) || train (62.8807 83.6749) | 1372.4773 M (1261.4743 -28.4964)| loss 2.5158 || lr 0.065368-0.013074-0.013074 | Time 2021-08-08 20:58:29
60 | Epoch 028/120: (65.4720 86.7580) | 1365.1545 M (1255.8299 -30.1749) || train (62.8283 83.6836) | 1372.4760 M (1261.4755 -28.4990)| loss 2.5143 || lr 0.065368-0.013074-0.013074 | Time 2021-08-08 20:58:29
61 | Epoch 029/120: (65.5700 86.7540) | 1349.4659 M (1242.4237 -32.4574) || train (62.8144 83.6257) | 1357.0128 M (1248.0964 -30.5831)| loss 2.5161 || lr 0.064702-0.012940-0.012940 | Time 2021-08-08 21:15:33
62 | Epoch 029/120: (65.5700 86.7540) | 1349.4659 M (1242.4237 -32.4574) || train (62.9043 83.7163) | 1357.0184 M (1248.1022 -30.5834)| loss 2.5135 || lr 0.064702-0.012940-0.012940 | Time 2021-08-08 21:15:33
63 | Epoch 030/120: (65.3380 86.4460) | 1333.3040 M (1227.8139 -34.0095) || train (62.8857 83.6908) | 1341.5363 M (1234.7715 -32.7347)| loss 2.5155 || lr 0.064017-0.012803-0.012803 | Time 2021-08-08 21:32:51
64 | Epoch 030/120: (65.3380 86.4460) | 1333.3040 M (1227.8139 -34.0095) || train (62.9479 83.7946) | 1341.5367 M (1234.7723 -32.7351)| loss 2.5099 || lr 0.064017-0.012803-0.012803 | Time 2021-08-08 21:32:51
65 | Epoch 031/120: (64.6240 86.1220) | 1318.5027 M (1214.8549 -35.8517) || train (62.8420 83.7542) | 1326.0263 M (1220.6059 -34.0791)| loss 2.5123 || lr 0.063313-0.012663-0.012663 | Time 2021-08-08 21:50:00
66 | Epoch 031/120: (64.6200 86.1220) | 1318.5027 M (1214.8549 -35.8517) || train (63.0209 83.7703) | 1326.0336 M (1220.6115 -34.0775)| loss 2.5084 || lr 0.063313-0.012663-0.012663 | Time 2021-08-08 21:50:00
67 | Epoch 032/120: (65.7000 87.1660) | 1302.7112 M (1201.6898 -38.4782) || train (62.9199 83.6743) | 1310.5495 M (1207.2354 -36.1853)| loss 2.5128 || lr 0.062592-0.012518-0.012518 | Time 2021-08-08 22:07:10
68 | Epoch 032/120: (65.7000 87.1660) | 1302.7111 M (1201.6898 -38.4782) || train (62.9045 83.6992) | 1310.5499 M (1207.2351 -36.1847)| loss 2.5126 || lr 0.062592-0.012518-0.012518 | Time 2021-08-08 22:07:10
69 | Epoch 033/120: (65.1400 86.6400) | 1286.5323 M (1188.3244 -41.2916) || train (62.9468 83.7481) | 1295.0441 M (1194.3329 -38.7884)| loss 2.5106 || lr 0.061854-0.012371-0.012371 | Time 2021-08-08 22:24:31
70 | Epoch 033/120: (65.1400 86.6400) | 1286.5323 M (1188.3244 -41.2916) || train (62.8706 83.6715) | 1295.0417 M (1194.3322 -38.7900)| loss 2.5132 || lr 0.061854-0.012371-0.012371 | Time 2021-08-08 22:24:31
71 | Epoch 034/120: (65.0380 86.7680) | 1271.6406 M (1174.8917 -42.7506) || train (62.9532 83.7323) | 1279.5591 M (1181.2209 -41.1613)| loss 2.5080 || lr 0.061100-0.012220-0.012220 | Time 2021-08-08 22:41:56
72 | Epoch 034/120: (65.0380 86.7680) | 1271.6406 M (1174.8917 -42.7506) || train (63.0216 83.8201) | 1279.5585 M (1181.2201 -41.1612)| loss 2.5058 || lr 0.061100-0.012220-0.012220 | Time 2021-08-08 22:41:56
73 | Epoch 035/120: (65.3580 86.7040) | 1256.1726 M (1160.3907 -43.7176) || train (62.9056 83.7737) | 1264.0860 M (1166.9377 -42.3512)| loss 2.5090 || lr 0.060329-0.012066-0.012066 | Time 2021-08-08 22:59:08
74 | Epoch 035/120: (65.3580 86.7040) | 1256.1726 M (1160.3907 -43.7176) || train (62.9427 83.8137) | 1264.0781 M (1166.9309 -42.3523)| loss 2.5075 || lr 0.060329-0.012066-0.012066 | Time 2021-08-08 22:59:08
75 | Epoch 036/120: (65.8740 86.8980) | 1240.2415 M (1145.3682 -44.6262) || train (62.9394 83.8043) | 1248.6159 M (1152.5142 -43.3979)| loss 2.5076 || lr 0.059542-0.011908-0.011908 | Time 2021-08-08 23:16:21
76 | Epoch 036/120: (65.8740 86.8980) | 1240.2415 M (1145.3683 -44.6262) || train (62.7936 83.7292) | 1248.6152 M (1152.5140 -43.3983)| loss 2.5121 || lr 0.059542-0.011908-0.011908 | Time 2021-08-08 23:16:21
77 | Epoch 037/120: (65.2920 86.5580) | 1224.8024 M (1131.6305 -46.3277) || train (62.9174 83.7500) | 1233.1366 M (1138.2428 -44.6058)| loss 2.5103 || lr 0.058740-0.011748-0.011748 | Time 2021-08-08 23:33:20
78 | Epoch 037/120: (65.2920 86.5580) | 1224.8024 M (1131.6305 -46.3277) || train (62.8196 83.7659) | 1233.1299 M (1138.2361 -44.6057)| loss 2.5120 || lr 0.058740-0.011748-0.011748 | Time 2021-08-08 23:33:20
79 | Epoch 038/120: (64.7780 86.4080) | 1209.5446 M (1118.1272 -48.0821) || train (62.9099 83.6968) | 1217.6041 M (1124.6848 -46.5803)| loss 2.5098 || lr 0.057924-0.011585-0.011585 | Time 2021-08-08 23:50:12
80 | Epoch 038/120: (64.7780 86.4080) | 1209.5446 M (1118.1272 -48.0821) || train (62.9034 83.7386) | 1217.6058 M (1124.6863 -46.5801)| loss 2.5115 || lr 0.057924-0.011585-0.011585 | Time 2021-08-08 23:50:12
81 | Epoch 039/120: (64.7720 86.3920) | 1193.2301 M (1103.4086 -49.6780) || train (62.7022 83.6485) | 1202.1267 M (1111.2916 -48.6644)| loss 2.5148 || lr 0.057094-0.011419-0.011419 | Time 2021-08-09 00:07:05
82 | Epoch 039/120: (64.7720 86.3920) | 1193.2301 M (1103.4086 -49.6780) || train (62.8071 83.6655) | 1202.1274 M (1111.2906 -48.6627)| loss 2.5143 || lr 0.057094-0.011419-0.011419 | Time 2021-08-09 00:07:05
83 | Epoch 040/120: (65.4140 86.6160) | 1178.5929 M (1089.2771 -50.1838) || train (62.8146 83.7155) | 1186.6159 M (1096.5373 -49.4209)| loss 2.5132 || lr 0.056250-0.011250-0.011250 | Time 2021-08-09 00:24:09
84 | Epoch 040/120: (65.4140 86.6160) | 1178.5929 M (1089.2771 -50.1838) || train (62.8452 83.7569) | 1186.6250 M (1096.5444 -49.4189)| loss 2.5106 || lr 0.056250-0.011250-0.011250 | Time 2021-08-09 00:24:09
85 | Epoch 041/120: (65.4280 86.6320) | 1160.6459 M (1072.2181 -51.0717) || train (62.8466 83.7517) | 1171.1130 M (1080.9042 -49.2907)| loss 2.5105 || lr 0.055393-0.011079-0.011079 | Time 2021-08-09 00:41:09
86 | Epoch 041/120: (65.4280 86.6320) | 1160.6459 M (1072.2181 -51.0717) || train (62.8695 83.7199) | 1171.1012 M (1080.8960 -49.2943)| loss 2.5123 || lr 0.055393-0.011079-0.011079 | Time 2021-08-09 00:41:09
87 | Epoch 042/120: (65.5460 86.8180) | 1144.5772 M (1059.7924 -54.7146) || train (62.8881 83.6643) | 1155.5972 M (1067.0292 -50.9314)| loss 2.5122 || lr 0.054525-0.010905-0.010905 | Time 2021-08-09 00:58:02
88 | Epoch 042/120: (65.5480 86.8180) | 1144.5772 M (1059.7924 -54.7146) || train (62.8853 83.7494) | 1155.5979 M (1067.0320 -50.9337)| loss 2.5091 || lr 0.054525-0.010905-0.010905 | Time 2021-08-09 00:58:02
89 | Epoch 043/120: (65.0700 86.4960) | 1130.9209 M (1047.2711 -55.8497) || train (62.8651 83.7017) | 1140.1330 M (1055.1767 -54.5432)| loss 2.5137 || lr 0.053644-0.010729-0.010729 | Time 2021-08-09 01:14:59
90 | Epoch 043/120: (65.0720 86.4960) | 1130.9209 M (1047.2711 -55.8497) || train (62.7741 83.6499) | 1140.1323 M (1055.1767 -54.5440)| loss 2.5145 || lr 0.053644-0.010729-0.010729 | Time 2021-08-09 01:14:59
91 | Epoch 044/120: (65.5700 86.7640) | 1114.7530 M (1033.2390 -57.9855) || train (62.6336 83.5634) | 1124.6560 M (1041.3716 -56.2151)| loss 2.5187 || lr 0.052753-0.010551-0.010551 | Time 2021-08-09 01:32:01
92 | Epoch 044/120: (65.5700 86.7640) | 1114.7530 M (1033.2390 -57.9855) || train (62.7797 83.6089) | 1124.6555 M (1041.3724 -56.2164)| loss 2.5169 || lr 0.052753-0.010551-0.010551 | Time 2021-08-09 01:32:01
93 | Epoch 045/120: (65.4420 86.7860) | 1098.6362 M (1018.3725 -59.2358) || train (62.6728 83.6039) | 1109.1865 M (1027.4542 -57.7672)| loss 2.5176 || lr 0.051851-0.010370-0.010370 | Time 2021-08-09 01:49:01
94 | Epoch 045/120: (65.4420 86.7860) | 1098.6362 M (1018.3725 -59.2358) || train (62.5884 83.5672) | 1109.1751 M (1027.4457 -57.7701)| loss 2.5220 || lr 0.051851-0.010370-0.010370 | Time 2021-08-09 01:49:01
95 | Epoch 046/120: (65.2220 86.6480) | 1083.7020 M (1005.3198 -61.1174) || train (62.5295 83.5193) | 1093.7137 M (1013.4780 -59.2638)| loss 2.5246 || lr 0.050939-0.010188-0.010188 | Time 2021-08-09 02:06:05
96 | Epoch 046/120: (65.2220 86.6480) | 1083.7020 M (1005.3198 -61.1174) || train (62.4789 83.4693) | 1093.6977 M (1013.4652 -59.2671)| loss 2.5263 || lr 0.050939-0.010188-0.010188 | Time 2021-08-09 02:06:05
97 | Epoch 047/120: (64.9240 86.4180) | 1067.5583 M (989.5527 -61.4939) || train (62.4880 83.4890) | 1078.1707 M (999.0818 -60.4106)| loss 2.5248 || lr 0.050018-0.010004-0.010004 | Time 2021-08-09 02:23:11
98 | Epoch 047/120: (64.9240 86.4180) | 1067.5583 M (989.5527 -61.4939) || train (62.4168 83.4434) | 1078.1804 M (999.0920 -60.4111)| loss 2.5277 || lr 0.050018-0.010004-0.010004 | Time 2021-08-09 02:23:11
99 | Epoch 048/120: (65.3400 86.4320) | 1050.8837 M (974.4984 -63.1142) || train (62.4752 83.5232) | 1062.7104 M (984.4012 -61.1903)| loss 2.5241 || lr 0.049088-0.009818-0.009818 | Time 2021-08-09 02:40:18
100 | Epoch 048/120: (65.3400 86.4320) | 1050.8837 M (974.4984 -63.1143) || train (62.4269 83.4098) | 1062.7047 M (984.3977 -61.1924)| loss 2.5277 || lr 0.049088-0.009818-0.009818 | Time 2021-08-09 02:40:18
101 | Epoch 049/120: (65.2280 86.6640) | 1036.4631 M (960.1469 -63.1833) || train (62.4577 83.4006) | 1047.2134 M (969.5895 -61.8756)| loss 2.5287 || lr 0.048151-0.009630-0.009630 | Time 2021-08-09 02:57:28
102 | Epoch 049/120: (65.2280 86.6640) | 1036.4631 M (960.1469 -63.1833) || train (62.5347 83.4677) | 1047.2040 M (969.5844 -61.8799)| loss 2.5245 || lr 0.048151-0.009630-0.009630 | Time 2021-08-09 02:57:28
103 | Epoch 050/120: (65.2800 86.6000) | 1021.5507 M (947.9511 -65.8999) || train (62.2585 83.3567) | 1031.7798 M (955.6668 -63.3865)| loss 2.5339 || lr 0.047206-0.009441-0.009441 | Time 2021-08-09 03:14:34
104 | Epoch 050/120: (65.2800 86.6000) | 1021.5507 M (947.9511 -65.8999) || train (62.4699 83.4343) | 1031.7595 M (955.6513 -63.3913)| loss 2.5265 || lr 0.047206-0.009441-0.009441 | Time 2021-08-09 03:14:34
105 | Epoch 051/120: (65.0920 86.4980) | 1004.4803 M (933.1932 -68.2124) || train (62.3066 83.3385) | 1016.3108 M (942.6961 -65.8848)| loss 2.5332 || lr 0.046254-0.009251-0.009251 | Time 2021-08-09 03:31:38
106 | Epoch 051/120: (65.0920 86.4980) | 1004.4803 M (933.1932 -68.2124) || train (62.1953 83.3243) | 1016.3028 M (942.6905 -65.8871)| loss 2.5351 || lr 0.046254-0.009251-0.009251 | Time 2021-08-09 03:31:38
107 | Epoch 052/120: (65.2900 86.5900) | 989.6105 M (921.3537 -71.2428) || train (62.1591 83.3134) | 1000.8552 M (929.6837 -68.3280)| loss 2.5366 || lr 0.045297-0.009059-0.009059 | Time 2021-08-09 03:48:32
108 | Epoch 052/120: (65.2900 86.5900) | 989.6105 M (921.3537 -71.2428) || train (62.2298 83.3020) | 1000.8371 M (929.6712 -68.3337)| loss 2.5368 || lr 0.045297-0.009059-0.009059 | Time 2021-08-09 03:48:32
109 | Epoch 053/120: (65.2840 86.6480) | 973.5525 M (907.9390 -73.8860) || train (62.0426 83.1811) | 985.4187 M (916.9606 -71.0414)| loss 2.5416 || lr 0.044334-0.008867-0.008867 | Time 2021-08-09 04:05:25
110 | Epoch 053/120: (65.2840 86.6480) | 973.5525 M (907.9390 -73.8860) || train (62.0301 83.1838) | 985.4090 M (916.9536 -71.0441)| loss 2.5413 || lr 0.044334-0.008867-0.008867 | Time 2021-08-09 04:05:25
111 | Epoch 054/120: (64.8200 86.2720) | 959.0674 M (895.0402 -75.4723) || train (61.8189 83.1067) | 969.9182 M (903.4812 -73.0625)| loss 2.5499 || lr 0.043366-0.008673-0.008673 | Time 2021-08-09 04:22:20
112 | Epoch 054/120: (64.8200 86.2720) | 959.0675 M (895.0402 -75.4723) || train (61.9635 83.0719) | 969.9128 M (903.4783 -73.0651)| loss 2.5471 || lr 0.043366-0.008673-0.008673 | Time 2021-08-09 04:22:20
113 | Epoch 055/120: (64.6720 86.0800) | 943.1995 M (880.4347 -76.7348) || train (61.9066 83.0823) | 954.4658 M (889.9562 -74.9899)| loss 2.5484 || lr 0.042395-0.008479-0.008479 | Time 2021-08-09 04:39:25
114 | Epoch 055/120: (64.6720 86.0760) | 943.1995 M (880.4347 -76.7348) || train (61.9132 83.0637) | 954.4557 M (889.9502 -74.9940)| loss 2.5485 || lr 0.042395-0.008479-0.008479 | Time 2021-08-09 04:39:25
115 | Epoch 056/120: (64.6240 86.2760) | 928.4600 M (867.2949 -78.3343) || train (61.8489 83.0498) | 939.0102 M (875.6647 -76.1540)| loss 2.5501 || lr 0.041420-0.008284-0.008284 | Time 2021-08-09 04:56:20
116 | Epoch 056/120: (64.6240 86.2760) | 928.4600 M (867.2949 -78.3343) || train (61.8314 82.9980) | 939.0108 M (875.6640 -76.1526)| loss 2.5512 || lr 0.041420-0.008284-0.008284 | Time 2021-08-09 04:56:20
117 | Epoch 057/120: (64.1840 86.1060) | 913.7318 M (855.3192 -81.0869) || train (61.6987 82.9541) | 923.5476 M (862.8424 -78.7943)| loss 2.5545 || lr 0.040442-0.008088-0.008088 | Time 2021-08-09 05:13:17
118 | Epoch 057/120: (64.1840 86.1060) | 913.7318 M (855.3193 -81.0869) || train (61.8042 82.9702) | 923.5539 M (862.8449 -78.7905)| loss 2.5515 || lr 0.040442-0.008088-0.008088 | Time 2021-08-09 05:13:17
119 | Epoch 058/120: (64.6160 86.3660) | 897.3507 M (842.2591 -84.4079) || train (61.6633 82.8583) | 908.1363 M (849.7695 -81.1327)| loss 2.5571 || lr 0.039463-0.007893-0.007893 | Time 2021-08-09 05:30:25
120 | Epoch 058/120: (64.6160 86.3660) | 897.3507 M (842.2591 -84.4079) || train (61.6113 82.8497) | 908.1459 M (849.7784 -81.1321)| loss 2.5607 || lr 0.039463-0.007893-0.007893 | Time 2021-08-09 05:30:25
121 | Epoch 059/120: (63.8080 85.7800) | 882.0571 M (829.3016 -86.7440) || train (61.5782 82.7824) | 892.6912 M (837.5756 -84.3839)| loss 2.5621 || lr 0.038482-0.007696-0.007696 | Time 2021-08-09 05:47:19
122 | Epoch 059/120: (63.8080 85.7800) | 882.0571 M (829.3016 -86.7440) || train (61.4135 82.8002) | 892.6942 M (837.5760 -84.3813)| loss 2.5664 || lr 0.038482-0.007696-0.007696 | Time 2021-08-09 05:47:19
123 | Epoch 060/120: (65.0060 86.3080) | 881.1653 M (826.8116 -85.1458) || train (61.6778 82.9843) | 884.8437 M (830.0076 -84.6634)| loss 2.5548 || lr 0.037500-0.007500-0.007500 | Time 2021-08-09 06:04:10
124 | Epoch 060/120: (65.0060 86.3080) | 881.1653 M (826.8115 -85.1458) || train (61.8005 82.9292) | 884.8443 M (830.0085 -84.6637)| loss 2.5549 || lr 0.037500-0.007500-0.007500 | Time 2021-08-09 06:04:10
125 | Epoch 061/120: (65.0660 86.6100) | 881.2256 M (825.6330 -83.9069) || train (62.2293 83.3383) | 884.6965 M (828.6012 -83.4043)| loss 2.5332 || lr 0.036518-0.007304-0.007304 | Time 2021-08-09 06:21:00
126 | Epoch 061/120: (65.0660 86.6100) | 881.2256 M (825.6330 -83.9069) || train (62.2897 83.3207) | 884.6879 M (828.5928 -83.4044)| loss 2.5309 || lr 0.036518-0.007304-0.007304 | Time 2021-08-09 06:21:00
127 | Epoch 062/120: (65.1740 86.7860) | 880.9854 M (824.7291 -83.2432) || train (62.7370 83.4988) | 884.6048 M (827.4951 -82.3898)| loss 2.5152 || lr 0.035537-0.007107-0.007107 | Time 2021-08-09 06:37:54
128 | Epoch 062/120: (65.1740 86.7860) | 880.9854 M (824.7291 -83.2432) || train (62.6411 83.5854) | 884.6065 M (827.4989 -82.3919)| loss 2.5145 || lr 0.035537-0.007107-0.007107 | Time 2021-08-09 06:37:54
129 | Epoch 063/120: (65.9340 86.9100) | 880.7662 M (823.8795 -82.6128) || train (63.0342 83.8148) | 884.5836 M (826.9018 -81.8177)| loss 2.4988 || lr 0.034558-0.006912-0.006912 | Time 2021-08-09 06:54:49
130 | Epoch 063/120: (65.9340 86.9100) | 880.7663 M (823.8795 -82.6128) || train (62.9963 83.7697) | 884.5657 M (826.8896 -81.8235)| loss 2.5004 || lr 0.034558-0.006912-0.006912 | Time 2021-08-09 06:54:49
131 | Epoch 064/120: (66.1040 86.9620) | 881.2893 M (824.2693 -82.4796) || train (63.3712 83.9838) | 884.5463 M (826.5235 -81.4767)| loss 2.4866 || lr 0.033580-0.006716-0.006716 | Time 2021-08-09 07:11:47
132 | Epoch 064/120: (66.1040 86.9620) | 881.2892 M (824.2693 -82.4796) || train (63.2755 83.9756) | 884.5517 M (826.5253 -81.4732)| loss 2.4908 || lr 0.033580-0.006716-0.006716 | Time 2021-08-09 07:11:47
133 | Epoch 065/120: (65.8120 87.0800) | 881.1290 M (823.1058 -81.4763) || train (63.5670 84.1413) | 884.5211 M (825.9774 -80.9558)| loss 2.4793 || lr 0.032605-0.006521-0.006521 | Time 2021-08-09 07:28:47
134 | Epoch 065/120: (65.8120 87.0800) | 881.1290 M (823.1058 -81.4763) || train (63.6124 84.1762) | 884.5105 M (825.9720 -80.9610)| loss 2.4768 || lr 0.032605-0.006521-0.006521 | Time 2021-08-09 07:28:47
135 | Epoch 066/120: (65.6500 86.8640) | 881.3160 M (823.1699 -81.3534) || train (63.7949 84.2918) | 884.4833 M (825.4252 -80.4415)| loss 2.4684 || lr 0.031634-0.006327-0.006327 | Time 2021-08-09 07:45:57
136 | Epoch 066/120: (65.6500 86.8660) | 881.3160 M (823.1699 -81.3534) || train (63.8797 84.2929) | 884.4910 M (825.4317 -80.4402)| loss 2.4675 || lr 0.031634-0.006327-0.006327 | Time 2021-08-09 07:45:57
137 | Epoch 067/120: (66.4160 87.3860) | 880.9294 M (822.7741 -81.3443) || train (64.2214 84.5001) | 884.4892 M (825.3478 -80.3582)| loss 2.4546 || lr 0.030666-0.006133-0.006133 | Time 2021-08-09 08:02:58
138 | Epoch 067/120: (66.4160 87.3860) | 880.9294 M (822.7741 -81.3443) || train (64.0236 84.4952) | 884.4978 M (825.3519 -80.3536)| loss 2.4581 || lr 0.030666-0.006133-0.006133 | Time 2021-08-09 08:02:58
139 | Epoch 068/120: (66.9140 87.5380) | 881.3731 M (823.0256 -81.1520) || train (64.3191 84.6262) | 884.4840 M (825.2068 -80.2223)| loss 2.4493 || lr 0.029703-0.005941-0.005941 | Time 2021-08-09 08:20:01
140 | Epoch 068/120: (66.9140 87.5380) | 881.3731 M (823.0256 -81.1520) || train (64.3713 84.5892) | 884.4851 M (825.2124 -80.2269)| loss 2.4487 || lr 0.029703-0.005941-0.005941 | Time 2021-08-09 08:20:01
141 | Epoch 069/120: (66.8240 87.6020) | 881.1854 M (822.8965 -81.2106) || train (64.5928 84.8404) | 884.4621 M (825.1498 -80.1872)| loss 2.4378 || lr 0.028746-0.005749-0.005749 | Time 2021-08-09 08:37:17
142 | Epoch 069/120: (66.8240 87.6020) | 881.1854 M (822.8965 -81.2106) || train (64.5589 84.7499) | 884.4643 M (825.1494 -80.1847)| loss 2.4402 || lr 0.028746-0.005749-0.005749 | Time 2021-08-09 08:37:17
143 | Epoch 070/120: (67.0800 87.7700) | 880.6215 M (822.5847 -81.4627) || train (64.7192 84.9423) | 884.4590 M (825.1641 -80.2046)| loss 2.4308 || lr 0.027794-0.005559-0.005559 | Time 2021-08-09 08:54:22
144 | Epoch 070/120: (67.0800 87.7700) | 880.6215 M (822.5847 -81.4627) || train (64.6562 84.8751) | 884.4378 M (825.1486 -80.2103)| loss 2.4329 || lr 0.027794-0.005559-0.005559 | Time 2021-08-09 08:54:22
145 | Epoch 071/120: (66.9800 87.7680) | 881.2271 M (822.6509 -80.9234) || train (64.8324 85.0207) | 884.4690 M (825.0920 -80.1225)| loss 2.4243 || lr 0.026849-0.005370-0.005370 | Time 2021-08-09 09:11:16
146 | Epoch 071/120: (66.9800 87.7680) | 881.2271 M (822.6509 -80.9234) || train (64.8845 84.9611) | 884.4705 M (825.0924 -80.1214)| loss 2.4249 || lr 0.026849-0.005370-0.005370 | Time 2021-08-09 09:11:16
147 | Epoch 072/120: (67.3980 88.0180) | 881.8454 M (823.1945 -80.8486) || train (65.0758 85.1081) | 884.4537 M (824.9648 -80.0106)| loss 2.4155 || lr 0.025912-0.005182-0.005182 | Time 2021-08-09 09:28:32
148 | Epoch 072/120: (67.3980 88.0180) | 881.8454 M (823.1945 -80.8486) || train (65.1026 85.1459) | 884.4517 M (824.9618 -80.0096)| loss 2.4166 || lr 0.025912-0.005182-0.005182 | Time 2021-08-09 09:28:32
149 | Epoch 073/120: (67.0720 87.6920) | 880.7720 M (822.4776 -81.2050) || train (65.3630 85.2861) | 884.4448 M (824.9623 -80.0170)| loss 2.4062 || lr 0.024982-0.004996-0.004996 | Time 2021-08-09 09:45:44
150 | Epoch 073/120: (67.0720 87.6920) | 880.7720 M (822.4775 -81.2050) || train (65.3298 85.3143) | 884.4515 M (824.9643 -80.0123)| loss 2.4053 || lr 0.024982-0.004996-0.004996 | Time 2021-08-09 09:45:44
151 | Epoch 074/120: (66.8940 87.7780) | 881.3488 M (822.7333 -80.8840) || train (65.5694 85.4270) | 884.4203 M (824.9085 -79.9877)| loss 2.3981 || lr 0.024061-0.004812-0.004812 | Time 2021-08-09 10:03:00
152 | Epoch 074/120: (66.8960 87.7800) | 881.3488 M (822.7333 -80.8840) || train (65.5346 85.4422) | 884.4346 M (824.9203 -79.9852)| loss 2.3967 || lr 0.024061-0.004812-0.004812 | Time 2021-08-09 10:03:00
153 | Epoch 075/120: (67.6300 87.8400) | 880.5568 M (822.0845 -81.0273) || train (65.7523 85.5266) | 884.4307 M (824.8664 -79.9352)| loss 2.3905 || lr 0.023149-0.004630-0.004630 | Time 2021-08-09 10:20:12
154 | Epoch 075/120: (67.6280 87.8400) | 880.5568 M (822.0845 -81.0273) || train (65.7826 85.5418) | 884.4399 M (824.8714 -79.9311)| loss 2.3901 || lr 0.023149-0.004630-0.004630 | Time 2021-08-09 10:20:12
155 | Epoch 076/120: (67.8160 88.0240) | 880.9884 M (822.5284 -81.0395) || train (65.9262 85.6504) | 884.4144 M (824.8059 -79.8910)| loss 2.3826 || lr 0.022247-0.004449-0.004449 | Time 2021-08-09 10:37:31
156 | Epoch 076/120: (67.8160 88.0240) | 880.9884 M (822.5284 -81.0395) || train (65.9637 85.6679) | 884.4419 M (824.8277 -79.8853)| loss 2.3813 || lr 0.022247-0.004449-0.004449 | Time 2021-08-09 10:37:31
157 | Epoch 077/120: (67.7860 88.1720) | 881.4669 M (822.7918 -80.8244) || train (66.1909 85.8290) | 884.4188 M (824.7681 -79.8487)| loss 2.3717 || lr 0.021356-0.004271-0.004271 | Time 2021-08-09 10:54:41
158 | Epoch 077/120: (67.7860 88.1720) | 881.4669 M (822.7917 -80.8244) || train (66.1646 85.7858) | 884.4338 M (824.7806 -79.8463)| loss 2.3737 || lr 0.021356-0.004271-0.004271 | Time 2021-08-09 10:54:41
159 | Epoch 078/120: (67.7920 88.0720) | 881.2287 M (822.6208 -80.8916) || train (66.3863 86.0410) | 884.4200 M (824.7003 -79.7798)| loss 2.3626 || lr 0.020475-0.004095-0.004095 | Time 2021-08-09 11:11:52
160 | Epoch 078/120: (67.7900 88.0720) | 881.2287 M (822.6208 -80.8916) || train (66.3246 85.9264) | 884.4203 M (824.7005 -79.7797)| loss 2.3663 || lr 0.020475-0.004095-0.004095 | Time 2021-08-09 11:11:52
161 | Epoch 079/120: (68.0020 88.1460) | 880.7295 M (822.1233 -80.8934) || train (66.4811 85.9773) | 884.4019 M (824.6680 -79.7656)| loss 2.3601 || lr 0.019607-0.003921-0.003921 | Time 2021-08-09 11:29:07
162 | Epoch 079/120: (68.0020 88.1460) | 880.7295 M (822.1234 -80.8934) || train (66.5807 86.0493) | 884.3987 M (824.6648 -79.7656)| loss 2.3568 || lr 0.019607-0.003921-0.003921 | Time 2021-08-09 11:29:07
163 | Epoch 080/120: (67.7700 88.1800) | 881.0491 M (822.3069 -80.7574) || train (66.7269 86.1214) | 884.4206 M (824.6408 -79.7197)| loss 2.3521 || lr 0.018750-0.003750-0.003750 | Time 2021-08-09 11:46:27
164 | Epoch 080/120: (67.7700 88.1800) | 881.0491 M (822.3069 -80.7574) || train (66.8165 86.1859) | 884.4038 M (824.6291 -79.7248)| loss 2.3487 || lr 0.018750-0.003750-0.003750 | Time 2021-08-09 11:46:27
165 | Epoch 081/120: (68.3100 88.4400) | 880.8390 M (821.9820 -80.6425) || train (67.0078 86.2955) | 884.3782 M (824.5598 -79.6812)| loss 2.3416 || lr 0.017906-0.003581-0.003581 | Time 2021-08-09 12:03:41
166 | Epoch 081/120: (68.3100 88.4380) | 880.8390 M (821.9820 -80.6425) || train (66.9773 86.2547) | 884.3736 M (824.5577 -79.6836)| loss 2.3434 || lr 0.017906-0.003581-0.003581 | Time 2021-08-09 12:03:41
167 | Epoch 082/120: (68.2380 88.4020) | 880.1769 M (821.4865 -80.8092) || train (67.1893 86.3905) | 884.4011 M (824.5054 -79.6039)| loss 2.3346 || lr 0.017076-0.003415-0.003415 | Time 2021-08-09 12:20:57
168 | Epoch 082/120: (68.2380 88.4020) | 880.1769 M (821.4865 -80.8092) || train (67.1718 86.3798) | 884.4085 M (824.5093 -79.6003)| loss 2.3333 || lr 0.017076-0.003415-0.003415 | Time 2021-08-09 12:20:57
169 | Epoch 083/120: (68.4360 88.6520) | 881.0709 M (822.1789 -80.6075) || train (67.4175 86.5393) | 884.3813 M (824.4650 -79.5833)| loss 2.3256 || lr 0.016260-0.003252-0.003252 | Time 2021-08-09 12:38:19
170 | Epoch 083/120: (68.4380 88.6520) | 881.0709 M (822.1789 -80.6075) || train (67.3820 86.5353) | 884.3974 M (824.4775 -79.5796)| loss 2.3252 || lr 0.016260-0.003252-0.003252 | Time 2021-08-09 12:38:19
171 | Epoch 084/120: (68.3920 88.5420) | 880.7269 M (821.8770 -80.6496) || train (67.5899 86.6317) | 884.3988 M (824.4474 -79.5481)| loss 2.3184 || lr 0.015458-0.003092-0.003092 | Time 2021-08-09 12:55:46
172 | Epoch 084/120: (68.3880 88.5420) | 880.7269 M (821.8770 -80.6496) || train (67.4848 86.6433) | 884.4036 M (824.4507 -79.5467)| loss 2.3202 || lr 0.015458-0.003092-0.003092 | Time 2021-08-09 12:55:46
173 | Epoch 085/120: (68.6440 88.6620) | 880.6215 M (821.7692 -80.6472) || train (67.7844 86.7410) | 884.3928 M (824.4114 -79.5181)| loss 2.3112 || lr 0.014671-0.002934-0.002934 | Time 2021-08-09 13:13:06
174 | Epoch 085/120: (68.6420 88.6620) | 880.6215 M (821.7692 -80.6472) || train (67.7160 86.7240) | 884.3710 M (824.3928 -79.5214)| loss 2.3119 || lr 0.014671-0.002934-0.002934 | Time 2021-08-09 13:13:06
175 | Epoch 086/120: (68.2860 88.4140) | 880.7967 M (821.9687 -80.6716) || train (68.0187 86.9183) | 884.3743 M (824.4237 -79.5489)| loss 2.3010 || lr 0.013900-0.002780-0.002780 | Time 2021-08-09 19:38:50
176 | Epoch 086/120: (68.2860 88.4140) | 880.7967 M (821.9687 -80.6716) || train (67.9246 86.8547) | 884.3744 M (824.4212 -79.5463)| loss 2.3035 || lr 0.013900-0.002780-0.002780 | Time 2021-08-09 19:38:50
177 | Epoch 087/120: (68.9560 88.8400) | 880.8803 M (821.9116 -80.5308) || train (68.1770 87.0234) | 884.3897 M (824.4279 -79.5377)| loss 2.2951 || lr 0.013146-0.002629-0.002629 | Time 2021-08-09 19:57:22
178 | Epoch 087/120: (68.9560 88.8380) | 880.8803 M (821.9116 -80.5308) || train (68.1726 86.9970) | 884.3807 M (824.4220 -79.5409)| loss 2.2948 || lr 0.013146-0.002629-0.002629 | Time 2021-08-09 19:57:22
179 | Epoch 088/120: (68.9580 88.8160) | 881.0954 M (822.0875 -80.4916) || train (68.4947 87.1436) | 884.3719 M (824.3866 -79.5142)| loss 2.2822 || lr 0.012408-0.002482-0.002482 | Time 2021-08-09 20:16:01
180 | Epoch 088/120: (68.9580 88.8160) | 881.0954 M (822.0875 -80.4916) || train (68.3854 87.1631) | 884.3828 M (824.3931 -79.5098)| loss 2.2858 || lr 0.012408-0.002482-0.002482 | Time 2021-08-09 20:16:01
181 | Epoch 089/120: (69.4580 89.0280) | 880.9575 M (821.9800 -80.5221) || train (68.5598 87.2619) | 884.3722 M (824.3723 -79.4996)| loss 2.2783 || lr 0.011687-0.002337-0.002337 | Time 2021-08-09 20:35:26
182 | Epoch 089/120: (69.4580 89.0280) | 880.9574 M (821.9800 -80.5221) || train (68.6138 87.2385) | 884.3703 M (824.3712 -79.5004)| loss 2.2768 || lr 0.011687-0.002337-0.002337 | Time 2021-08-09 20:35:26
183 | Epoch 090/120: (69.3180 89.0660) | 880.3153 M (821.4689 -80.6530) || train (68.7445 87.4057) | 884.3395 M (824.3205 -79.4805)| loss 2.2698 || lr 0.010983-0.002197-0.002197 | Time 2021-08-09 20:54:08
184 | Epoch 090/120: (69.3180 89.0660) | 880.3154 M (821.4689 -80.6530) || train (68.8850 87.3476) | 884.3336 M (824.3169 -79.4828)| loss 2.2693 || lr 0.010983-0.002197-0.002197 | Time 2021-08-09 20:54:08
185 | Epoch 091/120: (69.3940 89.2780) | 880.5376 M (821.5609 -80.5228) || train (68.9579 87.4674) | 884.3647 M (824.2906 -79.4254)| loss 2.2631 || lr 0.010298-0.002060-0.002060 | Time 2021-08-09 21:12:47
186 | Epoch 091/120: (69.3940 89.2780) | 880.5376 M (821.5609 -80.5228) || train (69.0717 87.5194) | 884.3489 M (824.2793 -79.4300)| loss 2.2617 || lr 0.010298-0.002060-0.002060 | Time 2021-08-09 21:12:47
187 | Epoch 092/120: (69.7180 89.2000) | 881.3733 M (822.1243 -80.2505) || train (69.1352 87.5523) | 884.3564 M (824.2561 -79.3992)| loss 2.2560 || lr 0.009632-0.001926-0.001926 | Time 2021-08-09 21:31:10
188 | Epoch 092/120: (69.7180 89.2000) | 881.3733 M (822.1243 -80.2505) || train (69.1744 87.5752) | 884.3370 M (824.2416 -79.4041)| loss 2.2556 || lr 0.009632-0.001926-0.001926 | Time 2021-08-09 21:31:10
189 | Epoch 093/120: (69.4020 89.1180) | 879.2478 M (820.5252 -80.7769) || train (69.4736 87.7401) | 884.3385 M (824.1681 -79.3291)| loss 2.2455 || lr 0.008985-0.001797-0.001797 | Time 2021-08-09 21:49:19
190 | Epoch 093/120: (69.4020 89.1180) | 879.2478 M (820.5252 -80.7769) || train (69.4562 87.7284) | 884.3532 M (824.1811 -79.3274)| loss 2.2448 || lr 0.008985-0.001797-0.001797 | Time 2021-08-09 21:49:19
191 | Epoch 094/120: (69.9260 89.3720) | 880.6937 M (821.4667 -80.2725) || train (69.6560 87.8573) | 884.3288 M (824.1141 -79.2848)| loss 2.2369 || lr 0.008357-0.001671-0.001671 | Time 2021-08-09 22:07:31
192 | Epoch 094/120: (69.9260 89.3720) | 880.6937 M (821.4667 -80.2725) || train (69.6457 87.8132) | 884.3533 M (824.1340 -79.2802)| loss 2.2381 || lr 0.008357-0.001671-0.001671 | Time 2021-08-09 22:07:31
193 | Epoch 095/120: (70.1160 89.4440) | 880.3177 M (821.1288 -80.3106) || train (69.9109 88.0276) | 884.3594 M (824.0674 -79.2075)| loss 2.2285 || lr 0.007749-0.001550-0.001550 | Time 2021-08-09 22:25:59
194 | Epoch 095/120: (70.1160 89.4440) | 880.3177 M (821.1288 -80.3106) || train (69.8012 87.8697) | 884.3746 M (824.0756 -79.2005)| loss 2.2327 || lr 0.007749-0.001550-0.001550 | Time 2021-08-09 22:25:59
195 | Epoch 096/120: (70.0680 89.4560) | 880.8468 M (821.4983 -80.1511) || train (69.9977 88.0458) | 884.3393 M (824.0124 -79.1725)| loss 2.2236 || lr 0.007162-0.001432-0.001432 | Time 2021-08-09 22:44:36
196 | Epoch 096/120: (70.0680 89.4560) | 880.8468 M (821.4983 -80.1511) || train (70.0831 88.0675) | 884.3400 M (824.0134 -79.1729)| loss 2.2220 || lr 0.007162-0.001432-0.001432 | Time 2021-08-09 22:44:36
197 | Epoch 097/120: (70.0240 89.5140) | 880.8839 M (821.5704 -80.1860) || train (70.2482 88.1407) | 884.3001 M (823.9617 -79.1611)| loss 2.2170 || lr 0.006595-0.001319-0.001319 | Time 2021-08-09 23:03:12
198 | Epoch 097/120: (70.0240 89.5140) | 880.8839 M (821.5704 -80.1860) || train (70.1986 88.1694) | 884.3118 M (823.9697 -79.1574)| loss 2.2177 || lr 0.006595-0.001319-0.001319 | Time 2021-08-09 23:03:12
199 | Epoch 098/120: (70.2180 89.7000) | 879.9543 M (820.8298 -80.3750) || train (70.3508 88.2818) | 884.3481 M (823.9612 -79.1126)| loss 2.2093 || lr 0.006050-0.001210-0.001210 | Time 2021-08-09 23:22:01
200 | Epoch 098/120: (70.2180 89.7000) | 879.9543 M (820.8298 -80.3750) || train (70.3745 88.2659) | 884.3260 M (823.9454 -79.1189)| loss 2.2095 || lr 0.006050-0.001210-0.001210 | Time 2021-08-09 23:22:01
201 | Epoch 099/120: (70.2520 89.6100) | 880.9913 M (821.4743 -79.9825) || train (70.6295 88.4288) | 884.3381 M (823.9170 -79.0784)| loss 2.1992 || lr 0.005526-0.001105-0.001105 | Time 2021-08-09 23:40:56
202 | Epoch 099/120: (70.2520 89.6100) | 880.9913 M (821.4743 -79.9825) || train (70.6192 88.3562) | 884.3470 M (823.9233 -79.0759)| loss 2.2026 || lr 0.005526-0.001105-0.001105 | Time 2021-08-09 23:40:56
203 | Epoch 100/120: (70.3400 89.7800) | 880.6598 M (821.3078 -80.1475) || train (70.7486 88.4173) | 884.3058 M (823.8622 -79.0559)| loss 2.1974 || lr 0.005024-0.001005-0.001005 | Time 2021-08-09 23:59:23
204 | Epoch 100/120: (70.3400 89.7800) | 880.6598 M (821.3078 -80.1475) || train (70.7283 88.4683) | 884.3252 M (823.8777 -79.0520)| loss 2.1953 || lr 0.005024-0.001005-0.001005 | Time 2021-08-09 23:59:23
205 | Epoch 101/120: (70.4140 89.7240) | 880.7161 M (821.3704 -80.1539) || train (70.9303 88.5625) | 884.3438 M (823.8958 -79.0515)| loss 2.1905 || lr 0.004544-0.000909-0.000909 | Time 2021-08-10 00:17:56
206 | Epoch 101/120: (70.4140 89.7240) | 880.7161 M (821.3704 -80.1539) || train (70.9677 88.5849) | 884.3472 M (823.8947 -79.0470)| loss 2.1881 || lr 0.004544-0.000909-0.000909 | Time 2021-08-10 00:17:56
207 | Epoch 102/120: (70.7480 89.8420) | 880.7492 M (821.3527 -80.1030) || train (71.1729 88.6380) | 884.3513 M (823.8777 -79.0259)| loss 2.1822 || lr 0.004087-0.000817-0.000817 | Time 2021-08-10 00:36:24
208 | Epoch 102/120: (70.7480 89.8420) | 880.7492 M (821.3527 -80.1030) || train (71.0307 88.5896) | 884.3338 M (823.8657 -79.0314)| loss 2.1862 || lr 0.004087-0.000817-0.000817 | Time 2021-08-10 00:36:24
209 | Epoch 103/120: (70.7660 89.8400) | 879.7951 M (820.7058 -80.4102) || train (71.2534 88.6917) | 884.3457 M (823.8929 -79.0466)| loss 2.1792 || lr 0.003653-0.000731-0.000731 | Time 2021-08-10 00:54:48
210 | Epoch 103/120: (70.7660 89.8400) | 879.7951 M (820.7058 -80.4102) || train (71.2063 88.7407) | 884.3333 M (823.8822 -79.0484)| loss 2.1779 || lr 0.003653-0.000731-0.000731 | Time 2021-08-10 00:54:48
211 | Epoch 104/120: (70.7380 89.9240) | 880.9555 M (821.5126 -80.0566) || train (71.4097 88.7919) | 884.3277 M (823.8812 -79.0531)| loss 2.1715 || lr 0.003242-0.000648-0.000648 | Time 2021-08-10 01:13:25
212 | Epoch 104/120: (70.7380 89.9240) | 880.9555 M (821.5126 -80.0566) || train (71.3774 88.7798) | 884.3190 M (823.8775 -79.0580)| loss 2.1722 || lr 0.003242-0.000648-0.000648 | Time 2021-08-10 01:13:25
213 | Epoch 105/120: (70.8480 90.0340) | 880.6178 M (821.2649 -80.1466) || train (71.5600 88.9334) | 884.3237 M (823.8885 -79.0643)| loss 2.1660 || lr 0.002855-0.000571-0.000571 | Time 2021-08-10 01:32:04
214 | Epoch 105/120: (70.8480 90.0340) | 880.6178 M (821.2649 -80.1466) || train (71.6682 88.9801) | 884.3051 M (823.8767 -79.0711)| loss 2.1621 || lr 0.002855-0.000571-0.000571 | Time 2021-08-10 01:32:04
215 | Epoch 106/120: (70.9480 89.9400) | 881.2351 M (821.7340 -79.9984) || train (71.8024 89.0491) | 884.3182 M (823.8758 -79.0572)| loss 2.1565 || lr 0.002491-0.000498-0.000498 | Time 2021-08-10 01:50:23
216 | Epoch 106/120: (70.9480 89.9380) | 881.2351 M (821.7340 -79.9984) || train (71.6822 88.9574) | 884.3232 M (823.8777 -79.0540)| loss 2.1607 || lr 0.002491-0.000498-0.000498 | Time 2021-08-10 01:50:23
217 | Epoch 107/120: (70.9220 89.9640) | 880.4678 M (821.1633 -80.1950) || train (71.8766 89.0634) | 884.3156 M (823.8745 -79.0585)| loss 2.1538 || lr 0.002151-0.000430-0.000430 | Time 2021-08-10 02:08:22
218 | Epoch 107/120: (70.9220 89.9640) | 880.4678 M (821.1633 -80.1950) || train (71.6915 88.9760) | 884.3051 M (823.8623 -79.0567)| loss 2.1596 || lr 0.002151-0.000430-0.000430 | Time 2021-08-10 02:08:22
219 | Epoch 108/120: (71.1420 89.9880) | 880.4612 M (821.1424 -80.1807) || train (72.0767 89.1521) | 884.3128 M (823.8672 -79.0539)| loss 2.1475 || lr 0.001835-0.000367-0.000367 | Time 2021-08-10 02:26:24
220 | Epoch 108/120: (71.1420 89.9880) | 880.4612 M (821.1424 -80.1807) || train (72.0460 89.1519) | 884.3163 M (823.8666 -79.0498)| loss 2.1482 || lr 0.001835-0.000367-0.000367 | Time 2021-08-10 02:26:24
221 | Epoch 109/120: (71.2920 90.1260) | 880.4116 M (821.1162 -80.2042) || train (72.1632 89.1903) | 884.3122 M (823.8663 -79.0535)| loss 2.1444 || lr 0.001544-0.000309-0.000309 | Time 2021-08-10 02:44:13
222 | Epoch 109/120: (71.2920 90.1260) | 880.4116 M (821.1162 -80.2042) || train (72.0680 89.1844) | 884.3229 M (823.8741 -79.0507)| loss 2.1475 || lr 0.001544-0.000309-0.000309 | Time 2021-08-10 02:44:13
223 | Epoch 110/120: (71.2600 90.1540) | 880.7798 M (821.3897 -80.1094) || train (72.2180 89.2097) | 884.3007 M (823.8560 -79.0548)| loss 2.1418 || lr 0.001278-0.000256-0.000256 | Time 2021-08-10 03:02:06
224 | Epoch 110/120: (71.2600 90.1540) | 880.7798 M (821.3897 -80.1094) || train (72.1924 89.2283) | 884.3050 M (823.8565 -79.0510)| loss 2.1421 || lr 0.001278-0.000256-0.000256 | Time 2021-08-10 03:02:06
225 | Epoch 111/120: (71.1660 90.1260) | 880.5534 M (821.2358 -80.1818) || train (72.2367 89.2209) | 884.3243 M (823.8738 -79.0490)| loss 2.1414 || lr 0.001036-0.000207-0.000207 | Time 2021-08-10 03:19:47
226 | Epoch 111/120: (71.1660 90.1260) | 880.5534 M (821.2357 -80.1818) || train (72.3329 89.2740) | 884.3258 M (823.8742 -79.0479)| loss 2.1374 || lr 0.001036-0.000207-0.000207 | Time 2021-08-10 03:19:47
227 | Epoch 112/120: (71.3080 90.1700) | 881.3649 M (821.7807 -79.9154) || train (72.3935 89.2926) | 884.3046 M (823.8552 -79.0501)| loss 2.1370 || lr 0.000819-0.000164-0.000164 | Time 2021-08-10 03:37:35
228 | Epoch 112/120: (71.3080 90.1700) | 881.3649 M (821.7807 -79.9153) || train (72.4420 89.3563) | 884.3089 M (823.8561 -79.0467)| loss 2.1341 || lr 0.000819-0.000164-0.000164 | Time 2021-08-10 03:37:35
229 | Epoch 113/120: (71.3480 90.1800) | 880.3818 M (821.0762 -80.1939) || train (72.5121 89.3552) | 884.3001 M (823.8457 -79.0451)| loss 2.1325 || lr 0.000628-0.000126-0.000126 | Time 2021-08-10 03:55:13
230 | Epoch 113/120: (71.3500 90.1800) | 880.3818 M (821.0762 -80.1939) || train (72.5193 89.4229) | 884.3033 M (823.8501 -79.0464)| loss 2.1301 || lr 0.000628-0.000126-0.000126 | Time 2021-08-10 03:55:13
231 | Epoch 114/120: (71.3680 90.2080) | 880.6740 M (821.2798 -80.1054) || train (72.5349 89.3764) | 884.3225 M (823.8654 -79.0424)| loss 2.1306 || lr 0.000462-0.000092-0.000092 | Time 2021-08-10 04:12:54
232 | Epoch 114/120: (71.3680 90.2080) | 880.6739 M (821.2798 -80.1054) || train (72.4562 89.3649) | 884.3367 M (823.8740 -79.0369)| loss 2.1316 || lr 0.000462-0.000092-0.000092 | Time 2021-08-10 04:12:54
233 | Epoch 115/120: (71.3660 90.2440) | 880.6476 M (821.2734 -80.1253) || train (72.6020 89.4549) | 884.3301 M (823.8666 -79.0360)| loss 2.1284 || lr 0.000321-0.000064-0.000064 | Time 2021-08-10 04:30:38
234 | Epoch 115/120: (71.3660 90.2440) | 880.6476 M (821.2734 -80.1253) || train (72.5653 89.3837) | 884.3285 M (823.8632 -79.0341)| loss 2.1294 || lr 0.000321-0.000064-0.000064 | Time 2021-08-10 04:30:38
235 | Epoch 116/120: (71.4120 90.2340) | 880.6813 M (821.3064 -80.1247) || train (72.6283 89.4211) | 884.3165 M (823.8523 -79.0354)| loss 2.1272 || lr 0.000205-0.000041-0.000041 | Time 2021-08-10 04:48:16
236 | Epoch 116/120: (71.4120 90.2340) | 880.6813 M (821.3064 -80.1247) || train (72.5742 89.4407) | 884.3360 M (823.8671 -79.0306)| loss 2.1288 || lr 0.000205-0.000041-0.000041 | Time 2021-08-10 04:48:16
237 | Epoch 117/120: (71.5040 90.2020) | 881.4224 M (821.8044 -79.8815) || train (72.5917 89.4151) | 884.3291 M (823.8607 -79.0312)| loss 2.1292 || lr 0.000116-0.000023-0.000023 | Time 2021-08-10 05:05:57
238 | Epoch 117/120: (71.5040 90.2020) | 881.4224 M (821.8044 -79.8815) || train (72.6654 89.4774) | 884.3078 M (823.8473 -79.0390)| loss 2.1247 || lr 0.000116-0.000023-0.000023 | Time 2021-08-10 05:05:57
239 | Epoch 118/120: (71.4500 90.2400) | 881.1058 M (821.6119 -80.0056) || train (72.7502 89.4559) | 884.3115 M (823.8502 -79.0383)| loss 2.1247 || lr 0.000051-0.000010-0.000010 | Time 2021-08-10 05:23:32
240 | Epoch 118/120: (71.4520 90.2400) | 881.1058 M (821.6119 -80.0056) || train (72.5758 89.4916) | 884.3200 M (823.8530 -79.0325)| loss 2.1266 || lr 0.000051-0.000010-0.000010 | Time 2021-08-10 05:23:32
241 | Epoch 119/120: (71.4620 90.2420) | 880.6930 M (821.3069 -80.1134) || train (72.7455 89.4720) | 884.3115 M (823.8495 -79.0375)| loss 2.1245 || lr 0.000013-0.000003-0.000003 | Time 2021-08-10 05:41:07
242 | Epoch 119/120: (71.4620 90.2420) | 880.6930 M (821.3069 -80.1134) || train (72.6757 89.4804) | 884.3165 M (823.8503 -79.0333)| loss 2.1251 || lr 0.000013-0.000003-0.000003 | Time 2021-08-10 05:41:07
243 |
--------------------------------------------------------------------------------
/logs/msgc_condensenet_noatt_log.txt:
--------------------------------------------------------------------------------
1 | Flops of dydensenet74: original 2049.913552 M, target 528.877696416 M, possible 539.135152 M, main 9.15528 M, dgc 2040.758272 M, mask 19.790304 M
2 | Flops of dydensenet74: original 2049.913552 M, target 528.877696416 M, possible 539.135152 M, main 9.15528 M, dgc 2040.758272 M, mask 19.790304 M
3 | Epoch 000/120: (66.8880 87.8980) | 2068.9446 M (2039.9989 -0.0000) || train (51.5425 74.3620) | 1856.9717 M (1828.1606 -0.1346)| loss 3.0317 || lr 0.075000-0.015000-0.015000 | Time 2021-08-16 00:12:00
4 | Epoch 000/120: (66.8880 87.8980) | 2068.9446 M (2039.9989 -0.0000) || train (51.5519 74.3415) | 1856.9699 M (1828.1586 -0.1344)| loss 3.0303 || lr 0.075000-0.015000-0.015000 | Time 2021-08-16 00:12:00
5 | Epoch 001/120: (69.4880 89.4780) | 2068.8700 M (2039.9243 -0.0000) || train (62.6191 83.7757) | 1895.9003 M (1867.0278 -0.0731)| loss 2.4980 || lr 0.074987-0.014997-0.014997 | Time 2021-08-16 01:00:58
6 | Epoch 001/120: (69.4880 89.4780) | 2068.8700 M (2039.9243 -0.0000) || train (62.5041 83.7145) | 1895.9029 M (1867.0304 -0.0732)| loss 2.4997 || lr 0.074987-0.014997-0.014997 | Time 2021-08-16 01:00:58
7 | Epoch 002/120: (70.7420 90.1980) | 2068.8194 M (2039.8738 -0.0000) || train (65.3974 85.6092) | 1919.1576 M (1890.2568 -0.0449)| loss 2.3803 || lr 0.074949-0.014990-0.014990 | Time 2021-08-16 01:49:51
8 | Epoch 002/120: (70.7420 90.1980) | 2068.8194 M (2039.8738 -0.0000) || train (65.4184 85.5713) | 1919.1554 M (1890.2545 -0.0448)| loss 2.3794 || lr 0.074949-0.014990-0.014990 | Time 2021-08-16 01:49:51
9 | Epoch 003/120: (71.5360 90.4540) | 2068.6325 M (2039.6869 -0.0000) || train (66.8986 86.4631) | 1935.9563 M (1907.0401 -0.0294)| loss 2.3204 || lr 0.074884-0.014977-0.014977 | Time 2021-08-16 02:38:48
10 | Epoch 003/120: (71.5360 90.4540) | 2068.6325 M (2039.6869 -0.0000) || train (66.8773 86.5229) | 1935.9533 M (1907.0369 -0.0293)| loss 2.3183 || lr 0.074884-0.014977-0.014977 | Time 2021-08-16 02:38:48
11 | Epoch 004/120: (71.6240 90.5560) | 2057.2974 M (2028.3518 -0.0000) || train (67.6812 86.9789) | 1934.9192 M (1905.9979 -0.0244)| loss 2.2880 || lr 0.074795-0.014959-0.014959 | Time 2021-08-16 03:27:43
12 | Epoch 004/120: (71.6240 90.5560) | 2057.2974 M (2028.3518 -0.0000) || train (67.8287 87.0240) | 1934.9186 M (1905.9974 -0.0244)| loss 2.2838 || lr 0.074795-0.014959-0.014959 | Time 2021-08-16 03:27:43
13 | Epoch 005/120: (71.6540 90.6340) | 2033.1968 M (2004.2512 -0.0000) || train (68.1561 87.2409) | 1909.8353 M (1880.9191 -0.0295)| loss 2.2677 || lr 0.074679-0.014936-0.014936 | Time 2021-08-16 04:16:35
14 | Epoch 005/120: (71.6540 90.6340) | 2033.1968 M (2004.2512 -0.0000) || train (68.1541 87.2134) | 1909.8349 M (1880.9188 -0.0296)| loss 2.2702 || lr 0.074679-0.014936-0.014936 | Time 2021-08-16 04:16:35
15 | Epoch 006/120: (71.2600 90.6480) | 2002.7406 M (1973.7993 -0.0043) || train (68.5239 87.5226) | 1884.4526 M (1855.5541 -0.0471)| loss 2.2531 || lr 0.074538-0.014908-0.014908 | Time 2021-08-16 05:05:32
16 | Epoch 006/120: (71.2600 90.6480) | 2002.7406 M (1973.7993 -0.0043) || train (68.5710 87.4597) | 1884.4512 M (1855.5528 -0.0472)| loss 2.2518 || lr 0.074538-0.014908-0.014908 | Time 2021-08-16 05:05:32
17 | Epoch 007/120: (71.7620 90.7780) | 1968.2798 M (1939.3893 -0.0552) || train (68.8615 87.6614) | 1859.0462 M (1830.1981 -0.0976)| loss 2.2391 || lr 0.074372-0.014874-0.014874 | Time 2021-08-16 05:54:23
18 | Epoch 007/120: (71.7620 90.7780) | 1968.2798 M (1939.3893 -0.0552) || train (68.8338 87.6619) | 1859.0508 M (1830.2027 -0.0976)| loss 2.2408 || lr 0.074372-0.014874-0.014874 | Time 2021-08-16 05:54:24
19 | Epoch 008/120: (72.1920 90.9880) | 1935.5620 M (1906.8294 -0.2131) || train (69.2142 87.8491) | 1833.6693 M (1804.9282 -0.2046)| loss 2.2262 || lr 0.074181-0.014836-0.014836 | Time 2021-08-16 06:43:39
20 | Epoch 008/120: (72.1920 90.9880) | 1935.5620 M (1906.8294 -0.2131) || train (69.1922 87.9012) | 1833.6720 M (1804.9308 -0.2045)| loss 2.2259 || lr 0.074181-0.014836-0.014836 | Time 2021-08-16 06:43:39
21 | Epoch 009/120: (71.9380 90.8380) | 1901.3248 M (1872.7792 -0.4000) || train (69.3444 87.9527) | 1808.2230 M (1779.6523 -0.3750)| loss 2.2188 || lr 0.073964-0.014793-0.014793 | Time 2021-08-16 07:32:36
22 | Epoch 009/120: (71.9380 90.8380) | 1901.3248 M (1872.7792 -0.4000) || train (69.5364 88.0125) | 1808.2185 M (1779.6479 -0.3750)| loss 2.2157 || lr 0.073964-0.014793-0.014793 | Time 2021-08-16 07:32:36
23 | Epoch 010/120: (72.1640 90.9060) | 1866.5937 M (1838.4189 -0.7709) || train (69.5946 88.1137) | 1782.8908 M (1754.5440 -0.5989)| loss 2.2087 || lr 0.073722-0.014744-0.014744 | Time 2021-08-16 08:21:30
24 | Epoch 010/120: (72.1640 90.9060) | 1866.5937 M (1838.4189 -0.7709) || train (69.6870 88.1691) | 1782.8917 M (1754.5451 -0.5991)| loss 2.2069 || lr 0.073722-0.014744-0.014744 | Time 2021-08-16 08:21:30
25 | Epoch 011/120: (72.4600 91.1520) | 1832.0878 M (1804.1543 -1.0121) || train (69.8723 88.1987) | 1757.5261 M (1729.4480 -0.8675)| loss 2.1990 || lr 0.073456-0.014691-0.014691 | Time 2021-08-16 09:10:27
26 | Epoch 011/120: (72.4600 91.1520) | 1832.0878 M (1804.1543 -1.0121) || train (69.7150 88.1583) | 1757.5254 M (1729.4476 -0.8679)| loss 2.2044 || lr 0.073456-0.014691-0.014691 | Time 2021-08-16 09:10:27
27 | Epoch 012/120: (72.4900 91.0340) | 1799.4172 M (1771.8335 -1.3619) || train (69.9393 88.3049) | 1732.1578 M (1704.4011 -1.1889)| loss 2.1947 || lr 0.073165-0.014633-0.014633 | Time 2021-08-16 09:59:25
28 | Epoch 012/120: (72.4900 91.0340) | 1799.4172 M (1771.8335 -1.3619) || train (69.8982 88.2529) | 1732.1567 M (1704.4003 -1.1892)| loss 2.1963 || lr 0.073165-0.014633-0.014633 | Time 2021-08-16 09:59:25
29 | Epoch 013/120: (72.3140 90.9320) | 1766.9608 M (1739.8676 -1.8524) || train (69.9913 88.3445) | 1706.7978 M (1679.3924 -1.5402)| loss 2.1916 || lr 0.072849-0.014570-0.014570 | Time 2021-08-16 10:48:48
30 | Epoch 013/120: (72.3140 90.9320) | 1766.9608 M (1739.8676 -1.8524) || train (70.0842 88.3843) | 1706.7960 M (1679.3911 -1.5407)| loss 2.1885 || lr 0.072849-0.014570-0.014570 | Time 2021-08-16 10:48:48
31 | Epoch 014/120: (72.5200 91.1140) | 1733.8678 M (1707.3512 -2.4290) || train (70.1945 88.4591) | 1681.4369 M (1654.3956 -1.9044)| loss 2.1826 || lr 0.072509-0.014502-0.014502 | Time 2021-08-16 11:38:58
32 | Epoch 014/120: (72.5200 91.1140) | 1733.8678 M (1707.3512 -2.4290) || train (70.2692 88.5347) | 1681.4355 M (1654.3940 -1.9042)| loss 2.1810 || lr 0.072509-0.014502-0.014502 | Time 2021-08-16 11:38:58
33 | Epoch 015/120: (72.5820 91.1400) | 1702.7748 M (1676.6259 -2.7968) || train (70.2688 88.5214) | 1656.1198 M (1629.4709 -2.2967)| loss 2.1789 || lr 0.072145-0.014429-0.014429 | Time 2021-08-16 12:47:05
34 | Epoch 015/120: (72.5820 91.1400) | 1702.7748 M (1676.6259 -2.7968) || train (70.4140 88.5353) | 1656.1157 M (1629.4673 -2.2973)| loss 2.1766 || lr 0.072145-0.014429-0.014429 | Time 2021-08-16 12:47:05
35 | Epoch 016/120: (72.5800 91.1640) | 1671.9802 M (1646.1266 -3.0921) || train (70.3905 88.6149) | 1630.7603 M (1604.4480 -2.6333)| loss 2.1739 || lr 0.071758-0.014352-0.014352 | Time 2021-08-16 14:39:36
36 | Epoch 016/120: (72.5800 91.1640) | 1671.9802 M (1646.1266 -3.0921) || train (70.4821 88.6466) | 1630.7595 M (1604.4471 -2.6332)| loss 2.1709 || lr 0.071758-0.014352-0.014352 | Time 2021-08-16 14:39:36
37 | Epoch 017/120: (72.6280 91.1920) | 1641.3129 M (1615.6373 -3.2700) || train (70.4335 88.5529) | 1605.4256 M (1579.4461 -2.9662)| loss 2.1747 || lr 0.071347-0.014269-0.014269 | Time 2021-08-16 15:44:38
38 | Epoch 017/120: (72.6280 91.1920) | 1641.3129 M (1615.6373 -3.2700) || train (70.5366 88.6357) | 1605.4206 M (1579.4414 -2.9664)| loss 2.1691 || lr 0.071347-0.014269-0.014269 | Time 2021-08-16 15:44:38
39 | Epoch 018/120: (72.5820 91.1380) | 1611.0178 M (1585.6090 -3.5369) || train (70.5576 88.6529) | 1580.0881 M (1554.4589 -3.3164)| loss 2.1671 || lr 0.070913-0.014183-0.014183 | Time 2021-08-16 16:43:28
40 | Epoch 018/120: (72.5820 91.1380) | 1611.0178 M (1585.6090 -3.5369) || train (70.6535 88.6755) | 1580.0907 M (1554.4615 -3.3165)| loss 2.1654 || lr 0.070913-0.014183-0.014183 | Time 2021-08-16 16:43:28
41 | Epoch 019/120: (72.7300 91.2520) | 1580.2740 M (1555.4066 -4.0783) || train (70.7152 88.7406) | 1554.7565 M (1529.5390 -3.7281)| loss 2.1621 || lr 0.070456-0.014091-0.014091 | Time 2021-08-16 17:47:30
42 | Epoch 019/120: (72.7300 91.2520) | 1580.2740 M (1555.4066 -4.0783) || train (70.6154 88.7098) | 1554.7537 M (1529.5370 -3.7290)| loss 2.1639 || lr 0.070456-0.014091-0.014091 | Time 2021-08-16 17:47:30
43 | Epoch 020/120: (72.4180 91.1020) | 1551.4392 M (1527.0956 -4.6021) || train (70.6601 88.7398) | 1529.4046 M (1504.6166 -4.1576)| loss 2.1636 || lr 0.069976-0.013995-0.013995 | Time 2021-08-16 18:51:17
44 | Epoch 020/120: (72.4180 91.1020) | 1551.4392 M (1527.0956 -4.6021) || train (70.6733 88.7094) | 1529.4018 M (1504.6135 -4.1574)| loss 2.1612 || lr 0.069976-0.013995-0.013995 | Time 2021-08-16 18:51:17
45 | Epoch 021/120: (72.2140 90.9300) | 1519.3063 M (1495.5175 -5.1569) || train (70.7589 88.7841) | 1504.0620 M (1479.7322 -4.6159)| loss 2.1581 || lr 0.069474-0.013895-0.013895 | Time 2021-08-16 19:49:31
46 | Epoch 021/120: (72.2140 90.9300) | 1519.3063 M (1495.5175 -5.1569) || train (70.6190 88.7126) | 1504.0681 M (1479.7378 -4.6153)| loss 2.1620 || lr 0.069474-0.013895-0.013895 | Time 2021-08-16 19:49:31
47 | Epoch 022/120: (72.4560 91.0100) | 1491.4140 M (1468.0074 -5.5390) || train (70.7534 88.7340) | 1478.7512 M (1454.8603 -5.0548)| loss 2.1601 || lr 0.068950-0.013790-0.013790 | Time 2021-08-16 21:53:32
48 | Epoch 022/120: (72.4560 91.0100) | 1491.4140 M (1468.0074 -5.5390) || train (70.8941 88.7812) | 1478.7498 M (1454.8594 -5.0552)| loss 2.1547 || lr 0.068950-0.013790-0.013790 | Time 2021-08-16 21:53:32
49 | Epoch 023/120: (72.7220 91.2760) | 1461.7817 M (1438.6668 -5.8308) || train (70.8466 88.7934) | 1453.3922 M (1429.9302 -5.4836)| loss 2.1552 || lr 0.068405-0.013681-0.013681 | Time 2021-08-17 00:37:35
50 | Epoch 023/120: (72.7220 91.2760) | 1461.7817 M (1438.6668 -5.8308) || train (70.8939 88.8302) | 1453.4023 M (1429.9400 -5.4833)| loss 2.1530 || lr 0.068405-0.013681-0.013681 | Time 2021-08-17 00:37:35
51 | Epoch 024/120: (72.7160 91.2220) | 1432.8714 M (1410.2942 -6.3685) || train (70.8622 88.8044) | 1428.0721 M (1405.0278 -5.9014)| loss 2.1533 || lr 0.067838-0.013568-0.013568 | Time 2021-08-17 02:36:41
52 | Epoch 024/120: (72.7160 91.2220) | 1432.8714 M (1410.2942 -6.3685) || train (70.8567 88.8536) | 1428.0788 M (1405.0350 -5.9018)| loss 2.1535 || lr 0.067838-0.013568-0.013568 | Time 2021-08-17 02:36:41
53 | Epoch 025/120: (72.1880 91.1580) | 1404.0404 M (1381.9830 -6.8882) || train (70.9702 88.9054) | 1402.7395 M (1380.2117 -6.4178)| loss 2.1479 || lr 0.067251-0.013450-0.013450 | Time 2021-08-17 04:27:53
54 | Epoch 025/120: (72.1880 91.1580) | 1404.0404 M (1381.9830 -6.8882) || train (70.8848 88.8553) | 1402.7378 M (1380.2098 -6.4177)| loss 2.1508 || lr 0.067251-0.013450-0.013450 | Time 2021-08-17 04:27:53
55 | Epoch 026/120: (72.5900 91.2140) | 1374.1217 M (1352.9379 -7.7619) || train (70.9720 88.8556) | 1377.3932 M (1355.4788 -7.0312)| loss 2.1492 || lr 0.066643-0.013329-0.013329 | Time 2021-08-17 06:19:32
56 | Epoch 026/120: (72.5900 91.2140) | 1374.1217 M (1352.9379 -7.7619) || train (70.9429 88.9018) | 1377.3881 M (1355.4733 -7.0309)| loss 2.1498 || lr 0.066643-0.013329-0.013329 | Time 2021-08-17 06:19:32
57 | Epoch 027/120: (72.3840 91.0500) | 1348.1574 M (1327.6551 -8.4433) || train (70.9523 88.9334) | 1352.0433 M (1330.8420 -7.7444)| loss 2.1476 || lr 0.066015-0.013203-0.013203 | Time 2021-08-17 08:12:13
58 | Epoch 027/120: (72.3840 91.0500) | 1348.1574 M (1327.6551 -8.4433) || train (71.0350 88.9134) | 1352.0530 M (1330.8508 -7.7434)| loss 2.1476 || lr 0.066015-0.013203-0.013203 | Time 2021-08-17 08:12:13
59 | Epoch 028/120: (72.5120 91.1940) | 1318.7129 M (1298.9159 -9.1486) || train (71.0002 88.9367) | 1326.7339 M (1306.2973 -8.5091)| loss 2.1468 || lr 0.065368-0.013074-0.013074 | Time 2021-08-17 13:47:39
60 | Epoch 028/120: (72.5120 91.1940) | 1318.7129 M (1298.9159 -9.1486) || train (70.9546 88.9228) | 1326.7365 M (1306.2999 -8.5090)| loss 2.1478 || lr 0.065368-0.013074-0.013074 | Time 2021-08-17 13:47:39
61 | Epoch 029/120: (72.3220 91.0180) | 1291.6440 M (1272.7824 -10.0841) || train (70.9439 88.8622) | 1301.3885 M (1281.8206 -9.3778)| loss 2.1498 || lr 0.064702-0.012940-0.012940 | Time 2021-08-17 14:37:06
62 | Epoch 029/120: (72.3220 91.0180) | 1291.6440 M (1272.7824 -10.0841) || train (70.9846 88.8883) | 1301.3802 M (1281.8136 -9.3791)| loss 2.1488 || lr 0.064702-0.012940-0.012940 | Time 2021-08-17 14:37:06
63 | Epoch 030/120: (72.2940 90.8240) | 1265.6373 M (1247.6990 -11.0074) || train (70.9331 88.8506) | 1276.0421 M (1257.4650 -10.3686)| loss 2.1482 || lr 0.064017-0.012803-0.012803 | Time 2021-08-17 15:26:26
64 | Epoch 030/120: (72.2940 90.8240) | 1265.6373 M (1247.6990 -11.0074) || train (70.9521 88.9193) | 1276.0423 M (1257.4653 -10.3686)| loss 2.1470 || lr 0.064017-0.012803-0.012803 | Time 2021-08-17 15:26:26
65 | Epoch 031/120: (72.3240 91.1840) | 1236.6019 M (1219.8019 -12.1457) || train (70.9535 88.8769) | 1250.6950 M (1233.1289 -11.3795)| loss 2.1478 || lr 0.063313-0.012663-0.012663 | Time 2021-08-17 16:15:43
66 | Epoch 031/120: (72.3240 91.1840) | 1236.6019 M (1219.8019 -12.1457) || train (70.9659 88.8756) | 1250.6830 M (1233.1178 -11.3805)| loss 2.1486 || lr 0.063313-0.012663-0.012663 | Time 2021-08-17 16:15:43
67 | Epoch 032/120: (72.6780 91.3100) | 1207.5716 M (1192.2503 -13.6243) || train (70.9031 88.8608) | 1225.3637 M (1208.9808 -12.5627)| loss 2.1489 || lr 0.062592-0.012518-0.012518 | Time 2021-08-17 17:05:11
68 | Epoch 032/120: (72.6780 91.3100) | 1207.5716 M (1192.2503 -13.6243) || train (70.8941 88.8762) | 1225.3685 M (1208.9856 -12.5627)| loss 2.1476 || lr 0.062592-0.012518-0.012518 | Time 2021-08-17 17:05:11
69 | Epoch 033/120: (72.4420 91.1160) | 1181.6708 M (1168.0084 -15.2832) || train (70.9140 88.8895) | 1200.0382 M (1184.9966 -13.9041)| loss 2.1472 || lr 0.061854-0.012371-0.012371 | Time 2021-08-17 17:54:10
70 | Epoch 033/120: (72.4420 91.1160) | 1181.6708 M (1168.0084 -15.2832) || train (70.8925 88.8638) | 1200.0313 M (1184.9906 -13.9049)| loss 2.1502 || lr 0.061854-0.012371-0.012371 | Time 2021-08-17 17:54:10
71 | Epoch 034/120: (72.1820 91.0820) | 1153.0382 M (1140.9828 -16.8903) || train (70.8390 88.7802) | 1174.7101 M (1161.1079 -15.3434)| loss 2.1526 || lr 0.061100-0.012220-0.012220 | Time 2021-08-17 18:43:05
72 | Epoch 034/120: (72.1820 91.0820) | 1153.0382 M (1140.9828 -16.8903) || train (70.9371 88.8786) | 1174.6965 M (1161.0939 -15.3430)| loss 2.1466 || lr 0.061100-0.012220-0.012220 | Time 2021-08-17 18:43:05
73 | Epoch 035/120: (72.1800 90.9680) | 1125.6014 M (1115.2609 -18.6051) || train (70.8892 88.8388) | 1149.3360 M (1137.2730 -16.8827)| loss 2.1499 || lr 0.060329-0.012066-0.012066 | Time 2021-08-17 19:31:58
74 | Epoch 035/120: (72.1800 90.9680) | 1125.6014 M (1115.2609 -18.6051) || train (70.8981 88.8759) | 1149.3366 M (1137.2741 -16.8832)| loss 2.1484 || lr 0.060329-0.012066-0.012066 | Time 2021-08-17 19:31:58
75 | Epoch 036/120: (72.0520 90.7660) | 1097.9145 M (1089.4832 -20.5144) || train (70.7739 88.7993) | 1124.0150 M (1113.5297 -18.4604)| loss 2.1540 || lr 0.059542-0.011908-0.011908 | Time 2021-08-17 20:20:50
76 | Epoch 036/120: (72.0520 90.7660) | 1097.9145 M (1089.4832 -20.5144) || train (70.8613 88.8363) | 1124.0142 M (1113.5292 -18.4606)| loss 2.1511 || lr 0.059542-0.011908-0.011908 | Time 2021-08-17 20:20:50
77 | Epoch 037/120: (71.9320 91.0080) | 1072.3998 M (1065.5013 -22.0471) || train (70.8077 88.7812) | 1098.6408 M (1089.9299 -20.2348)| loss 2.1541 || lr 0.058740-0.011748-0.011748 | Time 2021-08-17 21:09:44
78 | Epoch 037/120: (71.9320 91.0080) | 1072.3998 M (1065.5013 -22.0471) || train (70.8055 88.8934) | 1098.6332 M (1089.9237 -20.2361)| loss 2.1511 || lr 0.058740-0.011748-0.011748 | Time 2021-08-17 21:09:44
79 | Epoch 038/120: (72.3580 90.9460) | 1046.0702 M (1041.4248 -24.3002) || train (70.7180 88.7543) | 1073.2920 M (1066.4180 -22.0716)| loss 2.1566 || lr 0.057924-0.011585-0.011585 | Time 2021-08-17 21:58:36
80 | Epoch 038/120: (72.3580 90.9460) | 1046.0702 M (1041.4248 -24.3002) || train (70.6911 88.7161) | 1073.2955 M (1066.4220 -22.0721)| loss 2.1577 || lr 0.057924-0.011585-0.011585 | Time 2021-08-17 21:58:36
81 | Epoch 039/120: (71.9340 90.9900) | 1017.3257 M (1014.9044 -26.5243) || train (70.6391 88.7793) | 1047.9594 M (1043.1240 -24.1102)| loss 2.1571 || lr 0.057094-0.011419-0.011419 | Time 2021-08-17 22:47:29
82 | Epoch 039/120: (71.9340 90.9900) | 1017.3257 M (1014.9044 -26.5243) || train (70.6668 88.7832) | 1047.9808 M (1043.1458 -24.1106)| loss 2.1569 || lr 0.057094-0.011419-0.011419 | Time 2021-08-17 22:47:29
83 | Epoch 040/120: (71.9800 90.8840) | 988.5829 M (988.9074 -29.2700) || train (70.6714 88.7495) | 1022.6230 M (1020.0632 -26.3857)| loss 2.1575 || lr 0.056250-0.011250-0.011250 | Time 2021-08-17 23:36:20
84 | Epoch 040/120: (71.9800 90.8840) | 988.5829 M (988.9074 -29.2700) || train (70.7639 88.7559) | 1022.6140 M (1020.0522 -26.3838)| loss 2.1560 || lr 0.056250-0.011250-0.011250 | Time 2021-08-17 23:36:20
85 | Epoch 041/120: (72.0240 91.0040) | 962.9410 M (966.1198 -32.1243) || train (70.5283 88.6636) | 997.2721 M (997.2471 -28.9205)| loss 2.1642 || lr 0.055393-0.011079-0.011079 | Time 2021-08-18 00:25:13
86 | Epoch 041/120: (72.0240 91.0040) | 962.9410 M (966.1198 -32.1243) || train (70.5684 88.7117) | 997.2814 M (997.2585 -28.9227)| loss 2.1612 || lr 0.055393-0.011079-0.011079 | Time 2021-08-18 00:25:13
87 | Epoch 042/120: (72.1120 91.0580) | 935.5174 M (941.7604 -35.1886) || train (70.4910 88.6090) | 971.9271 M (974.6427 -31.6613)| loss 2.1668 || lr 0.054525-0.010905-0.010905 | Time 2021-08-18 01:14:10
88 | Epoch 042/120: (72.1120 91.0580) | 935.5174 M (941.7604 -35.1886) || train (70.5121 88.6727) | 971.9242 M (974.6412 -31.6626)| loss 2.1643 || lr 0.054525-0.010905-0.010905 | Time 2021-08-18 01:14:10
89 | Epoch 043/120: (71.7520 90.9900) | 908.2829 M (917.7098 -38.3724) || train (70.3296 88.5592) | 946.5612 M (952.1083 -34.4927)| loss 2.1709 || lr 0.053644-0.010729-0.010729 | Time 2021-08-18 02:03:04
90 | Epoch 043/120: (71.7520 90.9900) | 908.2829 M (917.7098 -38.3724) || train (70.3945 88.5743) | 946.5612 M (952.1082 -34.4925)| loss 2.1698 || lr 0.053644-0.010729-0.010729 | Time 2021-08-18 02:03:04
91 | Epoch 044/120: (71.7700 90.8620) | 883.3600 M (895.6713 -41.2569) || train (70.1549 88.4964) | 921.1961 M (929.7438 -37.4932)| loss 2.1764 || lr 0.052753-0.010551-0.010551 | Time 2021-08-18 02:51:57
92 | Epoch 044/120: (71.7700 90.8620) | 883.3600 M (895.6713 -41.2569) || train (70.2258 88.5002) | 921.2147 M (929.7617 -37.4926)| loss 2.1753 || lr 0.052753-0.010551-0.010551 | Time 2021-08-18 02:51:57
93 | Epoch 045/120: (71.7600 90.6880) | 857.3979 M (873.1383 -44.6860) || train (70.1209 88.4387) | 895.8751 M (907.6792 -40.7496)| loss 2.1798 || lr 0.051851-0.010370-0.010370 | Time 2021-08-18 03:40:49
94 | Epoch 045/120: (71.7600 90.6880) | 857.3979 M (873.1383 -44.6860) || train (70.1268 88.4498) | 895.8669 M (907.6706 -40.7492)| loss 2.1793 || lr 0.051851-0.010370-0.010370 | Time 2021-08-18 03:40:49
95 | Epoch 046/120: (71.7080 90.7080) | 829.9879 M (850.2610 -49.2187) || train (70.1062 88.3778) | 870.5367 M (885.9141 -44.3230)| loss 2.1834 || lr 0.050939-0.010188-0.010188 | Time 2021-08-18 04:29:43
96 | Epoch 046/120: (71.7080 90.7080) | 829.9879 M (850.2610 -49.2187) || train (69.9819 88.3330) | 870.5430 M (885.9185 -44.3210)| loss 2.1856 || lr 0.050939-0.010188-0.010188 | Time 2021-08-18 04:29:43
97 | Epoch 047/120: (71.8120 90.7060) | 804.3339 M (828.8844 -53.4961) || train (69.8533 88.1914) | 845.1783 M (864.4523 -48.2195)| loss 2.1930 || lr 0.050018-0.010004-0.010004 | Time 2021-08-18 05:18:36
98 | Epoch 047/120: (71.8120 90.7060) | 804.3339 M (828.8844 -53.4961) || train (69.8232 88.2618) | 845.1985 M (864.4751 -48.2221)| loss 2.1925 || lr 0.050018-0.010004-0.010004 | Time 2021-08-18 05:18:36
99 | Epoch 048/120: (71.1920 90.5160) | 777.2574 M (806.6862 -58.3744) || train (69.5767 88.1332) | 819.8230 M (843.2152 -52.3377)| loss 2.2006 || lr 0.049088-0.009818-0.009818 | Time 2021-08-18 06:07:31
100 | Epoch 048/120: (71.1920 90.5160) | 777.2574 M (806.6862 -58.3744) || train (69.6369 88.1491) | 819.8225 M (843.2139 -52.3370)| loss 2.1999 || lr 0.049088-0.009818-0.009818 | Time 2021-08-18 06:07:31
101 | Epoch 049/120: (71.2220 90.6320) | 753.3303 M (787.2494 -62.8647) || train (69.2741 87.9685) | 794.4914 M (822.2464 -56.7005)| loss 2.2110 || lr 0.048151-0.009630-0.009630 | Time 2021-08-18 06:56:23
102 | Epoch 049/120: (71.2220 90.6320) | 753.3303 M (787.2494 -62.8647) || train (69.4922 88.0267) | 794.4888 M (822.2429 -56.6997)| loss 2.2061 || lr 0.048151-0.009630-0.009630 | Time 2021-08-18 06:56:23
103 | Epoch 050/120: (70.9600 90.1000) | 729.3681 M (767.7428 -67.3203) || train (69.2980 87.8781) | 769.1426 M (801.5053 -61.3082)| loss 2.2130 || lr 0.047206-0.009441-0.009441 | Time 2021-08-18 07:45:19
104 | Epoch 050/120: (70.9600 90.1000) | 729.3681 M (767.7428 -67.3203) || train (69.2104 87.9466) | 769.1520 M (801.5157 -61.3093)| loss 2.2163 || lr 0.047206-0.009441-0.009441 | Time 2021-08-18 07:45:19
105 | Epoch 051/120: (70.6720 90.1860) | 704.5078 M (747.5980 -72.0358) || train (69.1236 87.8133) | 743.8154 M (780.7578 -65.8880)| loss 2.2223 || lr 0.046254-0.009251-0.009251 | Time 2021-08-18 08:34:11
106 | Epoch 051/120: (70.6720 90.1860) | 704.5078 M (747.5980 -72.0358) || train (69.0358 87.7885) | 743.8134 M (780.7575 -65.8897)| loss 2.2232 || lr 0.046254-0.009251-0.009251 | Time 2021-08-18 08:34:11
107 | Epoch 052/120: (70.6720 90.2280) | 680.8884 M (727.8541 -75.9113) || train (68.7416 87.6463) | 718.4890 M (759.8987 -70.3553)| loss 2.2356 || lr 0.045297-0.009059-0.009059 | Time 2021-08-18 09:23:09
108 | Epoch 052/120: (70.6720 90.2280) | 680.8884 M (727.8541 -75.9113) || train (68.7335 87.6113) | 718.5035 M (759.9090 -70.3511)| loss 2.2362 || lr 0.045297-0.009059-0.009059 | Time 2021-08-18 09:23:09
109 | Epoch 053/120: (70.4260 90.1980) | 656.8364 M (707.7849 -79.8941) || train (68.3960 87.3931) | 693.2034 M (738.8231 -74.5652)| loss 2.2505 || lr 0.044334-0.008867-0.008867 | Time 2021-08-18 10:12:02
110 | Epoch 053/120: (70.4260 90.1980) | 656.8364 M (707.7849 -79.8941) || train (68.2866 87.3217) | 693.2072 M (738.8293 -74.5677)| loss 2.2543 || lr 0.044334-0.008867-0.008867 | Time 2021-08-18 10:12:02
111 | Epoch 054/120: (70.2640 89.9940) | 634.8241 M (689.0961 -83.2175) || train (67.9577 87.1450) | 667.9188 M (717.5627 -78.5895)| loss 2.2673 || lr 0.043366-0.008673-0.008673 | Time 2021-08-18 11:00:55
112 | Epoch 054/120: (70.2640 89.9940) | 634.8241 M (689.0961 -83.2175) || train (68.0295 87.1797) | 667.9184 M (717.5645 -78.5916)| loss 2.2657 || lr 0.043366-0.008673-0.008673 | Time 2021-08-18 11:00:55
113 | Epoch 055/120: (69.9240 89.8040) | 611.3495 M (669.1656 -86.7616) || train (67.6344 86.8498) | 642.6324 M (696.4911 -82.8044)| loss 2.2850 || lr 0.042395-0.008479-0.008479 | Time 2021-08-18 11:49:48
114 | Epoch 055/120: (69.9240 89.8040) | 611.3495 M (669.1656 -86.7616) || train (67.5416 86.7924) | 642.6523 M (696.5087 -82.8020)| loss 2.2872 || lr 0.042395-0.008479-0.008479 | Time 2021-08-18 11:49:48
115 | Epoch 056/120: (69.3000 89.3920) | 588.4999 M (649.5748 -90.0205) || train (67.1192 86.5510) | 617.3605 M (675.1299 -86.7150)| loss 2.3057 || lr 0.041420-0.008284-0.008284 | Time 2021-08-18 12:38:45
116 | Epoch 056/120: (69.3000 89.3920) | 588.4999 M (649.5748 -90.0205) || train (66.9856 86.4236) | 617.3659 M (675.1321 -86.7118)| loss 2.3124 || lr 0.041420-0.008284-0.008284 | Time 2021-08-18 12:38:45
117 | Epoch 057/120: (69.2580 89.1560) | 566.3783 M (629.9764 -92.5437) || train (66.3223 86.0788) | 592.1097 M (653.0836 -89.9195)| loss 2.3385 || lr 0.040442-0.008088-0.008088 | Time 2021-08-18 13:27:42
118 | Epoch 057/120: (69.2580 89.1560) | 566.3783 M (629.9764 -92.5437) || train (66.3890 86.0190) | 592.1008 M (653.0788 -89.9235)| loss 2.3383 || lr 0.040442-0.008088-0.008088 | Time 2021-08-18 13:27:42
119 | Epoch 058/120: (68.4700 88.6480) | 544.1996 M (610.4410 -95.1870) || train (65.5980 85.4854) | 566.8506 M (630.9185 -93.0135)| loss 2.3718 || lr 0.039463-0.007893-0.007893 | Time 2021-08-18 14:16:45
120 | Epoch 058/120: (68.4700 88.6480) | 544.1996 M (610.4410 -95.1870) || train (65.5789 85.5085) | 566.8429 M (630.9115 -93.0142)| loss 2.3718 || lr 0.039463-0.007893-0.007893 | Time 2021-08-18 14:16:45
121 | Epoch 059/120: (68.1280 88.5900) | 520.9000 M (590.1964 -98.2419) || train (64.9584 84.9628) | 541.5681 M (608.6327 -96.0102)| loss 2.4039 || lr 0.038482-0.007696-0.007696 | Time 2021-08-18 15:05:43
122 | Epoch 059/120: (68.1280 88.5900) | 520.9000 M (590.1964 -98.2419) || train (64.8263 85.0664) | 541.5755 M (608.6392 -96.0092)| loss 2.4037 || lr 0.038482-0.007696-0.007696 | Time 2021-08-18 15:05:43
123 | Epoch 060/120: (69.0660 89.3100) | 519.2288 M (589.0367 -98.7535) || train (65.5474 85.3702) | 528.7342 M (597.5745 -97.7859)| loss 2.3789 || lr 0.037500-0.007500-0.007500 | Time 2021-08-18 15:54:35
124 | Epoch 060/120: (69.0660 89.3100) | 519.2288 M (589.0367 -98.7535) || train (65.5099 85.4767) | 528.7330 M (597.5754 -97.7879)| loss 2.3768 || lr 0.037500-0.007500-0.007500 | Time 2021-08-18 15:54:35
125 | Epoch 061/120: (70.0120 89.6720) | 518.3791 M (588.3916 -98.9581) || train (66.6098 86.1380) | 528.6261 M (597.9674 -98.2869)| loss 2.3285 || lr 0.036518-0.007304-0.007304 | Time 2021-08-18 16:43:28
126 | Epoch 061/120: (70.0120 89.6720) | 518.3791 M (588.3916 -98.9581) || train (66.5563 86.0441) | 528.6300 M (597.9698 -98.2855)| loss 2.3335 || lr 0.036518-0.007304-0.007304 | Time 2021-08-18 16:43:28
127 | Epoch 062/120: (70.1820 89.7740) | 517.4345 M (587.6847 -99.1958) || train (67.2480 86.5295) | 528.5654 M (598.1580 -98.5381)| loss 2.3021 || lr 0.035537-0.007107-0.007107 | Time 2021-08-18 17:32:14
128 | Epoch 062/120: (70.1820 89.7740) | 517.4345 M (587.6847 -99.1958) || train (67.1495 86.4940) | 528.5685 M (598.1609 -98.5379)| loss 2.3046 || lr 0.035537-0.007107-0.007107 | Time 2021-08-18 17:32:14
129 | Epoch 063/120: (70.6100 89.9200) | 517.0996 M (587.2103 -99.0563) || train (67.7590 86.7839) | 528.5234 M (598.2400 -98.6622)| loss 2.2820 || lr 0.034558-0.006912-0.006912 | Time 2021-08-18 18:21:03
130 | Epoch 063/120: (70.6100 89.9200) | 517.0996 M (587.2103 -99.0563) || train (67.7165 86.8921) | 528.5279 M (598.2383 -98.6560)| loss 2.2809 || lr 0.034558-0.006912-0.006912 | Time 2021-08-18 18:21:03
131 | Epoch 064/120: (70.5660 89.9400) | 516.8346 M (587.4100 -99.5210) || train (68.0584 87.0741) | 528.4976 M (598.4602 -98.9081)| loss 2.2651 || lr 0.033580-0.006716-0.006716 | Time 2021-08-18 19:09:52
132 | Epoch 064/120: (70.5660 89.9400) | 516.8346 M (587.4100 -99.5210) || train (68.1961 87.1033) | 528.4889 M (598.4570 -98.9136)| loss 2.2621 || lr 0.033580-0.006716-0.006716 | Time 2021-08-18 19:09:52
133 | Epoch 065/120: (70.6720 90.0860) | 516.9873 M (587.6284 -99.5866) || train (68.5062 87.2889) | 528.4672 M (598.6882 -99.1665)| loss 2.2490 || lr 0.032605-0.006521-0.006521 | Time 2021-08-18 20:31:37
134 | Epoch 065/120: (70.6720 90.0860) | 516.9873 M (587.6284 -99.5866) || train (68.5287 87.3662) | 528.4543 M (598.6735 -99.1647)| loss 2.2462 || lr 0.032605-0.006521-0.006521 | Time 2021-08-18 20:31:37
135 | Epoch 066/120: (71.0080 90.2940) | 516.7441 M (587.5952 -99.7967) || train (68.7974 87.4894) | 528.4506 M (598.8469 -99.3419)| loss 2.2355 || lr 0.031634-0.006327-0.006327 | Time 2021-08-18 21:22:47
136 | Epoch 066/120: (71.0080 90.2940) | 516.7441 M (587.5952 -99.7967) || train (68.7655 87.5504) | 528.4432 M (598.8403 -99.3427)| loss 2.2352 || lr 0.031634-0.006327-0.006327 | Time 2021-08-18 21:22:47
137 | Epoch 067/120: (70.8040 90.2540) | 515.9640 M (586.9589 -99.9404) || train (69.1180 87.6737) | 528.4307 M (598.9465 -99.4614)| loss 2.2237 || lr 0.030666-0.006133-0.006133 | Time 2021-08-18 22:14:42
138 | Epoch 067/120: (70.8040 90.2540) | 515.9640 M (586.9589 -99.9404) || train (69.0899 87.6739) | 528.4256 M (598.9364 -99.4563)| loss 2.2243 || lr 0.030666-0.006133-0.006133 | Time 2021-08-18 22:14:42
139 | Epoch 068/120: (71.3500 90.3380) | 515.7037 M (586.9207 -100.1626) || train (69.3873 87.8758) | 528.4019 M (599.1721 -99.7158)| loss 2.2107 || lr 0.029703-0.005941-0.005941 | Time 2021-08-18 23:09:13
140 | Epoch 068/120: (71.3500 90.3380) | 515.7037 M (586.9207 -100.1626) || train (69.3932 87.9015) | 528.3926 M (599.1658 -99.7187)| loss 2.2116 || lr 0.029703-0.005941-0.005941 | Time 2021-08-18 23:09:13
141 | Epoch 069/120: (71.4080 90.4380) | 515.2758 M (586.4914 -100.1612) || train (69.6802 88.0571) | 528.3801 M (599.3914 -99.9569)| loss 2.1990 || lr 0.028746-0.005749-0.005749 | Time 2021-08-19 00:00:14
142 | Epoch 069/120: (71.4080 90.4380) | 515.2758 M (586.4914 -100.1612) || train (69.6499 88.0014) | 528.3655 M (599.3809 -99.9609)| loss 2.2011 || lr 0.028746-0.005749-0.005749 | Time 2021-08-19 00:00:14
143 | Epoch 070/120: (71.8240 90.6760) | 515.6496 M (587.2677 -100.5637) || train (69.9012 88.1329) | 528.3530 M (599.5016 -100.0941)| loss 2.1891 || lr 0.027794-0.005559-0.005559 | Time 2021-08-19 00:50:40
144 | Epoch 070/120: (71.8240 90.6760) | 515.6496 M (587.2677 -100.5637) || train (69.8561 88.1310) | 528.3697 M (599.5121 -100.0880)| loss 2.1922 || lr 0.027794-0.005559-0.005559 | Time 2021-08-19 00:50:40
145 | Epoch 071/120: (71.6780 90.6840) | 515.5636 M (587.1061 -100.4881) || train (70.0881 88.3122) | 528.3516 M (599.6872 -100.2811)| loss 2.1795 || lr 0.026849-0.005370-0.005370 | Time 2021-08-19 14:07:45
146 | Epoch 071/120: (71.6780 90.6840) | 515.5636 M (587.1061 -100.4881) || train (70.1945 88.3722) | 528.3486 M (599.6833 -100.2802)| loss 2.1777 || lr 0.026849-0.005370-0.005370 | Time 2021-08-19 14:07:45
147 | Epoch 072/120: (71.7700 90.6120) | 515.0914 M (586.6557 -100.5098) || train (70.3001 88.3911) | 528.3581 M (599.8245 -100.4120)| loss 2.1730 || lr 0.025912-0.005182-0.005182 | Time 2021-08-19 14:57:34
148 | Epoch 072/120: (71.7700 90.6120) | 515.0914 M (586.6557 -100.5098) || train (70.3987 88.3909) | 528.3616 M (599.8325 -100.4164)| loss 2.1725 || lr 0.025912-0.005182-0.005182 | Time 2021-08-19 14:57:34
149 | Epoch 073/120: (71.8360 90.7260) | 515.2443 M (586.9381 -100.6393) || train (70.5601 88.5854) | 528.3430 M (599.8893 -100.4919)| loss 2.1623 || lr 0.024982-0.004996-0.004996 | Time 2021-08-19 15:48:06
150 | Epoch 073/120: (71.8360 90.7260) | 515.2443 M (586.9381 -100.6393) || train (70.5729 88.5305) | 528.3337 M (599.8830 -100.4949)| loss 2.1644 || lr 0.024982-0.004996-0.004996 | Time 2021-08-19 15:48:06
151 | Epoch 074/120: (72.0300 91.0120) | 515.5038 M (587.2641 -100.7058) || train (70.8547 88.7086) | 528.3263 M (600.0317 -100.6509)| loss 2.1522 || lr 0.024061-0.004812-0.004812 | Time 2021-08-19 16:38:29
152 | Epoch 074/120: (72.0300 91.0120) | 515.5038 M (587.2641 -100.7058) || train (70.8344 88.7250) | 528.3217 M (600.0282 -100.6521)| loss 2.1505 || lr 0.024061-0.004812-0.004812 | Time 2021-08-19 16:38:29
153 | Epoch 075/120: (72.1060 90.9260) | 514.4231 M (586.3469 -100.8693) || train (70.9425 88.7254) | 528.3297 M (600.1815 -100.7974)| loss 2.1483 || lr 0.023149-0.004630-0.004630 | Time 2021-08-19 17:30:51
154 | Epoch 075/120: (72.1060 90.9260) | 514.4231 M (586.3469 -100.8693) || train (71.0333 88.8374) | 528.3220 M (600.1760 -100.7996)| loss 2.1443 || lr 0.023149-0.004630-0.004630 | Time 2021-08-19 17:30:51
155 | Epoch 076/120: (72.4380 90.9640) | 514.9254 M (587.2339 -101.2541) || train (71.2097 88.9248) | 528.2978 M (600.3210 -100.9688)| loss 2.1366 || lr 0.022247-0.004449-0.004449 | Time 2021-08-19 18:20:49
156 | Epoch 076/120: (72.4380 90.9640) | 514.9254 M (587.2339 -101.2541) || train (71.1290 88.8983) | 528.3095 M (600.3242 -100.9603)| loss 2.1368 || lr 0.022247-0.004449-0.004449 | Time 2021-08-19 18:20:49
157 | Epoch 077/120: (72.4840 91.0680) | 515.0593 M (587.2009 -101.0871) || train (71.4350 89.0369) | 528.3023 M (600.4543 -101.0976)| loss 2.1294 || lr 0.021356-0.004271-0.004271 | Time 2021-08-19 19:12:38
158 | Epoch 077/120: (72.4840 91.0680) | 515.0593 M (587.2009 -101.0871) || train (71.4354 89.0030) | 528.2980 M (600.4467 -101.0943)| loss 2.1280 || lr 0.021356-0.004271-0.004271 | Time 2021-08-19 19:12:38
159 | Epoch 078/120: (72.4380 91.0380) | 515.2583 M (587.7215 -101.4087) || train (71.6318 89.1176) | 528.2935 M (600.5729 -101.2249)| loss 2.1218 || lr 0.020475-0.004095-0.004095 | Time 2021-08-19 20:05:59
160 | Epoch 078/120: (72.4380 91.0380) | 515.2583 M (587.7215 -101.4087) || train (71.5981 89.1839) | 528.2955 M (600.5626 -101.2127)| loss 2.1187 || lr 0.020475-0.004095-0.004095 | Time 2021-08-19 20:06:00
161 | Epoch 079/120: (72.6480 91.0640) | 515.1542 M (587.5948 -101.3862) || train (71.7572 89.2561) | 528.3056 M (600.6580 -101.2980)| loss 2.1130 || lr 0.019607-0.003921-0.003921 | Time 2021-08-19 20:55:21
162 | Epoch 079/120: (72.6480 91.0640) | 515.1542 M (587.5948 -101.3862) || train (71.7634 89.2305) | 528.3028 M (600.6545 -101.2973)| loss 2.1146 || lr 0.019607-0.003921-0.003921 | Time 2021-08-19 20:55:21
163 | Epoch 080/120: (72.6520 91.1340) | 515.3355 M (587.8407 -101.4508) || train (72.1083 89.3814) | 528.2899 M (600.7356 -101.3912)| loss 2.1028 || lr 0.018750-0.003750-0.003750 | Time 2021-08-19 21:47:06
164 | Epoch 080/120: (72.6520 91.1340) | 515.3355 M (587.8407 -101.4508) || train (71.9548 89.3616) | 528.2806 M (600.7316 -101.3966)| loss 2.1061 || lr 0.018750-0.003750-0.003750 | Time 2021-08-19 21:47:06
165 | Epoch 081/120: (72.5700 91.2200) | 515.3307 M (587.8180 -101.4329) || train (72.1857 89.4409) | 528.2847 M (600.7830 -101.4439)| loss 2.0980 || lr 0.017906-0.003581-0.003581 | Time 2021-08-19 22:39:08
166 | Epoch 081/120: (72.5700 91.2200) | 515.3307 M (587.8180 -101.4329) || train (72.2524 89.5146) | 528.2840 M (600.7819 -101.4435)| loss 2.0950 || lr 0.017906-0.003581-0.003581 | Time 2021-08-19 22:39:08
167 | Epoch 082/120: (72.7720 91.3240) | 514.7424 M (587.4461 -101.6492) || train (72.3429 89.4696) | 528.2683 M (600.8565 -101.5338)| loss 2.0925 || lr 0.017076-0.003415-0.003415 | Time 2021-08-19 23:32:43
168 | Epoch 082/120: (72.7720 91.3240) | 514.7424 M (587.4461 -101.6492) || train (72.3223 89.5072) | 528.2752 M (600.8594 -101.5298)| loss 2.0918 || lr 0.017076-0.003415-0.003415 | Time 2021-08-19 23:32:44
169 | Epoch 083/120: (72.9280 91.2240) | 514.6887 M (587.3096 -101.5665) || train (72.5163 89.6252) | 528.2711 M (600.9512 -101.6256)| loss 2.0841 || lr 0.016260-0.003252-0.003252 | Time 2021-08-20 00:21:52
170 | Epoch 083/120: (72.9280 91.2240) | 514.6887 M (587.3096 -101.5665) || train (72.5835 89.6769) | 528.2651 M (600.9426 -101.6231)| loss 2.0823 || lr 0.016260-0.003252-0.003252 | Time 2021-08-20 00:21:52
171 | Epoch 084/120: (73.1140 91.3060) | 515.1625 M (588.0359 -101.8189) || train (72.6628 89.7562) | 528.2641 M (601.0080 -101.6895)| loss 2.0772 || lr 0.015458-0.003092-0.003092 | Time 2021-08-20 01:11:41
172 | Epoch 084/120: (73.1140 91.3060) | 515.1625 M (588.0359 -101.8189) || train (72.8343 89.6980) | 528.2521 M (600.9931 -101.6866)| loss 2.0769 || lr 0.015458-0.003092-0.003092 | Time 2021-08-20 01:11:41
173 | Epoch 085/120: (72.9160 91.3300) | 514.5791 M (587.3696 -101.7360) || train (72.9584 89.8207) | 528.2700 M (601.0662 -101.7417)| loss 2.0691 || lr 0.014671-0.002934-0.002934 | Time 2021-08-20 02:25:52
174 | Epoch 085/120: (72.9160 91.3300) | 514.5791 M (587.3696 -101.7360) || train (72.8640 89.7623) | 528.2586 M (601.0591 -101.7460)| loss 2.0721 || lr 0.014671-0.002934-0.002934 | Time 2021-08-20 02:25:52
175 | Epoch 086/120: (73.1860 91.4660) | 514.3439 M (587.2347 -101.8364) || train (73.0880 89.9423) | 528.2516 M (601.1335 -101.8275)| loss 2.0630 || lr 0.013900-0.002780-0.002780 | Time 2021-08-20 03:15:18
176 | Epoch 086/120: (73.1860 91.4660) | 514.3439 M (587.2347 -101.8364) || train (73.1292 89.9893) | 528.2531 M (601.1346 -101.8271)| loss 2.0602 || lr 0.013900-0.002780-0.002780 | Time 2021-08-20 03:15:18
177 | Epoch 087/120: (73.3120 91.3300) | 514.6106 M (587.4504 -101.7853) || train (73.3198 90.1034) | 528.2552 M (601.2244 -101.9147)| loss 2.0517 || lr 0.013146-0.002629-0.002629 | Time 2021-08-20 04:04:23
178 | Epoch 087/120: (73.3120 91.3300) | 514.6106 M (587.4504 -101.7853) || train (73.3465 90.0936) | 528.2444 M (601.2127 -101.9138)| loss 2.0522 || lr 0.013146-0.002629-0.002629 | Time 2021-08-20 04:04:23
179 | Epoch 088/120: (73.2120 91.3420) | 514.6781 M (587.5774 -101.8449) || train (73.4616 90.1324) | 528.2535 M (601.2760 -101.9680)| loss 2.0483 || lr 0.012408-0.002482-0.002482 | Time 2021-08-20 04:53:56
180 | Epoch 088/120: (73.2120 91.3420) | 514.6781 M (587.5774 -101.8449) || train (73.5632 90.1826) | 528.2530 M (601.2788 -101.9714)| loss 2.0454 || lr 0.012408-0.002482-0.002482 | Time 2021-08-20 04:53:56
181 | Epoch 089/120: (73.4960 91.4820) | 515.1937 M (588.2972 -102.0490) || train (73.6839 90.2512) | 528.2554 M (601.3218 -102.0120)| loss 2.0397 || lr 0.011687-0.002337-0.002337 | Time 2021-08-20 05:44:14
182 | Epoch 089/120: (73.4960 91.4820) | 515.1937 M (588.2972 -102.0490) || train (73.6704 90.2854) | 528.2534 M (601.3193 -102.0115)| loss 2.0385 || lr 0.011687-0.002337-0.002337 | Time 2021-08-20 05:44:14
183 | Epoch 090/120: (73.5720 91.5560) | 515.0114 M (588.0303 -101.9644) || train (73.8757 90.3344) | 528.2396 M (601.3643 -102.0703)| loss 2.0326 || lr 0.010983-0.002197-0.002197 | Time 2021-08-20 06:34:07
184 | Epoch 090/120: (73.5720 91.5560) | 515.0114 M (588.0303 -101.9644) || train (73.8429 90.3697) | 528.2443 M (601.3682 -102.0695)| loss 2.0332 || lr 0.010983-0.002197-0.002197 | Time 2021-08-20 06:34:07
185 | Epoch 091/120: (73.6240 91.5060) | 515.2121 M (588.3540 -102.0875) || train (74.0958 90.4456) | 528.2611 M (601.4140 -102.0985)| loss 2.0264 || lr 0.010298-0.002060-0.002060 | Time 2021-08-20 07:23:01
186 | Epoch 091/120: (73.6240 91.5060) | 515.2121 M (588.3540 -102.0875) || train (73.9675 90.3938) | 528.2423 M (601.3956 -102.0989)| loss 2.0272 || lr 0.010298-0.002060-0.002060 | Time 2021-08-20 07:23:01
187 | Epoch 092/120: (73.7000 91.5300) | 515.1611 M (588.2827 -102.0671) || train (74.1431 90.4824) | 528.2487 M (601.4153 -102.1122)| loss 2.0222 || lr 0.009632-0.001926-0.001926 | Time 2021-08-20 08:13:40
188 | Epoch 092/120: (73.7000 91.5300) | 515.1611 M (588.2827 -102.0671) || train (74.2323 90.5029) | 528.2529 M (601.4176 -102.1103)| loss 2.0206 || lr 0.009632-0.001926-0.001926 | Time 2021-08-20 08:13:40
189 | Epoch 093/120: (73.7680 91.6580) | 514.8684 M (587.9894 -102.0666) || train (74.3333 90.5964) | 528.2170 M (601.4351 -102.1637)| loss 2.0145 || lr 0.008985-0.001797-0.001797 | Time 2021-08-20 09:04:49
190 | Epoch 093/120: (73.7680 91.6580) | 514.8684 M (587.9894 -102.0666) || train (74.2911 90.5553) | 528.2345 M (601.4475 -102.1585)| loss 2.0166 || lr 0.008985-0.001797-0.001797 | Time 2021-08-20 09:04:49
191 | Epoch 094/120: (73.7640 91.6580) | 514.1840 M (587.3031 -102.0647) || train (74.5048 90.6526) | 528.2367 M (601.4586 -102.1675)| loss 2.0089 || lr 0.008357-0.001671-0.001671 | Time 2021-08-20 09:54:46
192 | Epoch 094/120: (73.7640 91.6580) | 514.1840 M (587.3031 -102.0647) || train (74.5247 90.7122) | 528.2323 M (601.4556 -102.1689)| loss 2.0067 || lr 0.008357-0.001671-0.001671 | Time 2021-08-20 09:54:46
193 | Epoch 095/120: (73.7840 91.6520) | 514.5392 M (587.6685 -102.0748) || train (74.6995 90.7656) | 528.2283 M (601.4423 -102.1596)| loss 2.0029 || lr 0.007749-0.001550-0.001550 | Time 2021-08-20 10:44:41
194 | Epoch 095/120: (73.7840 91.6520) | 514.5392 M (587.6685 -102.0748) || train (74.6544 90.7235) | 528.2316 M (601.4427 -102.1566)| loss 2.0036 || lr 0.007749-0.001550-0.001550 | Time 2021-08-20 10:44:41
195 | Epoch 096/120: (73.8460 91.6720) | 514.8140 M (588.0123 -102.1438) || train (74.8350 90.8187) | 528.2384 M (601.4748 -102.1820)| loss 1.9963 || lr 0.007162-0.001432-0.001432 | Time 2021-08-21 01:51:39
196 | Epoch 096/120: (73.8460 91.6720) | 514.8140 M (588.0123 -102.1438) || train (74.8620 90.8246) | 528.2295 M (601.4657 -102.1818)| loss 1.9961 || lr 0.007162-0.001432-0.001432 | Time 2021-08-21 01:51:39
197 | Epoch 097/120: (73.9700 91.8180) | 514.8280 M (588.1018 -102.2194) || train (75.0535 90.9912) | 528.2170 M (601.5287 -102.2573)| loss 1.9886 || lr 0.006595-0.001319-0.001319 | Time 2021-08-21 02:40:21
198 | Epoch 097/120: (73.9700 91.8180) | 514.8280 M (588.1018 -102.2194) || train (75.0242 90.9192) | 528.2192 M (601.5289 -102.2553)| loss 1.9896 || lr 0.006595-0.001319-0.001319 | Time 2021-08-21 02:40:21
199 | Epoch 098/120: (74.1640 91.8320) | 514.7488 M (588.0407 -102.2374) || train (75.0938 90.9639) | 528.2407 M (601.5554 -102.2603)| loss 1.9867 || lr 0.006050-0.001210-0.001210 | Time 2021-08-21 03:29:01
200 | Epoch 098/120: (74.1640 91.8320) | 514.7488 M (588.0407 -102.2374) || train (75.2473 91.0243) | 528.2190 M (601.5305 -102.2571)| loss 1.9821 || lr 0.006050-0.001210-0.001210 | Time 2021-08-21 03:29:01
201 | Epoch 099/120: (74.1520 91.7400) | 515.2655 M (588.5511 -102.2311) || train (75.3231 91.1239) | 528.2284 M (601.5479 -102.2651)| loss 1.9771 || lr 0.005526-0.001105-0.001105 | Time 2021-08-21 04:17:42
202 | Epoch 099/120: (74.1520 91.7400) | 515.2655 M (588.5511 -102.2311) || train (75.2925 91.0468) | 528.2223 M (601.5408 -102.2641)| loss 1.9789 || lr 0.005526-0.001105-0.001105 | Time 2021-08-21 04:17:42
203 | Epoch 100/120: (74.2100 91.8480) | 514.5014 M (587.7542 -102.1984) || train (75.4613 91.1729) | 528.2289 M (601.5808 -102.2975)| loss 1.9721 || lr 0.005024-0.001005-0.001005 | Time 2021-08-21 05:06:21
204 | Epoch 100/120: (74.2100 91.8480) | 514.5014 M (587.7542 -102.1984) || train (75.4319 91.1763) | 528.2317 M (601.5805 -102.2944)| loss 1.9733 || lr 0.005024-0.001005-0.001005 | Time 2021-08-21 05:06:21
205 | Epoch 101/120: (74.0160 91.8680) | 514.6555 M (588.1390 -102.4291) || train (75.6578 91.2163) | 528.2210 M (601.5680 -102.2925)| loss 1.9664 || lr 0.004544-0.000909-0.000909 | Time 2021-08-21 05:55:01
206 | Epoch 101/120: (74.0160 91.8680) | 514.6555 M (588.1390 -102.4291) || train (75.6346 91.2408) | 528.2169 M (601.5582 -102.2868)| loss 1.9653 || lr 0.004544-0.000909-0.000909 | Time 2021-08-21 05:55:01
207 | Epoch 102/120: (74.1620 91.9500) | 515.1085 M (588.3885 -102.2256) || train (75.8728 91.3950) | 528.2133 M (601.5657 -102.2980)| loss 1.9583 || lr 0.004087-0.000817-0.000817 | Time 2021-08-21 06:43:40
208 | Epoch 102/120: (74.1620 91.9500) | 515.1085 M (588.3885 -102.2256) || train (75.7114 91.2616) | 528.2177 M (601.5687 -102.2966)| loss 1.9642 || lr 0.004087-0.000817-0.000817 | Time 2021-08-21 06:43:40
209 | Epoch 103/120: (74.0720 91.9240) | 513.9633 M (587.1516 -102.1339) || train (75.8249 91.3214) | 528.2091 M (601.5943 -102.3308)| loss 1.9583 || lr 0.003653-0.000731-0.000731 | Time 2021-08-21 07:32:21
210 | Epoch 103/120: (74.0720 91.9240) | 513.9633 M (587.1516 -102.1339) || train (75.8478 91.3242) | 528.2293 M (601.6127 -102.3289)| loss 1.9595 || lr 0.003653-0.000731-0.000731 | Time 2021-08-21 07:32:21
211 | Epoch 104/120: (74.3100 91.9760) | 514.6400 M (587.8689 -102.1745) || train (76.1244 91.5059) | 528.2205 M (601.5904 -102.3155)| loss 1.9497 || lr 0.003242-0.000648-0.000648 | Time 2021-08-21 08:21:01
212 | Epoch 104/120: (74.3100 91.9760) | 514.6400 M (587.8689 -102.1745) || train (75.9604 91.3543) | 528.2220 M (601.5971 -102.3207)| loss 1.9569 || lr 0.003242-0.000648-0.000648 | Time 2021-08-21 08:21:01
213 | Epoch 105/120: (74.6200 92.0340) | 515.2262 M (588.5347 -102.2540) || train (76.0405 91.4258) | 528.2204 M (601.5991 -102.3243)| loss 1.9513 || lr 0.002855-0.000571-0.000571 | Time 2021-08-21 09:09:42
214 | Epoch 105/120: (74.6200 92.0340) | 515.2262 M (588.5347 -102.2540) || train (76.2446 91.5176) | 528.2004 M (601.5828 -102.3280)| loss 1.9451 || lr 0.002855-0.000571-0.000571 | Time 2021-08-21 09:09:42
215 | Epoch 106/120: (74.4820 92.0920) | 515.0799 M (588.3632 -102.2288) || train (76.2052 91.4968) | 528.2155 M (601.6098 -102.3399)| loss 1.9456 || lr 0.002491-0.000498-0.000498 | Time 2021-08-21 09:58:21
216 | Epoch 106/120: (74.4820 92.0920) | 515.0799 M (588.3632 -102.2288) || train (76.1799 91.4452) | 528.2211 M (601.6141 -102.3386)| loss 1.9471 || lr 0.002491-0.000498-0.000498 | Time 2021-08-21 09:58:21
217 | Epoch 107/120: (74.5920 92.1420) | 514.8863 M (588.2293 -102.2886) || train (76.3247 91.5354) | 528.2255 M (601.6132 -102.3333)| loss 1.9414 || lr 0.002151-0.000430-0.000430 | Time 2021-08-21 10:46:59
218 | Epoch 107/120: (74.5920 92.1420) | 514.8863 M (588.2293 -102.2886) || train (76.3480 91.6002) | 528.2190 M (601.6025 -102.3291)| loss 1.9390 || lr 0.002151-0.000430-0.000430 | Time 2021-08-21 10:46:59
219 | Epoch 108/120: (74.6880 92.0920) | 514.6597 M (587.9243 -102.2102) || train (76.4851 91.6370) | 528.2127 M (601.6157 -102.3485)| loss 1.9366 || lr 0.001835-0.000367-0.000367 | Time 2021-08-21 11:35:39
220 | Epoch 108/120: (74.6880 92.0920) | 514.6597 M (587.9243 -102.2102) || train (76.4281 91.6120) | 528.2187 M (601.6232 -102.3500)| loss 1.9376 || lr 0.001835-0.000367-0.000367 | Time 2021-08-21 11:35:39
221 | Epoch 109/120: (74.6420 92.1200) | 515.1069 M (588.4367 -102.2754) || train (76.5136 91.6098) | 528.1977 M (601.6105 -102.3584)| loss 1.9346 || lr 0.001544-0.000309-0.000309 | Time 2021-08-21 12:24:20
222 | Epoch 109/120: (74.6420 92.1200) | 515.1069 M (588.4367 -102.2754) || train (76.5239 91.6798) | 528.1945 M (601.6071 -102.3582)| loss 1.9323 || lr 0.001544-0.000309-0.000309 | Time 2021-08-21 12:24:20
223 | Epoch 110/120: (74.6420 92.1040) | 515.0642 M (588.4147 -102.2961) || train (76.6615 91.6935) | 528.2074 M (601.6350 -102.3731)| loss 1.9297 || lr 0.001278-0.000256-0.000256 | Time 2021-08-21 13:13:00
224 | Epoch 110/120: (74.6420 92.1040) | 515.0642 M (588.4147 -102.2961) || train (76.6652 91.6987) | 528.2062 M (601.6295 -102.3689)| loss 1.9294 || lr 0.001278-0.000256-0.000256 | Time 2021-08-21 13:13:00
225 | Epoch 111/120: (74.5960 92.1600) | 514.7508 M (588.1994 -102.3942) || train (76.7665 91.7700) | 528.2139 M (601.6489 -102.3806)| loss 1.9265 || lr 0.001036-0.000207-0.000207 | Time 2021-08-21 14:01:41
226 | Epoch 111/120: (74.5960 92.1600) | 514.7508 M (588.1994 -102.3942) || train (76.7450 91.7188) | 528.2101 M (601.6473 -102.3828)| loss 1.9274 || lr 0.001036-0.000207-0.000207 | Time 2021-08-21 14:01:41
227 | Epoch 112/120: (74.5940 92.1900) | 514.8956 M (588.2319 -102.2819) || train (76.7667 91.7722) | 528.2155 M (601.6499 -102.3799)| loss 1.9254 || lr 0.000819-0.000164-0.000164 | Time 2021-08-21 14:50:21
228 | Epoch 112/120: (74.5940 92.1900) | 514.8956 M (588.2319 -102.2819) || train (76.7478 91.8011) | 528.2167 M (601.6521 -102.3810)| loss 1.9248 || lr 0.000819-0.000164-0.000164 | Time 2021-08-21 14:50:21
229 | Epoch 113/120: (74.8140 92.1720) | 515.0024 M (588.3334 -102.2766) || train (76.8831 91.7798) | 528.2100 M (601.6450 -102.3805)| loss 1.9219 || lr 0.000628-0.000126-0.000126 | Time 2021-08-21 15:38:59
230 | Epoch 113/120: (74.8140 92.1720) | 515.0024 M (588.3334 -102.2766) || train (76.7532 91.7380) | 528.2045 M (601.6403 -102.3814)| loss 1.9258 || lr 0.000628-0.000126-0.000126 | Time 2021-08-21 15:38:59
231 | Epoch 114/120: (74.6420 92.2020) | 514.6888 M (587.9732 -102.2300) || train (76.8382 91.7102) | 528.2273 M (601.6587 -102.3770)| loss 1.9261 || lr 0.000462-0.000092-0.000092 | Time 2021-08-21 16:27:40
232 | Epoch 114/120: (74.6420 92.2020) | 514.6888 M (587.9732 -102.2300) || train (76.7127 91.7052) | 528.2289 M (601.6579 -102.3745)| loss 1.9266 || lr 0.000462-0.000092-0.000092 | Time 2021-08-21 16:27:40
233 | Epoch 115/120: (74.6960 92.1780) | 514.9542 M (588.2684 -102.2598) || train (76.8580 91.7956) | 528.2030 M (601.6300 -102.3726)| loss 1.9225 || lr 0.000321-0.000064-0.000064 | Time 2021-08-21 17:16:21
234 | Epoch 115/120: (74.6960 92.1780) | 514.9542 M (588.2684 -102.2598) || train (76.8399 91.7861) | 528.2056 M (601.6377 -102.3777)| loss 1.9241 || lr 0.000321-0.000064-0.000064 | Time 2021-08-21 17:16:21
235 | Epoch 116/120: (74.6780 92.1520) | 515.0678 M (588.3804 -102.2582) || train (76.8608 91.8307) | 528.2124 M (601.6429 -102.3760)| loss 1.9213 || lr 0.000205-0.000041-0.000041 | Time 2021-08-21 18:05:02
236 | Epoch 116/120: (74.6780 92.1520) | 515.0678 M (588.3804 -102.2582) || train (76.8521 91.8184) | 528.2188 M (601.6467 -102.3735)| loss 1.9219 || lr 0.000205-0.000041-0.000041 | Time 2021-08-21 18:05:02
237 | Epoch 117/120: (74.7560 92.1760) | 515.0918 M (588.4624 -102.3162) || train (76.9560 91.8354) | 528.2065 M (601.6334 -102.3724)| loss 1.9187 || lr 0.000116-0.000023-0.000023 | Time 2021-08-21 18:53:41
238 | Epoch 117/120: (74.7560 92.1760) | 515.0918 M (588.4624 -102.3162) || train (76.8887 91.8406) | 528.2102 M (601.6360 -102.3714)| loss 1.9199 || lr 0.000116-0.000023-0.000023 | Time 2021-08-21 18:53:41
239 | Epoch 118/120: (74.7120 92.2040) | 515.1220 M (588.4781 -102.3017) || train (76.9172 91.8359) | 528.2098 M (601.6369 -102.3726)| loss 1.9192 || lr 0.000051-0.000010-0.000010 | Time 2021-08-21 19:42:20
240 | Epoch 118/120: (74.7120 92.2040) | 515.1220 M (588.4781 -102.3017) || train (76.9406 91.8343) | 528.2161 M (601.6411 -102.3706)| loss 1.9194 || lr 0.000051-0.000010-0.000010 | Time 2021-08-21 19:42:20
241 | Epoch 119/120: (74.5420 92.1420) | 514.5430 M (587.7976 -102.2002) || train (76.9126 91.8858) | 528.2041 M (601.6274 -102.3689)| loss 1.9175 || lr 0.000013-0.000003-0.000003 | Time 2021-08-21 20:31:00
242 | Epoch 119/120: (74.5420 92.1420) | 514.5430 M (587.7976 -102.2002) || train (76.8976 91.8192) | 528.2039 M (601.6324 -102.3740)| loss 1.9215 || lr 0.000013-0.000003-0.000003 | Time 2021-08-21 20:31:00
243 |
--------------------------------------------------------------------------------
/logs/msgc_resnet18_noatt_log.txt:
--------------------------------------------------------------------------------
1 | Flops of dyresnet18: original 1817.209344 M, target 890.4325785599999 M, possible 560.652512 M, main 139.424256 M, dgc 1677.785088 M, mask 1.781984 M
2 | Flops of dyresnet18: original 1817.209344 M, target 890.4325785599999 M, possible 560.652512 M, main 139.424256 M, dgc 1677.785088 M, mask 1.781984 M
3 | Epoch 000/120: (61.6160 84.1040) | 1818.7949 M (1677.5886 -0.0000) || train (50.8386 74.2558) | 1702.6498 M (1561.5042 -0.0607)| loss 3.1189 || lr 0.075000-0.015000-0.015000 | Time 2021-09-13 23:40:11
4 | Epoch 000/120: (61.6180 84.1040) | 1818.7949 M (1677.5886 -0.0000) || train (50.8701 74.2719) | 1702.6487 M (1561.5029 -0.0604)| loss 3.1195 || lr 0.075000-0.015000-0.015000 | Time 2021-09-13 23:40:11
5 | Epoch 001/120: (62.3060 84.6920) | 1818.2589 M (1677.0526 -0.0000) || train (57.1686 79.4525) | 1765.4962 M (1624.2986 -0.0087)| loss 2.8256 || lr 0.074987-0.014997-0.014997 | Time 2021-09-13 23:53:08
6 | Epoch 001/120: (62.3060 84.6920) | 1818.2589 M (1677.0526 -0.0000) || train (57.1247 79.4336) | 1765.5010 M (1624.3035 -0.0088)| loss 2.8264 || lr 0.074987-0.014997-0.014997 | Time 2021-09-13 23:53:08
7 | Epoch 002/120: (63.2620 85.3300) | 1802.0412 M (1660.8349 -0.0000) || train (58.5519 80.4800) | 1777.3151 M (1636.1117 -0.0029)| loss 2.7594 || lr 0.074949-0.014990-0.014990 | Time 2021-09-14 00:06:16
8 | Epoch 002/120: (63.2640 85.3320) | 1802.0412 M (1660.8349 -0.0000) || train (58.6245 80.4900) | 1777.3185 M (1636.1151 -0.0028)| loss 2.7601 || lr 0.074949-0.014990-0.014990 | Time 2021-09-14 00:06:19
9 | Epoch 003/120: (63.4180 85.2380) | 1785.6143 M (1644.4081 -0.0000) || train (59.0163 80.7178) | 1762.2845 M (1621.0871 -0.0088)| loss 2.7380 || lr 0.074884-0.014977-0.014977 | Time 2021-09-14 00:19:26
10 | Epoch 003/120: (63.4180 85.2360) | 1785.6144 M (1644.4081 -0.0000) || train (59.0514 80.8278) | 1762.2768 M (1621.0794 -0.0089)| loss 2.7362 || lr 0.074884-0.014977-0.014977 | Time 2021-09-14 00:19:28
11 | Epoch 004/120: (63.6160 85.2880) | 1765.0201 M (1623.8157 -0.0019) || train (59.3471 81.0885) | 1746.7034 M (1605.5294 -0.0322)| loss 2.7183 || lr 0.074795-0.014959-0.014959 | Time 2021-09-14 00:32:22
12 | Epoch 004/120: (63.6180 85.2880) | 1765.0201 M (1623.8157 -0.0019) || train (59.3323 80.9649) | 1746.7114 M (1605.5371 -0.0321)| loss 2.7210 || lr 0.074795-0.014959-0.014959 | Time 2021-09-14 00:32:23
13 | Epoch 005/120: (63.6580 85.6500) | 1743.4220 M (1602.2295 -0.0138) || train (59.4879 81.1068) | 1731.2392 M (1590.1185 -0.0856)| loss 2.7091 || lr 0.074679-0.014936-0.014936 | Time 2021-09-14 00:45:17
14 | Epoch 005/120: (63.6580 85.6500) | 1743.4220 M (1602.2295 -0.0138) || train (59.4948 81.1616) | 1731.2397 M (1590.1193 -0.0859)| loss 2.7067 || lr 0.074679-0.014936-0.014936 | Time 2021-09-14 00:45:18
15 | Epoch 006/120: (63.0320 84.9680) | 1723.7469 M (1582.6393 -0.0987) || train (59.7047 81.3195) | 1715.8311 M (1574.8198 -0.1949)| loss 2.6982 || lr 0.074538-0.014908-0.014908 | Time 2021-09-14 00:58:12
16 | Epoch 006/120: (63.0300 84.9680) | 1723.7469 M (1582.6393 -0.0987) || train (59.7416 81.2691) | 1715.8271 M (1574.8156 -0.1948)| loss 2.6968 || lr 0.074538-0.014908-0.014908 | Time 2021-09-14 00:58:13
17 | Epoch 007/120: (63.0300 84.9760) | 1705.4741 M (1564.6823 -0.4144) || train (59.8329 81.3956) | 1700.4080 M (1559.6315 -0.4298)| loss 2.6885 || lr 0.074372-0.014874-0.014874 | Time 2021-09-14 01:11:11
18 | Epoch 007/120: (63.0300 84.9760) | 1705.4741 M (1564.6823 -0.4144) || train (59.8966 81.3801) | 1700.4099 M (1559.6326 -0.4290)| loss 2.6889 || lr 0.074372-0.014874-0.014874 | Time 2021-09-14 01:11:12
19 | Epoch 008/120: (63.7340 85.7300) | 1687.7883 M (1547.3907 -0.8087) || train (60.0347 81.5045) | 1684.9774 M (1544.6438 -0.8727)| loss 2.6805 || lr 0.074181-0.014836-0.014836 | Time 2021-09-14 01:24:09
20 | Epoch 008/120: (63.7340 85.7300) | 1687.7883 M (1547.3907 -0.8087) || train (59.8893 81.4995) | 1684.9730 M (1544.6412 -0.8745)| loss 2.6824 || lr 0.074181-0.014836-0.014836 | Time 2021-09-14 01:24:10
21 | Epoch 009/120: (63.4980 85.4580) | 1670.5914 M (1531.5882 -2.2031) || train (60.1112 81.5854) | 1669.5037 M (1529.8248 -1.5273)| loss 2.6742 || lr 0.073964-0.014793-0.014793 | Time 2021-09-14 01:37:07
22 | Epoch 009/120: (63.4980 85.4560) | 1670.5914 M (1531.5882 -2.2031) || train (60.0096 81.5326) | 1669.5071 M (1529.8299 -1.5291)| loss 2.6749 || lr 0.073964-0.014793-0.014793 | Time 2021-09-14 01:37:09
23 | Epoch 010/120: (63.5300 85.3020) | 1653.4995 M (1516.4335 -4.1403) || train (60.2224 81.6764) | 1654.0596 M (1515.6230 -2.7697)| loss 2.6661 || lr 0.073722-0.014744-0.014744 | Time 2021-09-14 01:50:02
24 | Epoch 010/120: (63.5300 85.3020) | 1653.4994 M (1516.4334 -4.1403) || train (60.1478 81.5860) | 1654.0632 M (1515.6267 -2.7698)| loss 2.6698 || lr 0.073722-0.014744-0.014744 | Time 2021-09-14 01:50:03
25 | Epoch 011/120: (63.5500 85.5300) | 1637.2281 M (1501.4057 -5.3839) || train (60.2772 81.7679) | 1638.6162 M (1501.5620 -4.1521)| loss 2.6613 || lr 0.073456-0.014691-0.014691 | Time 2021-09-14 02:02:48
26 | Epoch 011/120: (63.5520 85.5300) | 1637.2281 M (1501.4057 -5.3839) || train (60.0994 81.6151) | 1638.6208 M (1501.5648 -4.1502)| loss 2.6654 || lr 0.073456-0.014691-0.014691 | Time 2021-09-14 02:02:49
27 | Epoch 012/120: (64.3140 86.0520) | 1618.4795 M (1484.8869 -7.6136) || train (60.3128 81.7645) | 1623.1683 M (1487.5504 -5.5885)| loss 2.6570 || lr 0.073165-0.014633-0.014633 | Time 2021-09-14 02:15:34
28 | Epoch 012/120: (64.3140 86.0520) | 1618.4796 M (1484.8869 -7.6136) || train (60.2613 81.6967) | 1623.1649 M (1487.5492 -5.5906)| loss 2.6596 || lr 0.073165-0.014633-0.014633 | Time 2021-09-14 02:15:36
29 | Epoch 013/120: (63.5960 85.5300) | 1600.2170 M (1468.7355 -9.7248) || train (60.4458 81.8024) | 1607.7234 M (1473.8036 -7.2865)| loss 2.6527 || lr 0.072849-0.014570-0.014570 | Time 2021-09-14 02:28:22
30 | Epoch 013/120: (63.5960 85.5300) | 1600.2170 M (1468.7355 -9.7248) || train (60.3378 81.7940) | 1607.7180 M (1473.7978 -7.2861)| loss 2.6543 || lr 0.072849-0.014570-0.014570 | Time 2021-09-14 02:28:23
31 | Epoch 014/120: (63.1640 85.3260) | 1584.7861 M (1454.2198 -10.6399) || train (60.4172 81.9068) | 1592.2561 M (1459.7476 -8.6978)| loss 2.6478 || lr 0.072509-0.014502-0.014502 | Time 2021-09-14 02:41:09
32 | Epoch 014/120: (63.1640 85.3260) | 1584.7861 M (1454.2197 -10.6399) || train (60.4260 81.9078) | 1592.2583 M (1459.7522 -8.7002)| loss 2.6480 || lr 0.072509-0.014502-0.014502 | Time 2021-09-14 02:41:09
33 | Epoch 015/120: (64.0100 85.7040) | 1568.9682 M (1439.6401 -11.8782) || train (60.4216 81.8779) | 1576.8668 M (1445.5047 -9.8442)| loss 2.6459 || lr 0.072145-0.014429-0.014429 | Time 2021-09-14 02:53:54
34 | Epoch 015/120: (64.0100 85.7040) | 1568.9682 M (1439.6401 -11.8782) || train (60.4884 81.9310) | 1576.8646 M (1445.5071 -9.8488)| loss 2.6433 || lr 0.072145-0.014429-0.014429 | Time 2021-09-14 02:53:54
35 | Epoch 016/120: (64.0160 85.7840) | 1551.8382 M (1424.4128 -13.7808) || train (60.6234 82.0050) | 1561.4522 M (1431.7738 -11.5279)| loss 2.6382 || lr 0.071758-0.014352-0.014352 | Time 2021-09-14 03:06:42
36 | Epoch 016/120: (64.0160 85.7820) | 1551.8382 M (1424.4128 -13.7808) || train (60.5457 81.9218) | 1561.4468 M (1431.7687 -11.5282)| loss 2.6419 || lr 0.071758-0.014352-0.014352 | Time 2021-09-14 03:06:42
37 | Epoch 017/120: (64.6100 85.9960) | 1537.5855 M (1411.6719 -15.2927) || train (60.6507 81.9707) | 1546.0131 M (1417.7097 -12.9028)| loss 2.6367 || lr 0.071347-0.014269-0.014269 | Time 2021-09-14 03:19:35
38 | Epoch 017/120: (64.6140 85.9960) | 1537.5855 M (1411.6719 -15.2927) || train (60.5499 82.0189) | 1546.0036 M (1417.7001 -12.9028)| loss 2.6390 || lr 0.071347-0.014269-0.014269 | Time 2021-09-14 03:19:35
39 | Epoch 018/120: (63.9280 85.7280) | 1519.8890 M (1395.7532 -17.0704) || train (60.5707 81.9616) | 1530.4765 M (1403.9338 -14.6636)| loss 2.6376 || lr 0.070913-0.014183-0.014183 | Time 2021-09-14 03:32:25
40 | Epoch 018/120: (63.9280 85.7280) | 1519.8890 M (1395.7532 -17.0704) || train (60.5807 81.9633) | 1530.4806 M (1403.9407 -14.6663)| loss 2.6357 || lr 0.070913-0.014183-0.014183 | Time 2021-09-14 03:32:26
41 | Epoch 019/120: (63.7600 85.7320) | 1502.8557 M (1381.0404 -19.3909) || train (60.6401 82.0242) | 1515.0804 M (1390.0753 -16.2011)| loss 2.6332 || lr 0.070456-0.014091-0.014091 | Time 2021-09-14 03:45:16
42 | Epoch 019/120: (63.7600 85.7320) | 1502.8557 M (1381.0403 -19.3909) || train (60.7084 82.0981) | 1515.0719 M (1390.0653 -16.1996)| loss 2.6291 || lr 0.070456-0.014091-0.014091 | Time 2021-09-14 03:45:16
43 | Epoch 020/120: (64.2660 85.6880) | 1488.4252 M (1369.0229 -21.8040) || train (60.7185 82.0423) | 1499.6975 M (1376.7533 -18.2620)| loss 2.6318 || lr 0.069976-0.013995-0.013995 | Time 2021-09-14 03:58:06
44 | Epoch 020/120: (64.2660 85.6880) | 1488.4252 M (1369.0229 -21.8040) || train (60.7491 82.0924) | 1499.6862 M (1376.7522 -18.2722)| loss 2.6288 || lr 0.069976-0.013995-0.013995 | Time 2021-09-14 03:58:07
45 | Epoch 021/120: (64.5300 86.0580) | 1472.9067 M (1355.3798 -23.6794) || train (60.7372 82.1109) | 1484.2521 M (1364.0383 -20.9926)| loss 2.6268 || lr 0.069474-0.013895-0.013895 | Time 2021-09-14 04:10:58
46 | Epoch 021/120: (64.5300 86.0580) | 1472.9068 M (1355.3798 -23.6794) || train (60.8087 82.0355) | 1484.2427 M (1364.0344 -20.9980)| loss 2.6284 || lr 0.069474-0.013895-0.013895 | Time 2021-09-14 04:10:58
47 | Epoch 022/120: (64.2640 85.8000) | 1457.6161 M (1341.4001 -24.9903) || train (60.6436 82.1152) | 1468.8313 M (1350.5050 -22.8800)| loss 2.6261 || lr 0.068950-0.013790-0.013790 | Time 2021-09-14 04:23:48
48 | Epoch 022/120: (64.2640 85.8000) | 1457.6161 M (1341.4001 -24.9903) || train (60.8858 82.2055) | 1468.8351 M (1350.5082 -22.8793)| loss 2.6220 || lr 0.068950-0.013790-0.013790 | Time 2021-09-14 04:23:48
49 | Epoch 023/120: (64.7920 86.2200) | 1440.7428 M (1326.5000 -26.9635) || train (60.8941 82.2312) | 1453.4029 M (1336.5826 -24.3859)| loss 2.6198 || lr 0.068405-0.013681-0.013681 | Time 2021-09-14 04:36:38
50 | Epoch 023/120: (64.7920 86.2200) | 1440.7428 M (1326.5000 -26.9635) || train (60.9161 82.2203) | 1453.4011 M (1336.5785 -24.3837)| loss 2.6178 || lr 0.068405-0.013681-0.013681 | Time 2021-09-14 04:36:38
51 | Epoch 024/120: (64.4600 85.8460) | 1425.8592 M (1312.6293 -27.9763) || train (60.8376 82.1931) | 1437.9639 M (1322.5802 -25.8225)| loss 2.6191 || lr 0.067838-0.013568-0.013568 | Time 2021-09-14 04:49:26
52 | Epoch 024/120: (64.4600 85.8480) | 1425.8592 M (1312.6293 -27.9763) || train (60.8360 82.1781) | 1437.9674 M (1322.5815 -25.8204)| loss 2.6197 || lr 0.067838-0.013568-0.013568 | Time 2021-09-14 04:49:26
53 | Epoch 025/120: (64.0040 85.6920) | 1411.7065 M (1300.9981 -30.4979) || train (60.9592 82.2916) | 1422.5039 M (1309.3668 -28.0692)| loss 2.6155 || lr 0.067251-0.013450-0.013450 | Time 2021-09-14 05:02:09
54 | Epoch 025/120: (64.0040 85.6920) | 1411.7065 M (1300.9981 -30.4979) || train (60.9536 82.2022) | 1422.5179 M (1309.3733 -28.0617)| loss 2.6184 || lr 0.067251-0.013450-0.013450 | Time 2021-09-14 05:02:09
55 | Epoch 026/120: (63.4440 85.3060) | 1393.4755 M (1285.5926 -33.3233) || train (60.9815 82.2297) | 1407.0073 M (1296.2639 -30.4628)| loss 2.6154 || lr 0.066643-0.013329-0.013329 | Time 2021-09-14 05:14:51
56 | Epoch 026/120: (63.4420 85.3060) | 1393.4755 M (1285.5926 -33.3233) || train (60.8557 82.2421) | 1407.0060 M (1296.2602 -30.4604)| loss 2.6188 || lr 0.066643-0.013329-0.013329 | Time 2021-09-14 05:14:52
57 | Epoch 027/120: (63.8500 85.3560) | 1378.7826 M (1274.6339 -37.0576) || train (60.9277 82.2732) | 1391.6159 M (1283.1242 -32.7146)| loss 2.6155 || lr 0.066015-0.013203-0.013203 | Time 2021-09-14 05:27:34
58 | Epoch 027/120: (63.8500 85.3560) | 1378.7826 M (1274.6339 -37.0576) || train (60.9644 82.2771) | 1391.6236 M (1283.1294 -32.7122)| loss 2.6141 || lr 0.066015-0.013203-0.013203 | Time 2021-09-14 05:27:34
59 | Epoch 028/120: (64.1660 85.8680) | 1362.9644 M (1261.8720 -40.1139) || train (61.0195 82.3055) | 1376.1730 M (1271.3242 -36.3575)| loss 2.6113 || lr 0.065368-0.013074-0.013074 | Time 2021-09-14 05:40:17
60 | Epoch 028/120: (64.1660 85.8680) | 1362.9644 M (1261.8720 -40.1139) || train (61.0343 82.2368) | 1376.1753 M (1271.3255 -36.3564)| loss 2.6130 || lr 0.065368-0.013074-0.013074 | Time 2021-09-14 05:40:17
61 | Epoch 029/120: (64.3020 85.9900) | 1347.6479 M (1249.5243 -43.0827) || train (61.0098 82.2640) | 1360.7743 M (1258.7613 -39.1933)| loss 2.6129 || lr 0.064702-0.012940-0.012940 | Time 2021-09-14 05:53:00
62 | Epoch 029/120: (64.3020 85.9900) | 1347.6479 M (1249.5243 -43.0827) || train (60.9533 82.2819) | 1360.7538 M (1258.7455 -39.1980)| loss 2.6138 || lr 0.064702-0.012940-0.012940 | Time 2021-09-14 05:53:00
63 | Epoch 030/120: (63.9580 85.5980) | 1331.2858 M (1234.4004 -44.3209) || train (61.0168 82.2907) | 1345.2951 M (1245.4652 -41.3764)| loss 2.6114 || lr 0.064017-0.012803-0.012803 | Time 2021-09-14 06:05:41
64 | Epoch 030/120: (63.9580 85.5980) | 1331.2858 M (1234.4004 -44.3209) || train (61.0301 82.3378) | 1345.3036 M (1245.4724 -41.3751)| loss 2.6101 || lr 0.064017-0.012803-0.012803 | Time 2021-09-14 06:05:41
65 | Epoch 031/120: (64.2460 85.9920) | 1316.3634 M (1221.6042 -46.4472) || train (61.0296 82.3164) | 1329.8829 M (1231.9061 -43.2295)| loss 2.6110 || lr 0.063313-0.012663-0.012663 | Time 2021-09-14 06:18:24
66 | Epoch 031/120: (64.2460 85.9940) | 1316.3634 M (1221.6043 -46.4472) || train (61.0323 82.3497) | 1329.8739 M (1231.8992 -43.2316)| loss 2.6091 || lr 0.063313-0.012663-0.012663 | Time 2021-09-14 06:18:24
67 | Epoch 032/120: (63.9420 85.9020) | 1299.9223 M (1205.5092 -46.7932) || train (60.9883 82.2871) | 1314.4285 M (1217.4167 -44.1945)| loss 2.6097 || lr 0.062592-0.012518-0.012518 | Time 2021-09-14 06:31:08
68 | Epoch 032/120: (63.9420 85.9000) | 1299.9222 M (1205.5092 -46.7932) || train (60.9934 82.3347) | 1314.4259 M (1217.4136 -44.1940)| loss 2.6100 || lr 0.062592-0.012518-0.012518 | Time 2021-09-14 06:31:08
69 | Epoch 033/120: (64.0700 85.9340) | 1284.3283 M (1190.5084 -47.3864) || train (60.9489 82.2617) | 1299.0060 M (1202.3485 -44.5488)| loss 2.6103 || lr 0.061854-0.012371-0.012371 | Time 2021-09-14 06:43:51
70 | Epoch 033/120: (64.0720 85.9340) | 1284.3282 M (1190.5084 -47.3864) || train (61.0415 82.2652) | 1299.0028 M (1202.3457 -44.5492)| loss 2.6112 || lr 0.061854-0.012371-0.012371 | Time 2021-09-14 06:43:51
71 | Epoch 034/120: (64.6620 86.1540) | 1269.0458 M (1175.7312 -47.8917) || train (61.1067 82.2788) | 1283.5355 M (1187.6185 -45.2893)| loss 2.6091 || lr 0.061100-0.012220-0.012220 | Time 2021-09-14 06:56:34
72 | Epoch 034/120: (64.6640 86.1540) | 1269.0458 M (1175.7312 -47.8917) || train (61.0810 82.3776) | 1283.5422 M (1187.6251 -45.2891)| loss 2.6058 || lr 0.061100-0.012220-0.012220 | Time 2021-09-14 06:56:34
73 | Epoch 035/120: (64.7800 86.1740) | 1254.2483 M (1163.6325 -50.5904) || train (61.0423 82.3958) | 1268.1235 M (1174.0564 -47.1392)| loss 2.6055 || lr 0.060329-0.012066-0.012066 | Time 2021-09-14 07:09:18
74 | Epoch 035/120: (64.7780 86.1740) | 1254.2483 M (1163.6324 -50.5904) || train (60.9567 82.3054) | 1268.1194 M (1174.0572 -47.1441)| loss 2.6096 || lr 0.060329-0.012066-0.012066 | Time 2021-09-14 07:09:18
75 | Epoch 036/120: (64.3700 85.9120) | 1236.9945 M (1146.9812 -51.1930) || train (60.8851 82.2462) | 1252.6811 M (1160.1859 -48.7111)| loss 2.6130 || lr 0.059542-0.011908-0.011908 | Time 2021-09-14 07:22:01
76 | Epoch 036/120: (64.3700 85.9120) | 1236.9945 M (1146.9812 -51.1930) || train (60.9864 82.2784) | 1252.6926 M (1160.1924 -48.7061)| loss 2.6099 || lr 0.059542-0.011908-0.011908 | Time 2021-09-14 07:22:01
77 | Epoch 037/120: (63.0360 85.2660) | 1222.8488 M (1134.0577 -52.4151) || train (60.8190 82.2802) | 1237.2240 M (1145.6004 -49.5827)| loss 2.6121 || lr 0.058740-0.011748-0.011748 | Time 2021-09-14 07:34:44
78 | Epoch 037/120: (63.0360 85.2660) | 1222.8488 M (1134.0577 -52.4151) || train (61.0059 82.2869) | 1237.2252 M (1145.6015 -49.5826)| loss 2.6081 || lr 0.058740-0.011748-0.011748 | Time 2021-09-14 07:34:44
79 | Epoch 038/120: (63.7360 85.5820) | 1207.5854 M (1121.1506 -54.7715) || train (60.8189 82.2076) | 1221.8050 M (1131.8165 -51.2177)| loss 2.6135 || lr 0.057924-0.011585-0.011585 | Time 2021-09-14 07:47:26
80 | Epoch 038/120: (63.7380 85.5820) | 1207.5854 M (1121.1506 -54.7715) || train (60.8056 82.1699) | 1221.8061 M (1131.8175 -51.2177)| loss 2.6160 || lr 0.057924-0.011585-0.011585 | Time 2021-09-14 07:47:27
81 | Epoch 039/120: (64.1960 86.1700) | 1191.9771 M (1108.9764 -58.2056) || train (60.8507 82.1822) | 1206.3279 M (1119.3716 -54.2499)| loss 2.6154 || lr 0.057094-0.011419-0.011419 | Time 2021-09-14 08:00:09
82 | Epoch 039/120: (64.1960 86.1700) | 1191.9771 M (1108.9765 -58.2056) || train (60.8095 82.2301) | 1206.3480 M (1119.3841 -54.2423)| loss 2.6148 || lr 0.057094-0.011419-0.011419 | Time 2021-09-14 08:00:09
83 | Epoch 040/120: (63.4540 85.5900) | 1175.8935 M (1094.3697 -59.6825) || train (60.6956 82.1329) | 1190.9180 M (1106.1753 -56.4636)| loss 2.6185 || lr 0.056250-0.011250-0.011250 | Time 2021-09-14 08:12:54
84 | Epoch 040/120: (63.4540 85.5900) | 1175.8935 M (1094.3697 -59.6825) || train (60.6842 82.0945) | 1190.9298 M (1106.1858 -56.4623)| loss 2.6203 || lr 0.056250-0.011250-0.011250 | Time 2021-09-14 08:12:54
85 | Epoch 041/120: (64.0620 85.7700) | 1159.7442 M (1081.1748 -62.6369) || train (60.6771 82.1477) | 1175.4541 M (1092.2877 -58.0398)| loss 2.6211 || lr 0.055393-0.011079-0.011079 | Time 2021-09-14 08:25:36
86 | Epoch 041/120: (64.0620 85.7700) | 1159.7442 M (1081.1748 -62.6369) || train (60.6526 82.1716) | 1175.4407 M (1092.2794 -58.0450)| loss 2.6190 || lr 0.055393-0.011079-0.011079 | Time 2021-09-14 08:25:37
87 | Epoch 042/120: (64.2700 86.0180) | 1144.4530 M (1068.5097 -65.2630) || train (60.5818 82.1045) | 1160.0165 M (1079.8770 -61.0668)| loss 2.6220 || lr 0.054525-0.010905-0.010905 | Time 2021-09-14 08:38:21
88 | Epoch 042/120: (64.2720 86.0180) | 1144.4530 M (1068.5097 -65.2630) || train (60.6753 82.0470) | 1160.0164 M (1079.8753 -61.0652)| loss 2.6222 || lr 0.054525-0.010905-0.010905 | Time 2021-09-14 08:38:21
89 | Epoch 043/120: (63.7000 85.8760) | 1128.0170 M (1055.2227 -68.4120) || train (60.6400 82.0976) | 1144.5572 M (1067.1416 -63.7906)| loss 2.6200 || lr 0.053644-0.010729-0.010729 | Time 2021-09-14 08:51:03
90 | Epoch 043/120: (63.7020 85.8760) | 1128.0170 M (1055.2227 -68.4120) || train (60.7007 82.1174) | 1144.5657 M (1067.1489 -63.7895)| loss 2.6192 || lr 0.053644-0.010729-0.010729 | Time 2021-09-14 08:51:04
91 | Epoch 044/120: (64.2500 85.8200) | 1111.8515 M (1040.8560 -70.2108) || train (60.3680 81.9153) | 1129.1407 M (1054.0582 -66.1237)| loss 2.6292 || lr 0.052753-0.010551-0.010551 | Time 2021-09-14 09:03:46
92 | Epoch 044/120: (64.2500 85.8200) | 1111.8515 M (1040.8560 -70.2108) || train (60.4420 82.0333) | 1129.1424 M (1054.0633 -66.1272)| loss 2.6261 || lr 0.052753-0.010551-0.010551 | Time 2021-09-14 09:03:46
93 | Epoch 045/120: (64.1000 86.0360) | 1098.8399 M (1028.9007 -71.2671) || train (60.4019 81.9301) | 1113.6886 M (1039.6613 -67.1790)| loss 2.6303 || lr 0.051851-0.010370-0.010370 | Time 2021-09-14 09:16:29
94 | Epoch 045/120: (64.1000 86.0360) | 1098.8399 M (1028.9008 -71.2671) || train (60.4347 81.9374) | 1113.6787 M (1039.6500 -67.1776)| loss 2.6306 || lr 0.051851-0.010370-0.010370 | Time 2021-09-14 09:16:29
95 | Epoch 046/120: (63.6560 85.7460) | 1082.1105 M (1014.8681 -73.9639) || train (60.3162 81.8853) | 1098.2831 M (1026.5205 -69.4436)| loss 2.6321 || lr 0.050939-0.010188-0.010188 | Time 2021-09-14 09:29:11
96 | Epoch 046/120: (63.6560 85.7460) | 1082.1105 M (1014.8681 -73.9639) || train (60.3356 81.9168) | 1098.2870 M (1026.5219 -69.4412)| loss 2.6329 || lr 0.050939-0.010188-0.010188 | Time 2021-09-14 09:29:11
97 | Epoch 047/120: (64.4300 85.8960) | 1065.9092 M (1002.3091 -77.6062) || train (60.3557 81.8556) | 1082.8301 M (1014.2452 -72.6214)| loss 2.6323 || lr 0.050018-0.010004-0.010004 | Time 2021-09-14 09:41:55
98 | Epoch 047/120: (64.4300 85.8960) | 1065.9092 M (1002.3091 -77.6062) || train (60.3715 81.9035) | 1082.8144 M (1014.2339 -72.6258)| loss 2.6309 || lr 0.050018-0.010004-0.010004 | Time 2021-09-14 09:41:56
99 | Epoch 048/120: (63.6760 85.5640) | 1051.0384 M (990.2942 -80.4620) || train (60.1887 81.7840) | 1067.4177 M (1001.6857 -75.4742)| loss 2.6361 || lr 0.049088-0.009818-0.009818 | Time 2021-09-14 09:54:38
100 | Epoch 048/120: (63.6760 85.5640) | 1051.0384 M (990.2942 -80.4620) || train (60.1064 81.7541) | 1067.4266 M (1001.6896 -75.4692)| loss 2.6392 || lr 0.049088-0.009818-0.009818 | Time 2021-09-14 09:54:38
101 | Epoch 049/120: (63.7480 85.5480) | 1036.3684 M (978.6192 -83.4571) || train (60.0778 81.6884) | 1052.0174 M (989.0804 -78.2692)| loss 2.6432 || lr 0.048151-0.009630-0.009630 | Time 2021-09-14 10:07:19
102 | Epoch 049/120: (63.7480 85.5480) | 1036.3684 M (978.6192 -83.4571) || train (60.0474 81.6934) | 1052.0081 M (989.0725 -78.2707)| loss 2.6421 || lr 0.048151-0.009630-0.009630 | Time 2021-09-14 10:07:20
103 | Epoch 050/120: (63.6440 85.5180) | 1020.1386 M (966.4076 -87.4753) || train (59.9281 81.6339) | 1036.5805 M (977.4216 -82.0474)| loss 2.6471 || lr 0.047206-0.009441-0.009441 | Time 2021-09-14 10:20:02
104 | Epoch 050/120: (63.6440 85.5180) | 1020.1386 M (966.4076 -87.4753) || train (60.0931 81.7022) | 1036.5803 M (977.4262 -82.0521)| loss 2.6415 || lr 0.047206-0.009441-0.009441 | Time 2021-09-14 10:20:02
105 | Epoch 051/120: (63.5120 85.3240) | 1004.9693 M (953.2144 -89.4514) || train (59.9987 81.6418) | 1021.1263 M (964.7712 -84.8512)| loss 2.6438 || lr 0.046254-0.009251-0.009251 | Time 2021-09-14 10:32:46
106 | Epoch 051/120: (63.5120 85.3240) | 1004.9693 M (953.2144 -89.4514) || train (60.0707 81.6869) | 1021.1491 M (964.7843 -84.8415)| loss 2.6423 || lr 0.046254-0.009251-0.009251 | Time 2021-09-14 10:32:46
107 | Epoch 052/120: (63.5940 85.3440) | 990.2906 M (941.7749 -92.6905) || train (59.8549 81.6124) | 1005.7305 M (951.8854 -87.3612)| loss 2.6507 || lr 0.045297-0.009059-0.009059 | Time 2021-09-14 10:45:29
108 | Epoch 052/120: (63.5940 85.3440) | 990.2906 M (941.7748 -92.6905) || train (59.9131 81.6408) | 1005.7109 M (951.8670 -87.3624)| loss 2.6461 || lr 0.045297-0.009059-0.009059 | Time 2021-09-14 10:45:30
109 | Epoch 053/120: (63.1820 85.4560) | 974.8494 M (930.5346 -96.8914) || train (59.7547 81.4977) | 990.3739 M (940.5345 -91.3669)| loss 2.6544 || lr 0.044334-0.008867-0.008867 | Time 2021-09-14 10:58:12
110 | Epoch 053/120: (63.1800 85.4560) | 974.8494 M (930.5346 -96.8914) || train (59.7844 81.4755) | 990.3542 M (940.5217 -91.3738)| loss 2.6532 || lr 0.044334-0.008867-0.008867 | Time 2021-09-14 10:58:12
111 | Epoch 054/120: (63.0960 85.4740) | 960.4508 M (920.0136 -100.7691) || train (59.4250 81.2791) | 974.9529 M (929.0239 -95.2772)| loss 2.6641 || lr 0.043366-0.008673-0.008673 | Time 2021-09-14 11:10:55
112 | Epoch 054/120: (63.0960 85.4740) | 960.4508 M (920.0136 -100.7691) || train (59.6197 81.3333) | 974.9628 M (929.0272 -95.2706)| loss 2.6616 || lr 0.043366-0.008673-0.008673 | Time 2021-09-14 11:10:56
113 | Epoch 055/120: (62.6920 85.0200) | 945.7568 M (909.6674 -105.1168) || train (59.4211 81.2465) | 959.5380 M (917.8409 -99.5091)| loss 2.6671 || lr 0.042395-0.008479-0.008479 | Time 2021-09-14 11:23:38
114 | Epoch 055/120: (62.6920 85.0200) | 945.7568 M (909.6674 -105.1168) || train (59.4840 81.3562) | 959.5230 M (917.8306 -99.5138)| loss 2.6648 || lr 0.042395-0.008479-0.008479 | Time 2021-09-14 11:23:39
115 | Epoch 056/120: (63.0700 85.2000) | 930.2815 M (899.5567 -110.4815) || train (59.2817 81.1598) | 944.1448 M (907.9880 -105.0494)| loss 2.6734 || lr 0.041420-0.008284-0.008284 | Time 2021-09-14 11:36:23
116 | Epoch 056/120: (63.0720 85.1980) | 930.2815 M (899.5567 -110.4815) || train (59.3583 81.1925) | 944.1357 M (907.9807 -105.0512)| loss 2.6721 || lr 0.041420-0.008284-0.008284 | Time 2021-09-14 11:36:23
117 | Epoch 057/120: (62.9820 85.1600) | 915.4490 M (889.8371 -115.5944) || train (59.1332 81.0370) | 928.7586 M (897.6750 -110.1226)| loss 2.6793 || lr 0.040442-0.008088-0.008088 | Time 2021-09-14 11:49:07
118 | Epoch 057/120: (62.9820 85.1600) | 915.4490 M (889.8371 -115.5944) || train (59.1849 81.1143) | 928.7468 M (897.6714 -110.1309)| loss 2.6757 || lr 0.040442-0.008088-0.008088 | Time 2021-09-14 11:49:07
119 | Epoch 058/120: (62.8000 85.0620) | 899.5652 M (877.6507 -119.2918) || train (59.1017 80.9572) | 913.3473 M (887.2175 -115.0764)| loss 2.6831 || lr 0.039463-0.007893-0.007893 | Time 2021-09-14 12:01:52
120 | Epoch 058/120: (62.8020 85.0620) | 899.5652 M (877.6507 -119.2917) || train (59.0077 80.9725) | 913.3557 M (887.2222 -115.0728)| loss 2.6845 || lr 0.039463-0.007893-0.007893 | Time 2021-09-14 12:01:52
121 | Epoch 059/120: (62.7700 84.9460) | 884.8068 M (866.6612 -123.0606) || train (58.7917 80.7471) | 897.9792 M (874.8622 -118.0892)| loss 2.6943 || lr 0.038482-0.007696-0.007696 | Time 2021-09-14 12:14:36
122 | Epoch 059/120: (62.7700 84.9460) | 884.8068 M (866.6612 -123.0607) || train (58.8477 80.8531) | 897.9803 M (874.8638 -118.0898)| loss 2.6907 || lr 0.038482-0.007696-0.007696 | Time 2021-09-14 12:14:36
123 | Epoch 060/120: (63.1080 85.0760) | 884.0654 M (863.1094 -120.2503) || train (59.0318 80.9318) | 890.1715 M (867.4694 -118.5042)| loss 2.6835 || lr 0.037500-0.007500-0.007500 | Time 2021-09-14 12:27:18
124 | Epoch 060/120: (63.1080 85.0740) | 884.0654 M (863.1094 -120.2503) || train (59.0664 80.9708) | 890.1690 M (867.4666 -118.5038)| loss 2.6822 || lr 0.037500-0.007500-0.007500 | Time 2021-09-14 12:27:19
125 | Epoch 061/120: (63.6160 85.5620) | 884.1763 M (861.9175 -118.9474) || train (59.6172 81.3345) | 890.0404 M (865.4110 -116.5768)| loss 2.6587 || lr 0.036518-0.007304-0.007304 | Time 2021-09-14 12:40:03
126 | Epoch 061/120: (63.6160 85.5620) | 884.1764 M (861.9175 -118.9474) || train (59.6153 81.3943) | 890.0414 M (865.4106 -116.5754)| loss 2.6569 || lr 0.036518-0.007304-0.007304 | Time 2021-09-14 12:40:03
127 | Epoch 062/120: (64.0400 85.5820) | 883.3901 M (859.5483 -117.3645) || train (60.0035 81.6124) | 889.9866 M (863.9024 -115.1220)| loss 2.6414 || lr 0.035537-0.007107-0.007107 | Time 2021-09-14 12:52:47
128 | Epoch 062/120: (64.0420 85.5820) | 883.3901 M (859.5483 -117.3645) || train (60.0252 81.5999) | 889.9776 M (863.8955 -115.1242)| loss 2.6419 || lr 0.035537-0.007107-0.007107 | Time 2021-09-14 12:52:47
129 | Epoch 063/120: (64.1520 85.7520) | 883.6790 M (858.0154 -115.5426) || train (60.4172 81.7827) | 889.9097 M (861.9717 -113.2682)| loss 2.6284 || lr 0.034558-0.006912-0.006912 | Time 2021-09-14 13:05:30
130 | Epoch 063/120: (64.1520 85.7520) | 883.6791 M (858.0154 -115.5426) || train (60.3237 81.8712) | 889.9074 M (861.9699 -113.2687)| loss 2.6275 || lr 0.034558-0.006912-0.006912 | Time 2021-09-14 13:05:31
131 | Epoch 064/120: (64.0080 85.7940) | 882.7555 M (856.8909 -115.3417) || train (60.7759 82.0746) | 889.8446 M (860.8351 -112.1968)| loss 2.6106 || lr 0.033580-0.006716-0.006716 | Time 2021-09-14 13:18:13
132 | Epoch 064/120: (64.0080 85.7940) | 882.7555 M (856.8909 -115.3417) || train (60.6651 82.0192) | 889.8627 M (860.8451 -112.1887)| loss 2.6145 || lr 0.033580-0.006716-0.006716 | Time 2021-09-14 13:18:15
133 | Epoch 065/120: (64.6180 86.1520) | 883.8825 M (856.1998 -113.5236) || train (61.0538 82.3269) | 889.8134 M (860.0167 -111.4095)| loss 2.5983 || lr 0.032605-0.006521-0.006521 | Time 2021-09-14 13:30:56
134 | Epoch 065/120: (64.6180 86.1520) | 883.8825 M (856.1998 -113.5236) || train (60.9495 82.3299) | 889.8088 M (860.0135 -111.4109)| loss 2.5986 || lr 0.032605-0.006521-0.006521 | Time 2021-09-14 13:30:57
135 | Epoch 066/120: (64.9120 86.4700) | 884.2962 M (856.2916 -113.2016) || train (61.3810 82.5066) | 889.7743 M (859.1695 -110.6015)| loss 2.5873 || lr 0.031634-0.006327-0.006327 | Time 2021-09-14 13:43:39
136 | Epoch 066/120: (64.9120 86.4700) | 884.2962 M (856.2916 -113.2016) || train (61.2477 82.4410) | 889.7730 M (859.1653 -110.5985)| loss 2.5894 || lr 0.031634-0.006327-0.006327 | Time 2021-09-14 13:43:39
137 | Epoch 067/120: (65.1560 86.6340) | 883.0972 M (855.5276 -113.6367) || train (61.6356 82.7073) | 889.7415 M (859.2483 -110.7131)| loss 2.5752 || lr 0.030666-0.006133-0.006133 | Time 2021-09-14 13:56:21
138 | Epoch 067/120: (65.1560 86.6340) | 883.0972 M (855.5276 -113.6367) || train (61.5560 82.6796) | 889.7445 M (859.2477 -110.7094)| loss 2.5765 || lr 0.030666-0.006133-0.006133 | Time 2021-09-14 13:56:21
139 | Epoch 068/120: (65.2080 86.5900) | 883.1238 M (855.4188 -113.5013) || train (61.8790 82.8797) | 889.7221 M (859.0784 -110.5626)| loss 2.5650 || lr 0.029703-0.005941-0.005941 | Time 2021-09-14 14:09:03
140 | Epoch 068/120: (65.2080 86.5900) | 883.1238 M (855.4188 -113.5013) || train (61.8412 82.8396) | 889.7300 M (859.0858 -110.5620)| loss 2.5659 || lr 0.029703-0.005941-0.005941 | Time 2021-09-14 14:09:03
141 | Epoch 069/120: (65.2800 86.6920) | 882.8764 M (855.1785 -113.5084) || train (61.9944 83.0008) | 889.7307 M (858.9399 -110.4154)| loss 2.5587 || lr 0.028746-0.005749-0.005749 | Time 2021-09-14 14:21:46
142 | Epoch 069/120: (65.2820 86.6920) | 882.8764 M (855.1785 -113.5084) || train (62.0078 82.9732) | 889.7355 M (858.9454 -110.4162)| loss 2.5582 || lr 0.028746-0.005749-0.005749 | Time 2021-09-14 14:21:46
143 | Epoch 070/120: (65.1840 86.5460) | 883.6188 M (855.4719 -113.0594) || train (62.2493 83.1363) | 889.6846 M (858.8949 -110.4164)| loss 2.5485 || lr 0.027794-0.005559-0.005559 | Time 2021-09-14 14:34:30
144 | Epoch 070/120: (65.1840 86.5480) | 883.6187 M (855.4719 -113.0594) || train (62.2368 83.1652) | 889.6918 M (858.9022 -110.4166)| loss 2.5489 || lr 0.027794-0.005559-0.005559 | Time 2021-09-14 14:34:30
145 | Epoch 071/120: (65.8180 86.8940) | 882.5033 M (854.7722 -113.4751) || train (62.3124 83.1997) | 889.7008 M (858.7349 -110.2404)| loss 2.5439 || lr 0.026849-0.005370-0.005370 | Time 2021-09-14 14:47:13
146 | Epoch 071/120: (65.8180 86.8940) | 882.5033 M (854.7722 -113.4751) || train (62.3723 83.2606) | 889.6972 M (858.7330 -110.2420)| loss 2.5412 || lr 0.026849-0.005370-0.005370 | Time 2021-09-14 14:47:13
147 | Epoch 072/120: (65.9120 86.9420) | 882.8291 M (855.1478 -113.5250) || train (62.5907 83.3685) | 889.6903 M (858.6751 -110.1910)| loss 2.5348 || lr 0.025912-0.005182-0.005182 | Time 2021-09-14 14:59:56
148 | Epoch 072/120: (65.9100 86.9440) | 882.8291 M (855.1478 -113.5250) || train (62.6644 83.4030) | 889.6935 M (858.6786 -110.1914)| loss 2.5325 || lr 0.025912-0.005182-0.005182 | Time 2021-09-14 14:59:56
149 | Epoch 073/120: (65.7320 86.7760) | 882.7198 M (854.7874 -113.2739) || train (62.8558 83.5692) | 889.7016 M (858.6840 -110.1886)| loss 2.5229 || lr 0.024982-0.004996-0.004996 | Time 2021-09-14 15:12:39
150 | Epoch 073/120: (65.7320 86.7740) | 882.7198 M (854.7874 -113.2739) || train (62.8108 83.4620) | 889.7012 M (858.6837 -110.1888)| loss 2.5278 || lr 0.024982-0.004996-0.004996 | Time 2021-09-14 15:12:39
151 | Epoch 074/120: (66.0860 87.1860) | 882.3902 M (854.5286 -113.3446) || train (63.0487 83.6379) | 889.6936 M (858.5773 -110.0899)| loss 2.5163 || lr 0.024061-0.004812-0.004812 | Time 2021-09-14 15:25:22
152 | Epoch 074/120: (66.0860 87.1860) | 882.3902 M (854.5286 -113.3446) || train (63.0152 83.6185) | 889.6878 M (858.5752 -110.0936)| loss 2.5167 || lr 0.024061-0.004812-0.004812 | Time 2021-09-14 15:25:22
153 | Epoch 075/120: (66.3480 87.2140) | 881.9441 M (854.3247 -113.5869) || train (63.2131 83.7397) | 889.6737 M (858.4954 -110.0279)| loss 2.5109 || lr 0.023149-0.004630-0.004630 | Time 2021-09-14 15:38:04
154 | Epoch 075/120: (66.3500 87.2160) | 881.9441 M (854.3247 -113.5869) || train (63.1936 83.7092) | 889.6718 M (858.4943 -110.0287)| loss 2.5113 || lr 0.023149-0.004630-0.004630 | Time 2021-09-14 15:38:05
155 | Epoch 076/120: (66.2340 86.9960) | 882.1132 M (854.3619 -113.4550) || train (63.4081 83.8844) | 889.6785 M (858.4776 -110.0053)| loss 2.5005 || lr 0.022247-0.004449-0.004449 | Time 2021-09-14 15:50:47
156 | Epoch 076/120: (66.2380 86.9960) | 882.1132 M (854.3619 -113.4550) || train (63.4162 83.8916) | 889.6541 M (858.4606 -110.0127)| loss 2.5010 || lr 0.022247-0.004449-0.004449 | Time 2021-09-14 15:50:47
157 | Epoch 077/120: (66.6260 87.3880) | 882.7125 M (854.5400 -113.0338) || train (63.6432 84.0500) | 889.6602 M (858.3469 -109.8930)| loss 2.4923 || lr 0.021356-0.004271-0.004271 | Time 2021-09-14 16:03:31
158 | Epoch 077/120: (66.6280 87.3900) | 882.7125 M (854.5400 -113.0338) || train (63.5450 84.0104) | 889.6702 M (858.3478 -109.8838)| loss 2.4951 || lr 0.021356-0.004271-0.004271 | Time 2021-09-14 16:03:31
159 | Epoch 078/120: (66.4760 87.3100) | 884.1473 M (855.0940 -112.1529) || train (63.7347 84.1268) | 889.6591 M (858.2772 -109.8244)| loss 2.4876 || lr 0.020475-0.004095-0.004095 | Time 2021-09-14 16:16:15
160 | Epoch 078/120: (66.4760 87.3080) | 884.1473 M (855.0940 -112.1529) || train (63.8279 84.1718) | 889.6509 M (858.2744 -109.8297)| loss 2.4849 || lr 0.020475-0.004095-0.004095 | Time 2021-09-14 16:16:15
161 | Epoch 079/120: (66.5680 87.3680) | 882.5151 M (853.9981 -112.6893) || train (64.0010 84.2973) | 889.6579 M (858.0725 -109.6208)| loss 2.4773 || lr 0.019607-0.003921-0.003921 | Time 2021-09-14 16:28:57
162 | Epoch 079/120: (66.5680 87.3680) | 882.5151 M (853.9981 -112.6893) || train (63.9654 84.2631) | 889.6602 M (858.0748 -109.6208)| loss 2.4795 || lr 0.019607-0.003921-0.003921 | Time 2021-09-14 16:28:57
163 | Epoch 080/120: (66.7580 87.3460) | 882.6259 M (854.1394 -112.7197) || train (64.1229 84.3179) | 889.6636 M (858.0611 -109.6037)| loss 2.4732 || lr 0.018750-0.003750-0.003750 | Time 2021-09-14 16:41:40
164 | Epoch 080/120: (66.7580 87.3460) | 882.6259 M (854.1394 -112.7197) || train (64.1797 84.4376) | 889.6654 M (858.0623 -109.6031)| loss 2.4676 || lr 0.018750-0.003750-0.003750 | Time 2021-09-14 16:41:40
165 | Epoch 081/120: (66.8280 87.6200) | 883.6605 M (854.6769 -112.2226) || train (64.3176 84.5422) | 889.6293 M (857.9812 -109.5582)| loss 2.4619 || lr 0.017906-0.003581-0.003581 | Time 2021-09-14 16:54:18
166 | Epoch 081/120: (66.8280 87.6200) | 883.6605 M (854.6769 -112.2226) || train (64.2792 84.4735) | 889.6371 M (857.9818 -109.5510)| loss 2.4652 || lr 0.017906-0.003581-0.003581 | Time 2021-09-14 16:54:19
167 | Epoch 082/120: (66.9540 87.7100) | 882.3178 M (853.9777 -112.8662) || train (64.5047 84.6187) | 889.6430 M (857.8909 -109.4542)| loss 2.4570 || lr 0.017076-0.003415-0.003415 | Time 2021-09-14 17:07:02
168 | Epoch 082/120: (66.9560 87.7100) | 882.3177 M (853.9777 -112.8662) || train (64.4743 84.5923) | 889.6404 M (857.8911 -109.4569)| loss 2.4572 || lr 0.017076-0.003415-0.003415 | Time 2021-09-14 17:07:03
169 | Epoch 083/120: (67.2500 87.7840) | 882.9222 M (854.2143 -112.4983) || train (64.6390 84.7257) | 889.6448 M (857.8969 -109.4583)| loss 2.4508 || lr 0.016260-0.003252-0.003252 | Time 2021-09-14 17:19:48
170 | Epoch 083/120: (67.2500 87.7820) | 882.9222 M (854.2143 -112.4983) || train (64.7617 84.7444) | 889.6448 M (857.8989 -109.4603)| loss 2.4478 || lr 0.016260-0.003252-0.003252 | Time 2021-09-14 17:19:50
171 | Epoch 084/120: (67.0660 87.7000) | 882.2667 M (853.8044 -112.7439) || train (64.8727 84.9250) | 889.6509 M (857.8456 -109.4010)| loss 2.4420 || lr 0.015458-0.003092-0.003092 | Time 2021-09-14 17:32:36
172 | Epoch 084/120: (67.0660 87.7000) | 882.2667 M (853.8044 -112.7439) || train (64.8569 84.8496) | 889.6551 M (857.8500 -109.4012)| loss 2.4435 || lr 0.015458-0.003092-0.003092 | Time 2021-09-14 17:32:36
173 | Epoch 085/120: (67.6780 87.9780) | 882.8940 M (854.1226 -112.4349) || train (64.9859 84.9910) | 889.6295 M (857.8497 -109.4265)| loss 2.4349 || lr 0.014671-0.002934-0.002934 | Time 2021-09-14 17:45:20
174 | Epoch 085/120: (67.6780 87.9780) | 882.8940 M (854.1226 -112.4349) || train (65.0244 84.9444) | 889.6437 M (857.8587 -109.4212)| loss 2.4347 || lr 0.014671-0.002934-0.002934 | Time 2021-09-14 17:45:20
175 | Epoch 086/120: (67.4720 87.8980) | 882.5458 M (853.8803 -112.5407) || train (65.1602 85.0232) | 889.6397 M (857.8518 -109.4184)| loss 2.4301 || lr 0.013900-0.002780-0.002780 | Time 2021-09-14 17:58:05
176 | Epoch 086/120: (67.4720 87.8980) | 882.5458 M (853.8802 -112.5407) || train (65.3841 85.1579) | 889.6397 M (857.8524 -109.4189)| loss 2.4227 || lr 0.013900-0.002780-0.002780 | Time 2021-09-14 17:58:05
177 | Epoch 087/120: (67.6360 88.0040) | 882.7728 M (854.0635 -112.4969) || train (65.5052 85.2432) | 889.6216 M (857.8917 -109.4764)| loss 2.4171 || lr 0.013146-0.002629-0.002629 | Time 2021-09-14 18:10:50
178 | Epoch 087/120: (67.6360 88.0040) | 882.7728 M (854.0635 -112.4969) || train (65.4692 85.2329) | 889.6468 M (857.9040 -109.4634)| loss 2.4184 || lr 0.013146-0.002629-0.002629 | Time 2021-09-14 18:10:52
179 | Epoch 088/120: (67.8720 88.1000) | 883.1359 M (854.3243 -112.3947) || train (65.6362 85.3173) | 889.6362 M (857.8836 -109.4537)| loss 2.4110 || lr 0.012408-0.002482-0.002482 | Time 2021-09-14 18:23:37
180 | Epoch 088/120: (67.8720 88.1000) | 883.1360 M (854.3243 -112.3946) || train (65.6462 85.3342) | 889.6240 M (857.8786 -109.4608)| loss 2.4098 || lr 0.012408-0.002482-0.002482 | Time 2021-09-14 18:23:38
181 | Epoch 089/120: (68.0220 88.1280) | 882.9971 M (854.1181 -112.3273) || train (65.8182 85.4244) | 889.6219 M (857.8376 -109.4219)| loss 2.4055 || lr 0.011687-0.002337-0.002337 | Time 2021-09-14 18:36:24
182 | Epoch 089/120: (68.0220 88.1300) | 882.9971 M (854.1181 -112.3273) || train (65.8011 85.4394) | 889.6222 M (857.8379 -109.4219)| loss 2.4039 || lr 0.011687-0.002337-0.002337 | Time 2021-09-14 18:36:25
183 | Epoch 090/120: (68.1280 88.1500) | 882.6422 M (853.9916 -112.5557) || train (65.9686 85.5691) | 889.6408 M (857.7934 -109.3589)| loss 2.3980 || lr 0.010983-0.002197-0.002197 | Time 2021-09-14 18:49:10
184 | Epoch 090/120: (68.1280 88.1500) | 882.6422 M (853.9916 -112.5557) || train (66.0641 85.5577) | 889.6405 M (857.7986 -109.3644)| loss 2.3964 || lr 0.010983-0.002197-0.002197 | Time 2021-09-14 18:49:10
185 | Epoch 091/120: (68.5280 88.4680) | 882.5701 M (853.9439 -112.5800) || train (66.1329 85.5985) | 889.6309 M (857.8141 -109.3895)| loss 2.3922 || lr 0.010298-0.002060-0.002060 | Time 2021-09-14 19:01:55
186 | Epoch 091/120: (68.5280 88.4680) | 882.5701 M (853.9439 -112.5800) || train (66.2408 85.6314) | 889.6201 M (857.8074 -109.3935)| loss 2.3916 || lr 0.010298-0.002060-0.002060 | Time 2021-09-14 19:01:56
187 | Epoch 092/120: (68.4100 88.4000) | 883.2140 M (854.2457 -112.2380) || train (66.2747 85.7343) | 889.6413 M (857.8401 -109.4050)| loss 2.3856 || lr 0.009632-0.001926-0.001926 | Time 2021-09-14 19:14:42
188 | Epoch 092/120: (68.4100 88.4000) | 883.2140 M (854.2457 -112.2380) || train (66.3551 85.7511) | 889.6289 M (857.8328 -109.4101)| loss 2.3842 || lr 0.009632-0.001926-0.001926 | Time 2021-09-14 19:14:43
189 | Epoch 093/120: (68.6320 88.4860) | 883.3223 M (854.4367 -112.3207) || train (66.5719 85.8693) | 889.6300 M (857.8784 -109.4546)| loss 2.3761 || lr 0.008985-0.001797-0.001797 | Time 2021-09-14 19:27:28
190 | Epoch 093/120: (68.6320 88.4860) | 883.3223 M (854.4367 -112.3207) || train (66.4639 85.8816) | 889.6213 M (857.8697 -109.4547)| loss 2.3770 || lr 0.008985-0.001797-0.001797 | Time 2021-09-14 19:27:28
191 | Epoch 094/120: (68.7320 88.6000) | 883.1126 M (854.2587 -112.3523) || train (66.6634 86.0082) | 889.6361 M (857.8698 -109.4400)| loss 2.3695 || lr 0.008357-0.001671-0.001671 | Time 2021-09-14 19:40:11
192 | Epoch 094/120: (68.7320 88.6000) | 883.1126 M (854.2587 -112.3523) || train (66.7119 85.9984) | 889.6348 M (857.8728 -109.4443)| loss 2.3702 || lr 0.008357-0.001671-0.001671 | Time 2021-09-14 19:40:14
193 | Epoch 095/120: (68.5940 88.6080) | 882.3421 M (853.9648 -112.8289) || train (66.8368 86.0782) | 889.6305 M (857.8647 -109.4405)| loss 2.3638 || lr 0.007749-0.001550-0.001550 | Time 2021-09-14 19:52:58
194 | Epoch 095/120: (68.5940 88.6080) | 882.3421 M (853.9648 -112.8289) || train (66.8819 86.1323) | 889.6251 M (857.8652 -109.4464)| loss 2.3624 || lr 0.007749-0.001550-0.001550 | Time 2021-09-14 19:52:59
195 | Epoch 096/120: (68.9140 88.6740) | 883.0249 M (854.2101 -112.3915) || train (67.0270 86.1767) | 889.6343 M (857.9046 -109.4765)| loss 2.3595 || lr 0.007162-0.001432-0.001432 | Time 2021-09-14 20:05:44
196 | Epoch 096/120: (68.9120 88.6740) | 883.0249 M (854.2101 -112.3915) || train (67.0674 86.2098) | 889.6449 M (857.9067 -109.4680)| loss 2.3560 || lr 0.007162-0.001432-0.001432 | Time 2021-09-14 20:05:44
197 | Epoch 097/120: (68.8800 88.7060) | 883.3359 M (854.4067 -112.2771) || train (67.2371 86.2719) | 889.6187 M (857.8755 -109.4631)| loss 2.3501 || lr 0.006595-0.001319-0.001319 | Time 2021-09-14 20:18:29
198 | Epoch 097/120: (68.8800 88.7060) | 883.3359 M (854.4067 -112.2771) || train (67.2277 86.2886) | 889.6133 M (857.8748 -109.4677)| loss 2.3497 || lr 0.006595-0.001319-0.001319 | Time 2021-09-14 20:18:30
199 | Epoch 098/120: (69.1560 88.7180) | 883.0049 M (854.2425 -112.4439) || train (67.3342 86.3557) | 889.6224 M (857.8841 -109.4679)| loss 2.3452 || lr 0.006050-0.001210-0.001210 | Time 2021-09-14 20:31:15
200 | Epoch 098/120: (69.1580 88.7180) | 883.0049 M (854.2425 -112.4439) || train (67.3573 86.3793) | 889.6028 M (857.8725 -109.4759)| loss 2.3432 || lr 0.006050-0.001210-0.001210 | Time 2021-09-14 20:31:16
201 | Epoch 099/120: (69.1400 88.8060) | 882.7206 M (854.1090 -112.5946) || train (67.5212 86.4611) | 889.6443 M (857.9054 -109.4674)| loss 2.3380 || lr 0.005526-0.001105-0.001105 | Time 2021-09-14 20:44:01
202 | Epoch 099/120: (69.1400 88.8060) | 882.7206 M (854.1090 -112.5946) || train (67.5246 86.4801) | 889.6557 M (857.9101 -109.4606)| loss 2.3374 || lr 0.005526-0.001105-0.001105 | Time 2021-09-14 20:44:03
203 | Epoch 100/120: (69.1840 88.7300) | 882.2395 M (853.9029 -112.8697) || train (67.6551 86.5134) | 889.6021 M (857.9100 -109.5141)| loss 2.3334 || lr 0.005024-0.001005-0.001005 | Time 2021-09-14 20:56:46
204 | Epoch 100/120: (69.1820 88.7300) | 882.2396 M (853.9030 -112.8697) || train (67.7287 86.5373) | 889.6100 M (857.9140 -109.5103)| loss 2.3325 || lr 0.005024-0.001005-0.001005 | Time 2021-09-14 20:56:47
205 | Epoch 101/120: (69.4060 88.8940) | 882.7131 M (854.1002 -112.5934) || train (67.8618 86.6366) | 889.6124 M (857.9202 -109.5141)| loss 2.3264 || lr 0.004544-0.000909-0.000909 | Time 2021-09-14 21:09:33
206 | Epoch 101/120: (69.4060 88.8940) | 882.7131 M (854.1002 -112.5933) || train (67.8724 86.6764) | 889.6037 M (857.9171 -109.5197)| loss 2.3254 || lr 0.004544-0.000909-0.000909 | Time 2021-09-14 21:09:33
207 | Epoch 102/120: (69.4180 89.0460) | 882.7648 M (854.0772 -112.5187) || train (67.8862 86.7177) | 889.6340 M (857.9203 -109.4925)| loss 2.3230 || lr 0.004087-0.000817-0.000817 | Time 2021-09-14 21:22:20
208 | Epoch 102/120: (69.4180 89.0480) | 882.7648 M (854.0772 -112.5187) || train (68.0757 86.7811) | 889.6322 M (857.9186 -109.4926)| loss 2.3175 || lr 0.004087-0.000817-0.000817 | Time 2021-09-14 21:22:20
209 | Epoch 103/120: (69.5060 88.9260) | 882.0666 M (853.7832 -112.9229) || train (68.0857 86.8423) | 889.6168 M (857.8712 -109.4606)| loss 2.3160 || lr 0.003653-0.000731-0.000731 | Time 2021-09-14 21:35:07
210 | Epoch 103/120: (69.5060 88.9260) | 882.0666 M (853.7832 -112.9229) || train (68.0724 86.7472) | 889.6350 M (857.8849 -109.4561)| loss 2.3199 || lr 0.003653-0.000731-0.000731 | Time 2021-09-14 21:35:08
211 | Epoch 104/120: (69.6140 89.0380) | 882.7687 M (854.0998 -112.5373) || train (68.2977 86.9421) | 889.6259 M (857.8878 -109.4682)| loss 2.3079 || lr 0.003242-0.000648-0.000648 | Time 2021-09-14 21:47:53
212 | Epoch 104/120: (69.6140 89.0380) | 882.7687 M (854.0998 -112.5373) || train (68.2744 86.8635) | 889.5994 M (857.8703 -109.4772)| loss 2.3122 || lr 0.003242-0.000648-0.000648 | Time 2021-09-14 21:47:54
213 | Epoch 105/120: (69.7080 89.1060) | 882.7867 M (854.1748 -112.5944) || train (68.5140 87.0668) | 889.6130 M (857.8915 -109.4847)| loss 2.3006 || lr 0.002855-0.000571-0.000571 | Time 2021-09-14 22:00:40
214 | Epoch 105/120: (69.7080 89.1060) | 882.7867 M (854.1748 -112.5944) || train (68.2989 86.9653) | 889.6369 M (857.9039 -109.4732)| loss 2.3078 || lr 0.002855-0.000571-0.000571 | Time 2021-09-14 22:00:40
215 | Epoch 106/120: (69.6300 89.1680) | 882.7552 M (854.1050 -112.5560) || train (68.5161 87.0343) | 889.6239 M (857.8836 -109.4659)| loss 2.3006 || lr 0.002491-0.000498-0.000498 | Time 2021-09-14 22:13:22
216 | Epoch 106/120: (69.6280 89.1680) | 882.7553 M (854.1050 -112.5560) || train (68.5286 87.0272) | 889.6201 M (857.8844 -109.4705)| loss 2.3009 || lr 0.002491-0.000498-0.000498 | Time 2021-09-14 22:13:22
217 | Epoch 107/120: (69.8820 89.2120) | 882.5606 M (853.9543 -112.6000) || train (68.5774 87.0751) | 889.6246 M (857.8970 -109.4787)| loss 2.2968 || lr 0.002151-0.000430-0.000430 | Time 2021-09-14 22:26:05
218 | Epoch 107/120: (69.8820 89.2120) | 882.5606 M (853.9543 -112.6000) || train (68.6110 87.0993) | 889.6093 M (857.8840 -109.4810)| loss 2.2958 || lr 0.002151-0.000430-0.000430 | Time 2021-09-14 22:26:05
219 | Epoch 108/120: (69.9040 89.2620) | 882.6877 M (854.0653 -112.5838) || train (68.7540 87.2260) | 889.6111 M (857.8721 -109.4672)| loss 2.2913 || lr 0.001835-0.000367-0.000367 | Time 2021-09-14 22:38:51
220 | Epoch 108/120: (69.9020 89.2620) | 882.6877 M (854.0653 -112.5838) || train (68.6923 87.1564) | 889.6101 M (857.8755 -109.4716)| loss 2.2942 || lr 0.001835-0.000367-0.000367 | Time 2021-09-14 22:38:51
221 | Epoch 109/120: (70.0700 89.2480) | 882.9171 M (854.1265 -112.4157) || train (68.8192 87.2432) | 889.6151 M (857.8693 -109.4605)| loss 2.2887 || lr 0.001544-0.000309-0.000309 | Time 2021-09-14 22:51:35
222 | Epoch 109/120: (70.0700 89.2480) | 882.9171 M (854.1265 -112.4157) || train (68.8319 87.2435) | 889.6089 M (857.8642 -109.4616)| loss 2.2867 || lr 0.001544-0.000309-0.000309 | Time 2021-09-14 22:51:35
223 | Epoch 110/120: (69.9900 89.0960) | 882.6980 M (853.9954 -112.5037) || train (68.9547 87.2807) | 889.6010 M (857.8458 -109.4511)| loss 2.2848 || lr 0.001278-0.000256-0.000256 | Time 2021-09-14 23:04:17
224 | Epoch 110/120: (69.9900 89.0960) | 882.6980 M (853.9955 -112.5037) || train (68.8484 87.2857) | 889.6033 M (857.8445 -109.4474)| loss 2.2858 || lr 0.001278-0.000256-0.000256 | Time 2021-09-14 23:04:18
225 | Epoch 111/120: (70.0680 89.2500) | 882.7318 M (854.0183 -112.4927) || train (68.9975 87.3161) | 889.6090 M (857.8373 -109.4345)| loss 2.2829 || lr 0.001036-0.000207-0.000207 | Time 2021-09-14 23:17:01
226 | Epoch 111/120: (70.0680 89.2500) | 882.7318 M (854.0183 -112.4927) || train (69.0000 87.3459) | 889.6202 M (857.8476 -109.4337)| loss 2.2802 || lr 0.001036-0.000207-0.000207 | Time 2021-09-14 23:17:01
227 | Epoch 112/120: (70.1080 89.2640) | 883.3966 M (854.2965 -112.1061) || train (69.0287 87.3523) | 889.5963 M (857.8136 -109.4235)| loss 2.2812 || lr 0.000819-0.000164-0.000164 | Time 2021-09-14 23:29:44
228 | Epoch 112/120: (70.1080 89.2640) | 883.3966 M (854.2965 -112.1061) || train (69.0657 87.4249) | 889.5961 M (857.8142 -109.4243)| loss 2.2778 || lr 0.000819-0.000164-0.000164 | Time 2021-09-14 23:29:44
229 | Epoch 113/120: (70.1060 89.2880) | 882.8278 M (854.0229 -112.4013) || train (69.1226 87.3913) | 889.6112 M (857.8216 -109.4167)| loss 2.2774 || lr 0.000628-0.000126-0.000126 | Time 2021-09-14 23:42:27
230 | Epoch 113/120: (70.1040 89.2860) | 882.8278 M (854.0229 -112.4013) || train (69.0877 87.3762) | 889.6019 M (857.8150 -109.4193)| loss 2.2777 || lr 0.000628-0.000126-0.000126 | Time 2021-09-14 23:42:27
231 | Epoch 114/120: (70.2240 89.3160) | 882.7266 M (853.9528 -112.4325) || train (69.0331 87.4201) | 889.6103 M (857.8170 -109.4129)| loss 2.2779 || lr 0.000462-0.000092-0.000092 | Time 2021-09-14 23:55:09
232 | Epoch 114/120: (70.2220 89.3160) | 882.7266 M (853.9528 -112.4325) || train (69.2064 87.3979) | 889.6023 M (857.8150 -109.4190)| loss 2.2755 || lr 0.000462-0.000092-0.000092 | Time 2021-09-14 23:55:09
233 | Epoch 115/120: (70.1880 89.2740) | 882.6807 M (853.8977 -112.4233) || train (69.1703 87.4237) | 889.6268 M (857.8263 -109.4058)| loss 2.2752 || lr 0.000321-0.000064-0.000064 | Time 2021-09-15 00:07:51
234 | Epoch 115/120: (70.1880 89.2740) | 882.6807 M (853.8977 -112.4233) || train (69.1940 87.4700) | 889.6259 M (857.8212 -109.4015)| loss 2.2721 || lr 0.000321-0.000064-0.000064 | Time 2021-09-15 00:07:51
235 | Epoch 116/120: (70.2900 89.2680) | 882.6882 M (853.9271 -112.4452) || train (69.3044 87.5066) | 889.6117 M (857.8069 -109.4014)| loss 2.2711 || lr 0.000205-0.000041-0.000041 | Time 2021-09-15 00:20:34
236 | Epoch 116/120: (70.2880 89.2700) | 882.6882 M (853.9271 -112.4452) || train (69.3007 87.4683) | 889.6008 M (857.7964 -109.4019)| loss 2.2711 || lr 0.000205-0.000041-0.000041 | Time 2021-09-15 00:20:34
237 | Epoch 117/120: (70.2180 89.3040) | 882.9589 M (854.0181 -112.2655) || train (69.3125 87.4566) | 889.6088 M (857.8027 -109.4001)| loss 2.2716 || lr 0.000116-0.000023-0.000023 | Time 2021-09-15 00:33:17
238 | Epoch 117/120: (70.2180 89.3040) | 882.9589 M (854.0181 -112.2655) || train (69.3192 87.4856) | 889.5927 M (857.7892 -109.4027)| loss 2.2709 || lr 0.000116-0.000023-0.000023 | Time 2021-09-15 00:33:18
239 | Epoch 118/120: (70.2680 89.3120) | 883.0031 M (854.0827 -112.2858) || train (69.2473 87.5117) | 889.5958 M (857.7914 -109.4018)| loss 2.2713 || lr 0.000051-0.000010-0.000010 | Time 2021-09-15 00:45:59
240 | Epoch 118/120: (70.2680 89.3100) | 883.0031 M (854.0827 -112.2858) || train (69.3459 87.4772) | 889.5914 M (857.7918 -109.4066)| loss 2.2698 || lr 0.000051-0.000010-0.000010 | Time 2021-09-15 00:46:00
241 | Epoch 119/120: (70.2740 89.3240) | 882.7236 M (853.9479 -112.4306) || train (69.3024 87.5162) | 889.6021 M (857.7938 -109.3979)| loss 2.2701 || lr 0.000013-0.000003-0.000003 | Time 2021-09-15 00:58:43
242 | Epoch 119/120: (70.2740 89.3240) | 882.7236 M (853.9479 -112.4306) || train (69.2863 87.4883) | 889.5979 M (857.7913 -109.3996)| loss 2.2713 || lr 0.000013-0.000003-0.000003 | Time 2021-09-15 00:58:43
243 |
--------------------------------------------------------------------------------