├── imet
│   ├── __init__.py
│   ├── utils.py
│   ├── make_submission.py
│   ├── loss.py
│   ├── dataset.py
│   ├── make_folds.py
│   ├── transforms.py
│   ├── partialconv2d.py
│   ├── models.py
│   ├── adabound.py
│   ├── main.py
│   └── seresnet_partial.py
├── requirements.txt
├── zipfiles.bash
├── pytorch_helper_bot
│   ├── examples
│   │   └── imagenette
│   │       ├── requirements.txt
│   │       ├── dataset.py
│   │       ├── README.md
│   │       ├── transforms.py
│   │       ├── models.py
│   │       ├── main.py
│   │       └── logs
│   │           ├── colab_o0_bs64_e5.txt
│   │           ├── colab_o1_bs64_e5_2.txt
│   │           ├── colab_o2_bs64_e5.txt
│   │           ├── colab_o1_bs64_e5.txt
│   │           ├── bs32_8460.txt
│   │           ├── bs64_mixup02_8600.txt
│   │           ├── bs64_8680.txt
│   │           └── bs64_e10.txt
│   ├── .gitignore
│   ├── helperbot
│   │   ├── __init__.py
│   │   ├── loss.py
│   │   ├── differential_learning_rates.py
│   │   ├── weight_decay.py
│   │   ├── logger.py
│   │   ├── metrics.py
│   │   ├── callbacks.py
│   │   ├── lr_scheduler.py
│   │   └── bot.py
│   ├── setup.py
│   └── README.md
├── .gitignore
├── setup.py
├── README.md
├── LICENSE
└── pylintrc

/imet/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch>=1.0.0
2 | albumentations>=0.2.3
3 | pretrainedmodels>=0.7.4
4 | tqdm==4.29.1
5 | scikit-learn>=0.21.2
6 | pandas>=0.24.0
--------------------------------------------------------------------------------
/zipfiles.bash:
--------------------------------------------------------------------------------
1 | rm imet.7z
2 | 7za a -bd -mx=0 imet.7z imet/*.py setup.py
3 | rm helperbot.7z
4 | cd pytorch_helper_bot
5 | 7za a -bd -mx=0 ../helperbot.7z helperbot/*.py *.py
6 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/examples/imagenette/requirements.txt:
--------------------------------------------------------------------------------
1 | albumentations>=0.1.12
2 | pretrainedmodels>=0.7.4
3 | https://github.com/ceshine/pytorch_helper_bot/archive/0.1.6.zip
4 | tensorboardX
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | 
7 | # submission file
8 | *.csv
9 | 
10 | # pyCharm files
11 | .idea/
12 | 
13 | .mypy_cache/
14 | .vscode/
15 | input/
16 | references/
17 | 
18 | *.7z
19 | *.zip
20 | 
21 | bot
22 | input
--------------------------------------------------------------------------------
/pytorch_helper_bot/.gitignore:
--------------------------------------------------------------------------------
1 | *.csv
2 | *#
3 | *~
4 | cache
5 | __pycache__
6 | .dir-locals.el
7 | .idea/
8 | .vscode/
9 | .ipynb_checkpoints/
10 | *.7z
11 | *.html
12 | *.gz
13 | *.out
14 | runs/
15 | data/
16 | plots
17 | *.zip
18 | .mypy_cache
19 | pylintrc
20 | *.egg-info/
21 | .cache/
22 | core
23 | .nv/
24 | .bash_history
25 | data
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | setup(
4 |     name='imet',
5 |     packages=['imet'],
6 |     install_requires=[
7 |         'torch>=1.0.0',
8 |         'albumentations>=0.2.3',
9 |         'pretrainedmodels>=0.7.4',
10 |         'pandas>=0.24.0',
11 |         'scikit-learn>=0.21.2',
12 |         'tqdm==4.29.1',
13 |         'helperbot>=0.1.3'
14 |     ]
15 | )
16 | 
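17 | # Note: 'helperbot' is vendored in this repository under pytorch_helper_bot/
18 | # via git subtree; per the README, install it from there first:
19 | #   cd pytorch_helper_bot && pip install .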
--------------------------------------------------------------------------------
/pytorch_helper_bot/helperbot/__init__.py:
--------------------------------------------------------------------------------
1 | from .differential_learning_rates import setup_differential_learning_rates, freeze_layers
2 | from .bot import BaseBot
3 | from .lr_scheduler import TriangularLR, GradualWarmupScheduler
4 | from .weight_decay import WeightDecayOptimizerWrapper
5 | from .metrics import Metric, AUC, FBeta, Top1Accuracy, TopKAccuracy
6 | from .callbacks import LearningRateSchedulerCallback, MixUpCallback
7 | 
--------------------------------------------------------------------------------
/imet/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import pandas as pd
4 | from scipy.stats.mstats import gmean
5 | 
6 | 
7 | ON_KAGGLE: bool = 'KAGGLE_WORKING_DIR' in os.environ
8 | 
9 | 
10 | def gmean_df(df: pd.DataFrame) -> pd.DataFrame:
11 |     return df.groupby(level=0).agg(lambda x: gmean(list(x)))
12 | 
13 | 
14 | def mean_df(df: pd.DataFrame) -> pd.DataFrame:
15 |     return df.groupby(level=0).mean()
16 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | 
3 | setup(
4 |     name='PyTorchHelperBot',
5 |     version='0.1.6',
6 |     packages=['helperbot', ],
7 |     install_requires=[
8 |         'torch>=0.4.1',
9 |         'dataclasses'
10 |     ],
11 |     classifiers=[  # Optional
12 |         # How mature is this project? Common values are
13 |         #   3 - Alpha
14 |         #   4 - Beta
15 |         #   5 - Production/Stable
16 |         'Development Status :: 3 - Alpha',
17 | 
18 |         # Indicate who your project is intended for
19 |         'Intended Audience :: Developers',
20 | 
21 |         # Pick your license as you wish
22 |         'License :: OSI Approved :: MIT License',
23 | 
24 |         # Specify the Python versions you support here. In particular, ensure
25 |         # that you indicate whether you support Python 2, Python 3 or both.
26 |         'Programming Language :: Python :: 3.6',
27 |         'Programming Language :: Python :: 3.7',
28 |     ],
29 | )
--------------------------------------------------------------------------------
/pytorch_helper_bot/helperbot/loss.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | 
3 | 
4 | class MixUpSoftmaxLoss(nn.Module):
5 |     "Reference: https://github.com/fastai/fastai/blob/master/fastai/callbacks/mixup.py#L6"
6 | 
7 |     def __init__(self, crit, reduction='mean'):
8 |         super().__init__()
9 |         self.crit = crit
10 |         setattr(self.crit, 'reduction', 'none')
11 |         self.reduction = reduction
12 | 
13 |     def forward(self, output, target):
14 |         if len(target.size()) == 2:
15 |             loss1 = self.crit(output, target[:, 0].long())
16 |             loss2 = self.crit(output, target[:, 1].long())
17 |             lambda_ = target[:, 2]
18 |             d = (loss1 * lambda_ + loss2 * (1-lambda_)).mean()
19 |         else:
20 |             # This handles the cases without MixUp for backward compatibility
21 |             d = self.crit(output, target)
22 |         if self.reduction == 'mean':
23 |             return d.mean()
24 |         elif self.reduction == 'sum':
25 |             return d.sum()
26 |         return d
27 | 
--------------------------------------------------------------------------------
/imet/make_submission.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | 
3 | import pandas as pd
4 | 
5 | from .utils import mean_df
6 | from .dataset import DATA_ROOT
7 | from .main import binarize_prediction, CACHE_DIR
8 | 
9 | 
10 | def main():
11 |     parser = argparse.ArgumentParser()
12 |     arg = parser.add_argument
13 |     arg('predictions', nargs='+')
14 |     arg('--threshold', type=float, default=0.2)
15 |     args = parser.parse_args()
16 |     sample_submission = pd.read_csv(
17 |         DATA_ROOT / 'sample_submission.csv', index_col='id')
18 |     dfs = []
19 |     for prediction in args.predictions:
20 |         df = pd.read_pickle(
21 |             CACHE_DIR / f"preds_{prediction}.pkl")
22 |         print(df.shape)
23 |         # print(df.isnull().sum().sum())
24 |         df = df.reindex(sample_submission.index)
25 |         print(df.isnull().sum().sum())
26 |         dfs.append(df)
27 |     df = pd.concat(dfs)
28 |     df = mean_df(df)
29 |     df[:] = binarize_prediction(df.values, threshold=args.threshold)
30 |     df = df.apply(get_classes, axis=1)
31 |     df.name = 'attribute_ids'
32 |     df.to_csv("submission.csv", header=True)
33 | 
34 | 
35 | def get_classes(item):
36 |     return ' '.join(str(cls_idx) for cls_idx, is_present in item.items() if is_present)
37 | 
38 | 
39 | if __name__ == '__main__':
40 |     main()
41 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/README.md:
--------------------------------------------------------------------------------
1 | # PyTorch Helper Bot
2 | [WIP] a high-level PyTorch helper package
3 | 
4 | This project is intended for my personal use. Backward compatibility will not be guaranteed. Important releases will be tagged.
5 | 
6 | ## Motivation
7 | 
8 | [*fast.ai*](https://github.com/fastai/fastai) is great, and I recommend it to all deep learning beginners. But since it's beginner-friendly, much of the more sophisticated machinery is heavily abstracted and hidden from users. Reading the source code is often required before you can tweak the underlying algorithms. The `doc` function greatly speeds up that process by quickly directing the user to the source code and documentation.
9 | 
10 | However, *fast.ai* has grown ever bigger, and not everyone has time to keep up with its codebase. Hence the creation of this project. 
I built a relatively thin layer of abstraction on top of PyTorch from scratch, with many ideas and some code borrowed from various sources (mainly *fast.ai*). Only features that are relevant to my use cases are added.
11 | 
12 | Another similar project is [pytorch/ignite](https://github.com/pytorch/ignite).
13 | 
14 | ## Examples
15 | 
16 | There are almost no unit tests yet. The following example(s) serve as rough functional tests.
17 | 
18 | * [Imagenette Image Classification](examples/imagenette/)
--------------------------------------------------------------------------------
/pytorch_helper_bot/examples/imagenette/dataset.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Callable, Optional, Dict
3 | 
4 | import cv2
5 | import torch
6 | import pandas as pd
7 | from torch.utils.data import Dataset
8 | 
9 | from transforms import tensor_transform
10 | 
11 | 
12 | N_CLASSES = 1103  # carried over from the iMet code; imagenette itself has only 10 classes
13 | DATA_ROOT = Path('./data')
14 | 
15 | 
16 | def build_dataframe_from_folder(root: Path, class_map: Optional[Dict] = None):
17 |     if class_map is None:
18 |         new_class_map = {}
19 |     tmp = []
20 |     for subfolder in root.iterdir():
21 |         if class_map is None:
22 |             new_class_map[subfolder.name] = len(new_class_map)
23 |             class_id = new_class_map[subfolder.name]
24 |         else:
25 |             class_id = class_map[subfolder.name]
26 |         for image in subfolder.iterdir():
27 |             tmp.append((image, class_id))
28 |     df = pd.DataFrame(tmp, columns=["image_path", "label"])
29 |     if class_map is None:
30 |         return df, new_class_map
31 |     return df
32 | 
33 | 
34 | class TrainDataset(Dataset):
35 |     def __init__(self, df: pd.DataFrame, image_transform: Callable, debug: bool = True):
36 |         super().__init__()
37 |         self._df = df
38 |         self._image_transform = image_transform
39 |         self._debug = debug
40 | 
41 |     def __len__(self):
42 |         return len(self._df)
43 | 
44 |     def __getitem__(self, idx: int):
45 |         item = self._df.iloc[idx]
46 |         image = load_transform_image(
47 |             item.image_path, self._image_transform, debug=self._debug)
48 |         target = torch.tensor(item.label).long()
49 |         return image, target
50 | 
51 | 
52 | def load_transform_image(
53 |         image_path: Path, image_transform: Callable, debug: bool = False):
54 |     image = cv2.imread(str(image_path.absolute()))
55 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
56 |     image = image_transform(image=image)["image"]
57 |     # if debug:
58 |     #     image.save('_debug.jpg')
59 |     tensor = tensor_transform(image=image)["image"]
60 |     return tensor
61 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/helperbot/differential_learning_rates.py:
--------------------------------------------------------------------------------
1 | from typing import Callable, List, Dict
2 | 
3 | import torch
4 | import torch.nn as nn
5 | 
6 | 
7 | def opt_params(layer, learning_rate):
8 |     return {'params': layer.parameters(), 'lr': learning_rate}
9 | 
10 | 
11 | def setup_differential_learning_rates(
12 |         optimizer_constructor: Callable[[List[Dict]], torch.optim.Optimizer],
13 |         model: torch.nn.Module,
14 |         lrs: List[float]) -> torch.optim.Optimizer:
15 |     """Set up an optimizer with differential learning rates
16 | 
17 |     Reference: fast.ai v0.7
18 | 
19 |     Parameters
20 |     ----------
21 |     optimizer_constructor : Callable[[List[Dict]], torch.optim.Optimizer]
22 |         Optimizer constructor or a partial that returns an Optimizer object.
23 |     model : torch.nn.Module
24 |         The PyTorch model you want to optimize. Needs to have a .get_layer_groups() method. 
25 |     lrs : List[float]
26 |         A list of learning rates for each layer group.
27 |     """
28 |     layer_groups = model.get_layer_groups()
29 |     assert len(layer_groups) == len(
30 |         lrs), f'size mismatch, expected {len(layer_groups)} lrs, but got {len(lrs)}'
31 |     optimizer = optimizer_constructor(
32 |         [opt_params(*p) for p in zip(layer_groups, lrs)])
33 |     return optimizer
34 | 
35 | 
36 | def children(m):
37 |     return m if isinstance(m, (list, tuple)) else list(m.children())
38 | 
39 | 
40 | def set_trainable_attr(m, b):
41 |     m.trainable = b
42 |     for p in m.parameters():
43 |         p.requires_grad = b
44 | 
45 | 
46 | def apply_leaf(m, f):
47 |     c = children(m)
48 |     if isinstance(m, nn.Module):
49 |         f(m)
50 |     if len(c) > 0:
51 |         for l in c:
52 |             apply_leaf(l, f)
53 | 
54 | 
55 | def set_trainable(l, b):
56 |     apply_leaf(l, lambda m: set_trainable_attr(m, b))
57 | 
58 | 
59 | def freeze_layers(layer_groups: List, freeze_flags: List[bool]):
60 |     assert len(freeze_flags) == len(layer_groups)
61 |     for layer, flag in zip(layer_groups, freeze_flags):
62 |         set_trainable(layer, not flag)
63 | 
--------------------------------------------------------------------------------
/imet/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | # Source: https://www.kaggle.com/c/human-protein-atlas-image-classification/discussion/78109
6 | 
7 | 
8 | class FocalLoss(nn.Module):
9 |     """Adapted from: https://github.com/kuangliu/pytorch-retinanet/blob/master/loss.py
10 |     F.logsigmoid used as in https://gist.github.com/AdrienLE/bf31dfe94569319f6e47b2de8df13416#file-focal_dice_1-py
11 |     """
12 | 
13 |     def __init__(self, alpha, gamma):
14 |         super(FocalLoss, self).__init__()
15 |         assert 0 < alpha < 1
16 |         self.alpha = alpha
17 |         self.gamma = gamma
18 | 
19 |     def forward(self, x, y):
20 |         '''Focal loss.
21 |         Args:
22 |           x: (tensor) sized [N,].
23 |           y: (tensor) sized [N,].
24 |         Return:
25 |           (tensor) focal loss. 
26 |         '''
27 |         y = y.float()
28 |         pt_log = F.logsigmoid(-x * (y * 2 - 1))
29 |         # w = alpha if t > 0 else 1-alpha
30 |         at = (self.alpha * y + (1-self.alpha) * (1-y)) * 2
31 |         w = at * (pt_log * self.gamma).exp()
32 |         # Don't calculate gradients of the weights
33 |         w = w.detach()
34 |         return F.binary_cross_entropy_with_logits(x, y, w, reduction="mean")
35 | 
36 |     def __str__(self):
37 |         return f"<FocalLoss(alpha={self.alpha}, gamma={self.gamma})>"
38 | 
39 | 
40 | class FbetaLoss(nn.Module):
41 |     def __init__(self, beta=1):
42 |         super(FbetaLoss, self).__init__()
43 |         self.small_value = 1e-6
44 |         self.beta = beta
45 | 
46 |     def forward(self, logits, labels):
47 |         beta = self.beta
48 |         batch_size = logits.size()[0]
49 |         p = torch.sigmoid(logits)
50 |         l = labels
51 |         num_pos = torch.sum(p, 1) + self.small_value
52 |         num_pos_hat = torch.sum(l, 1) + self.small_value
53 |         tp = torch.sum(l * p, 1)
54 |         precise = tp / num_pos
55 |         recall = tp / num_pos_hat
56 |         fs = (1 + beta * beta) * precise * recall / \
57 |             (beta * beta * precise + recall + self.small_value)
58 |         loss = fs.sum() / batch_size
59 |         return 1 - loss
--------------------------------------------------------------------------------
/pytorch_helper_bot/helperbot/weight_decay.py:
--------------------------------------------------------------------------------
1 | from typing import Union, Sequence
2 | 
3 | from torch.optim import Optimizer
4 | 
5 | 
6 | class WeightDecayOptimizerWrapper(Optimizer):
7 |     def __init__(self, optimizer: Optimizer, weight_decay: Union[Sequence[float], float], change_with_lr: bool = True) -> None:
8 |         self.optimizer = optimizer
9 |         if isinstance(weight_decay, (list, tuple)):
10 |             assert len(weight_decay) == len(self.optimizer.param_groups)
11 |             assert all((x >= 0 for x in weight_decay))
12 |             self.weight_decays = weight_decay
13 |         else:
14 |             assert weight_decay >= 0
15 |             self.weight_decays = [weight_decay] * \
16 |                 len(self.optimizer.param_groups)
17 |         self.state = self.optimizer.state
18 |         self.change_with_lr = change_with_lr
19 | 
20 |     def step(self, closure=None) -> None:
21 |         for group, weight_decay in zip(self.optimizer.param_groups, self.weight_decays):
22 |             for param in group['params']:
23 |                 if param.grad is None or weight_decay == 0:
24 |                     continue
25 |                 if self.change_with_lr:
26 |                     param.data = param.data.add(
27 |                         -weight_decay * group['lr'], param.data)
28 |                 else:
29 |                     param.data.add_(-weight_decay, param.data)
30 |         self.optimizer.step()
31 | 
32 |     def zero_grad(self) -> None:
33 |         self.optimizer.zero_grad()
34 | 
35 |     def add_param_group(self, param_group):
36 |         self.optimizer.add_param_group(param_group)
37 | 
38 |     def load_state_dict(self, state_dict):
39 |         self.optimizer.load_state_dict(state_dict)
40 | 
41 |     def state_dict(self):
42 |         return self.optimizer.state_dict()
43 | 
44 |     def __repr__(self):
45 |         return self.optimizer.__repr__()
46 | 
47 |     def __getstate__(self):
48 |         return self.optimizer.__getstate__()
49 | 
50 |     def __setstate__(self, state):
51 |         self.optimizer.__setstate__(state)
52 |         self.state = self.optimizer.state
53 | 
54 |     @property
55 |     def param_groups(self):
56 |         return self.optimizer.param_groups
57 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/helperbot/logger.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import logging
3 | from pathlib import Path
4 | 
5 | 
6 | class Logger:
7 |     def __init__(self, model_name, log_dir, level=logging.INFO, use_tensorboard=False, echo=False):
8 |         self.model_name = model_name
9 | 
        (Path(log_dir) / "summaries").mkdir(parents=True, exist_ok=True)
10 |         date_str = datetime.now().strftime('%Y%m%d_%H%M')
11 |         log_file = 'log_{}.txt'.format(date_str)
12 |         formatter = logging.Formatter(
13 |             '[[%(asctime)s]] %(message)s',
14 |             datefmt='%m/%d/%Y %I:%M:%S %p'
15 |         )
16 |         self.logger = logging.getLogger("bot")
17 |         # Remove all existing handlers
18 |         self.logger.handlers = []
19 |         # Initialize handlers
20 |         fh = logging.FileHandler(
21 |             Path(log_dir) / Path(log_file))
22 |         fh.setFormatter(formatter)
23 |         self.logger.addHandler(fh)
24 |         if echo:
25 |             sh = logging.StreamHandler()
26 |             sh.setFormatter(formatter)
27 |             self.logger.addHandler(sh)
28 |         self.logger.setLevel(level)
29 |         self.logger.propagate = False
30 |         self.tbwriter = None
31 |         if use_tensorboard:
32 |             from tensorboardX import SummaryWriter
33 |             # Tensorboard
34 |             folder = str(
35 |                 Path(log_dir) / "summaries" /
36 |                 "{}_{}".format(self.model_name, date_str))
37 |             self.tbwriter = SummaryWriter(
38 |                 folder
39 |             )
40 | 
41 |     def info(self, msg, *args):
42 |         self.logger.info(msg, *args)
43 | 
44 |     def warning(self, msg, *args):
45 |         self.logger.warning(msg, *args)
46 | 
47 |     def debug(self, msg, *args):
48 |         self.logger.debug(msg, *args)
49 | 
50 |     def error(self, msg, *args):
51 |         self.logger.error(msg, *args)
52 | 
53 |     def tb_scalars(self, key, value, step):
54 |         if self.tbwriter is None:
55 |             self.debug("Tensorboard writer is not enabled.")
56 |         else:
57 |             if isinstance(value, dict):
58 |                 self.tbwriter.add_scalars(key, value, step)
59 |             else:
60 |                 self.tbwriter.add_scalar(key, value, step)
61 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/examples/imagenette/README.md:
--------------------------------------------------------------------------------
1 | # Imagenette Example
2 | 
3 | ## Preparation
4 | 
5 | Download the [imagenette dataset (full)](https://s3.amazonaws.com/fast-ai-imageclas/imagenette.tgz) and extract it into the `data` folder. It should then contain two folders: `train` and `val`.
6 | 
7 | ## Training instructions
8 | 
9 | Run `python main.py -h` to view all the available arguments. 
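For example, the first entry in the local results table below corresponds to flags along these lines (`--epochs 5` and `--arch seresnext50` are the defaults and are spelled out here only for clarity):

```
python main.py --batch-size 64 --lr 5e-3 --mixup-alpha 0 --epochs 5 --arch seresnext50
```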
10 | 
11 | ## Some Local Results
12 | 
13 | Hardware: i7-7700 + GTX 1070
14 | 
15 | | Size (px) | Epochs | Accuracy | Params | Arch | Log |
16 | |--|--|--|--|--|--|
17 | | 192 | 5 | 86.80% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0` | seresnext50 | [bs64_8680.txt](logs/bs64_8680.txt) |
18 | | 192 | 5 | 86.00% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0.2` | seresnext50 | [bs64_mixup02_8600.txt](logs/bs64_mixup02_8600.txt) |
19 | | 192 | 10 | 89.80% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0` | seresnext50 | [bs64_e10.txt](logs/bs64_e10.txt) |
20 | 
21 | ## Google Colab Results
22 | 
23 | [Notebook Link](https://colab.research.google.com/drive/1NppuVSUvNYIEfL7j3DEOKemhrdZFFPDg)
24 | 
25 | | Size (px) | Epochs | Accuracy | Params | Arch | Log | Amp | Time |
26 | |--|--|--|--|--|--|--|--|
27 | | 192 | 5 | 85.60% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0` | seresnext50 | [colab_o0_bs64_e5.txt](logs/colab_o0_bs64_e5.txt) | | 13min 18s |
28 | | 192 | 5 | 84.20% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0 --amp O1` | seresnext50 | [colab_o1_bs64_e5.txt](logs/colab_o1_bs64_e5.txt) | O1 | 9min 59s |
29 | | 192 | 5 | 85.80% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0 --amp O1` | seresnext50 | [colab_o1_bs64_e5_2.txt](logs/colab_o1_bs64_e5_2.txt)* | O1 | 9min 47s |
30 | | 192 | 5 | 85.40% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0 --amp O2` | seresnext50 | [colab_o2_bs64_e5.txt](logs/colab_o2_bs64_e5.txt)* | O2 | 9min 35s |
31 | | 192 | 20 | 93.40% | `--batch-size 64 --lr 3e-3 --mixup-alpha 0` | seresnext50 | [colab_o0_bs64_e20.txt](logs/colab_o0_bs64_e20.txt) | | 52min 50s |
32 | | 192 | 20 | 92.40% | `--batch-size 64 --lr 3e-3 --mixup-alpha 0 --amp O1` | seresnext50 | [colab_o1_bs64_e20.txt](logs/colab_o1_bs64_e20.txt) | O1 | 39min 31s |
33 | | 192 | 20 | 93.00% | `--batch-size 64 --lr 3e-3 --mixup-alpha 0 --amp O2` | seresnext50 | [colab_o2_bs64_e20.txt](logs/colab_o2_bs64_e20.txt)* | O2 | 38min 1s |
34 | 
35 | *: Using the newer version where the weights of the entire network are explicitly initialized. 
36 | -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/transforms.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from albumentations import ( 3 | Compose, HorizontalFlip, Rotate, HueSaturationValue, 4 | RandomBrightness, RandomContrast, RandomGamma, JpegCompression, GaussNoise, 5 | Cutout, MedianBlur, Blur, OneOf, IAAAdditiveGaussianNoise, OpticalDistortion, 6 | GridDistortion, IAAPiecewiseAffine, ShiftScaleRotate, CenterCrop, 7 | RandomCrop, CenterCrop, Resize, PadIfNeeded, RandomScale, SmallestMaxSize 8 | ) 9 | from albumentations.pytorch.transforms import ToTensor 10 | 11 | cv2.setNumThreads(0) 12 | 13 | train_transform = Compose([ 14 | SmallestMaxSize(224), 15 | RandomScale(scale_limit=0.125), 16 | # PadIfNeeded(256, 256, border_mode=cv2.BORDER_CONSTANT., value=0, p=1.), 17 | # ShiftScaleRotate( 18 | # shift_limit=0.0625, scale_limit=0.1, rotate_limit=30, 19 | # border_mode=cv2.BORDER_REFLECT_101, p=1.), 20 | Rotate(limit=20, border_mode=cv2.BORDER_REFLECT_101, p=1.), 21 | OneOf([ 22 | RandomCrop(192, 192, p=0.9), 23 | CenterCrop(192, 192, p=0.1), 24 | ], p=1.), 25 | HorizontalFlip(p=0.5), 26 | RandomContrast(limit=0.2, p=0.5), 27 | RandomGamma(gamma_limit=(80, 120), p=0.5), 28 | RandomBrightness(limit=0.2, p=0.5), 29 | # HueSaturationValue(hue_shift_limit=5, sat_shift_limit=20, 30 | # val_shift_limit=10, p=1.), 31 | # OneOf([ 32 | # OpticalDistortion(p=0.3), 33 | # GridDistortion(p=0.1), 34 | # IAAPiecewiseAffine(p=0.3), 35 | # ], p=0.2), 36 | # OneOf([ 37 | # IAAAdditiveGaussianNoise( 38 | # loc=0, scale=(1., 6.75), per_channel=False, p=0.3), 39 | # GaussNoise(var_limit=(5.0, 20.0), p=0.6), 40 | # ], p=0.5), 41 | # Cutout(num_holes=4, max_h_size=30, max_w_size=50, p=0.75), 42 | # JpegCompression(quality_lower=50, quality_upper=100, p=0.5) 43 | ]) 44 | 45 | 46 | test_transform = Compose([ 47 | # RandomScale(scale_limit=0.125), 48 | SmallestMaxSize(224), 49 | # PadIfNeeded(256, 256, border_mode=cv2.BORDER_REFLECT_101, value=0, p=1.), 50 | # OneOf([ 51 | # RandomCrop(224, 224, p=0.9), 52 | # CenterCrop(224, 224, p=0.1), 53 | # ], p=1.), 54 | CenterCrop(192, 192, p=1.) 
55 |     # HorizontalFlip(p=0.5),
56 | ])
57 | 
58 | 
59 | tensor_transform = ToTensor(normalize=dict(
60 |     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
61 | )
62 | 
--------------------------------------------------------------------------------
/imet/dataset.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Callable, List
3 | 
4 | import cv2
5 | import torch
6 | import pandas as pd
7 | import numpy as np
8 | from torch.utils.data import Dataset
9 | 
10 | from .transforms import tensor_transform
11 | from .utils import ON_KAGGLE
12 | 
13 | 
14 | N_CLASSES = 1103
15 | DATA_ROOT = Path('../input/imet-2019-fgvc6' if ON_KAGGLE else './data')
16 | 
17 | 
18 | class TrainDataset(Dataset):
19 |     def __init__(self, root: Path, df: pd.DataFrame,
20 |                  image_transform: Callable, debug: bool = True):
21 |         super().__init__()
22 |         self._root = root
23 |         self._df = df
24 |         self._image_transform = image_transform
25 |         self._debug = debug
26 | 
27 |     def __len__(self):
28 |         return len(self._df)
29 | 
30 |     def __getitem__(self, idx: int):
31 |         item = self._df.iloc[idx]
32 |         image = load_transform_image(
33 |             item, self._root, self._image_transform, debug=self._debug)
34 |         target = torch.from_numpy(
35 |             item.iloc[1:-1].values.astype("float32")).float()
36 |         return image, target
37 | 
38 | 
39 | class TestDataset(Dataset):
40 |     def __init__(self, root: Path, df: pd.DataFrame,
41 |                  image_transform: Callable, debug: bool = True):
42 |         self._root = root
43 |         self._df = df
44 |         self._image_transform = image_transform
45 |         self._debug = debug
46 | 
47 |     def __len__(self):
48 |         return len(self._df)
49 | 
50 |     def __getitem__(self, idx):
51 |         item = self._df.iloc[idx]
52 |         image = load_transform_image(
53 |             item, self._root, self._image_transform, debug=self._debug)
54 |         return image, 0
55 | 
56 | 
57 | def load_transform_image(
58 |         item, root: Path, image_transform: Callable, debug: bool = False):
59 |     image = load_image(item, root)
60 |     image = image_transform(image=image)["image"]
61 |     if debug:
62 |         cv2.imwrite('_debug.png', cv2.cvtColor(image, cv2.COLOR_RGB2BGR))  # the transformed image is a numpy array (RGB), not a PIL image
63 |     tensor = tensor_transform(image=image)["image"]
64 |     return tensor
65 | 
66 | 
67 | def load_image(item, root: Path) -> np.ndarray:
68 |     image = cv2.imread(str(root / f'{item.id}.png'))
69 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
70 |     # base_size = min(image.shape[0], image.shape[1])
71 |     # ratio = 256 / base_size
72 |     # image = cv2.resize(image, None, fx=ratio, fy=ratio,
73 |     #                    interpolation=cv2.INTER_CUBIC)
74 |     return image
75 | 
76 | 
77 | def get_ids(root: Path) -> List[str]:
78 |     return sorted({p.name.split('_')[0] for p in root.glob('*.png')})
79 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/examples/imagenette/models.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | 
3 | import torch
4 | import numpy as np
5 | import pretrainedmodels
6 | from torch import nn
7 | from torch.nn import functional as F
8 | 
9 | 
10 | class Flatten(nn.Module):
11 |     def forward(self, x):
12 |         return x.view(x.size(0), -1)
13 | 
14 | 
15 | def create_net(net_cls, pretrained: bool):
16 |     net = net_cls(pretrained=pretrained)
17 |     return net
18 | 
19 | 
20 | def get_head(nf: int, n_classes):
21 |     model = nn.Sequential(
22 |         nn.AdaptiveAvgPool2d(1),
23 |         Flatten(),
24 |         # nn.Dropout(p=0.25),
25 |         nn.Linear(nf, n_classes)
26 |     )
27 |     return model
28 | 
29 | 
30 | def init_weights(model):
31 |     for i, module 
in enumerate(model): 32 | if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d)): 33 | if module.weight is not None: 34 | nn.init.uniform_(module.weight) 35 | if module.bias is not None: 36 | nn.init.constant_(module.bias, 0) 37 | if isinstance(module, (nn.Linear, nn.Conv2d, nn.Conv1d)): 38 | if getattr(module, "weight_v", None) is not None: 39 | print("Initing linear with weight normalization") 40 | assert model[i].weight_g is not None 41 | else: 42 | nn.init.kaiming_normal_(module.weight) 43 | print("Initing linear") 44 | if module.bias is not None: 45 | nn.init.constant_(module.bias, 0) 46 | return model 47 | 48 | 49 | def get_seresnet_model(arch: str = "se_resnext50_32x4d", n_classes: int = 10, pretrained: bool = False): 50 | full = pretrainedmodels.__dict__[arch]( 51 | pretrained='imagenet' if pretrained else None) 52 | model = nn.Sequential( 53 | nn.Sequential(full.layer0, full.layer1, full.layer2, full.layer3[:3]), 54 | nn.Sequential(full.layer3[3:], full.layer4), 55 | get_head(2048, n_classes)) 56 | print(" | ".join([ 57 | "{:,d}".format(np.sum([p.numel() for p in x.parameters()])) for x in model])) 58 | return init_weights(model) 59 | 60 | 61 | def get_densenet_model(arch: str = "densenet169", n_classes: int = 10, pretrained: bool = False): 62 | full = pretrainedmodels.__dict__[arch]( 63 | pretrained='imagenet' if pretrained else None) 64 | print(len(full.features)) 65 | model = nn.Sequential( 66 | nn.Sequential(*full.features[:8]), 67 | nn.Sequential(*full.features[8:]), 68 | get_head(full.features[-1].num_features, n_classes)) 69 | print(" | ".join([ 70 | "{:,d}".format(np.sum([p.numel() for p in x.parameters()])) for x in model])) 71 | return init_weights(model) 72 | 73 | 74 | class Swish(nn.Module): 75 | def forward(self, x): 76 | """ Swish activation function """ 77 | return x * torch.sigmoid(x) 78 | -------------------------------------------------------------------------------- /pytorch_helper_bot/helperbot/metrics.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Tuple 3 | 4 | import torch 5 | import numpy as np 6 | from sklearn.metrics import fbeta_score, roc_auc_score 7 | from sklearn.exceptions import UndefinedMetricWarning 8 | 9 | 10 | class Metric: 11 | name = "metric" 12 | 13 | def __call__(self, truth: torch.Tensor, pred: torch.Tensor) -> Tuple[float, str]: 14 | raise NotImplementedError() 15 | 16 | 17 | class FBeta(Metric): 18 | """FBeta for binary targets""" 19 | name = "fbeta" 20 | 21 | def __init__(self, step, beta=2, average="binary"): 22 | self.step = step 23 | self.beta = beta 24 | self.average = average 25 | 26 | def __call__(self, truth: torch.Tensor, pred: torch.Tensor) -> Tuple[float, str]: 27 | best_fbeta, best_thres = self.find_best_fbeta_threshold( 28 | truth.numpy(), torch.sigmoid(pred).numpy(), 29 | step=self.step, beta=self.beta) 30 | return best_fbeta * -1, f"{best_fbeta:.4f} @ {best_thres:.2f}" 31 | 32 | def find_best_fbeta_threshold(self, truth, probs, beta=2, step=0.05): 33 | best, best_thres = 0, -1 34 | with warnings.catch_warnings(): 35 | warnings.simplefilter('ignore', category=UndefinedMetricWarning) 36 | for thres in np.arange(step, 1, step): 37 | current = fbeta_score( 38 | truth, (probs >= thres).astype("int8"), 39 | beta=beta, average=self.average) 40 | if current > best: 41 | best = current 42 | best_thres = thres 43 | return best, best_thres 44 | 45 | 46 | class AUC(Metric): 47 | """AUC for binary targets""" 48 | name = "auc" 49 | 50 | def __call__(self, 
truth: torch.Tensor, pred: torch.Tensor) -> Tuple[float, str]:
51 |         auc_score = roc_auc_score(
52 |             truth.numpy(), torch.sigmoid(pred).numpy())
53 |         return auc_score * -1, f"{auc_score * 100:.2f}"
54 | 
55 | 
56 | class Top1Accuracy(Metric):
57 |     name = "accuracy"
58 | 
59 |     def __call__(self, truth: torch.Tensor, pred: torch.Tensor) -> Tuple[float, str]:
60 |         correct = torch.sum(
61 |             truth.view(-1) == torch.argmax(pred, dim=-1).view(-1)).item()
62 |         total = truth.view(-1).size(0)
63 |         accuracy = (correct / total)
64 |         return accuracy * -1, f"{accuracy * 100:.2f}%"
65 | 
66 | 
67 | class TopKAccuracy(Metric):
68 |     def __init__(self, k=1):
69 |         self.name = f"top_{k}_accuracy"
70 |         self.k = k
71 | 
72 |     def __call__(self, truth: torch.Tensor, pred: torch.Tensor) -> Tuple[float, str]:
73 |         with torch.no_grad():
74 |             _, pred = pred.topk(self.k, dim=1, largest=True, sorted=True)
75 |             pred = pred.t()
76 |             correct = pred.eq(
77 |                 truth.view(1, -1).expand_as(pred)
78 |             ).view(-1).float().sum(0, keepdim=True)
79 |             accuracy = correct.mul_(100.0 / truth.size(0)).item()
80 |             return accuracy * -1, f"{accuracy:.2f}%"
81 | 
--------------------------------------------------------------------------------
/imet/make_folds.py:
--------------------------------------------------------------------------------
1 | import random
2 | import argparse
3 | from collections import defaultdict, Counter
4 | from typing import Dict
5 | 
6 | import numpy as np
7 | import pandas as pd
8 | from tqdm import tqdm
9 | 
10 | from .dataset import DATA_ROOT
11 | from .main import CACHE_DIR
12 | from .utils import ON_KAGGLE
13 | 
14 | N_CLASSES = 1103
15 | 
16 | 
17 | def expand_labels():
18 |     print("Expanding labels...")
19 |     df = pd.read_csv(DATA_ROOT / 'train.csv')
20 |     df_label_names = pd.read_csv(DATA_ROOT / "labels.csv")
21 |     labels = np.zeros((len(df), N_CLASSES)).astype("uint8")
22 |     for i, row in tqdm(df.iterrows(), total=df.shape[0], disable=ON_KAGGLE):
23 |         for label in row['attribute_ids'].split(' '):
24 |             labels[i, int(label)] = 1
25 |     df_labels = pd.DataFrame(
26 |         labels,
27 |         index=df.index, columns=df_label_names.attribute_name.values
28 |     )
29 |     df = pd.concat([df[["id"]], df_labels], axis=1)
30 |     df.to_pickle(str(CACHE_DIR / "train_expanded_labels.pickle"))
31 |     return df
32 | 
33 | 
34 | def make_folds(n_folds: int, min_occurrence: int = 30) -> pd.DataFrame:
35 |     from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
36 |     try:
37 |         df = pd.read_pickle(DATA_ROOT / "train_expanded_labels.pickle")
38 |     except FileNotFoundError:  # fall back to building the expanded labels
39 |         df = expand_labels()
40 |     skf = MultilabelStratifiedKFold(
41 |         n_splits=n_folds, random_state=42, shuffle=True)
42 |     print("Creating folds...")
43 |     labels_to_use = (np.sum(df.iloc[:, 1:].values, axis=0) > min_occurrence)
44 |     empty_rows = np.sum(df.iloc[:, 1:].values[:, labels_to_use], axis=1) == 0
45 |     print("Empty rows after truncating:", sum(empty_rows))
46 |     print("Eligible labels:", sum(labels_to_use))
47 |     df = df[~empty_rows]
48 |     folds = np.array([-1] * len(df))
49 |     for fold, (_, valid_idx) in enumerate(skf.split(df[["id"]], df.iloc[:, 1:].values[:, labels_to_use])):
50 |         folds[valid_idx] = fold
51 |     df['fold'] = folds
52 |     return df
53 | 
54 | 
55 | def make_folds_reference(n_folds: int) -> pd.DataFrame:
56 |     df = pd.read_csv(DATA_ROOT / 'train.csv')
57 |     cls_counts = Counter(cls for classes in df['attribute_ids'].str.split()
58 |                          for cls in classes)
59 |     fold_cls_counts: Dict = defaultdict(int)
60 |     folds = [-1] * len(df)
61 |     for item in tqdm(df.sample(frac=1, random_state=42).itertuples(), 
62 |                      total=len(df), disable=ON_KAGGLE):
63 |         cls = min(item.attribute_ids.split(), key=lambda cls: cls_counts[cls])
64 |         fold_counts = [(f, fold_cls_counts[f, cls]) for f in range(n_folds)]
65 |         min_count = min([count for _, count in fold_counts])
66 |         random.seed(item.Index)
67 |         fold = random.choice([f for f, count in fold_counts
68 |                               if count == min_count])
69 |         folds[item.Index] = fold
70 |         for cls in item.attribute_ids.split():
71 |             fold_cls_counts[fold, cls] += 1
72 |     df['fold'] = folds
73 |     return df
74 | 
75 | 
76 | def main():
77 |     parser = argparse.ArgumentParser()
78 |     parser.add_argument('--n-folds', type=int, default=10)
79 |     args = parser.parse_args()
80 |     df = make_folds(n_folds=args.n_folds)
81 |     df.to_pickle(CACHE_DIR / 'folds.pkl')
82 | 
83 | 
84 | if __name__ == '__main__':
85 |     main()
86 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/helperbot/callbacks.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | 
4 | 
5 | class Callback:
6 |     def on_batch_inputs(self, bot, input_tensors, targets):
7 |         return input_tensors, targets
8 | 
9 |     def on_epoch_ends(self, bot, epoch):
10 |         return
11 | 
12 |     def on_step_ends(self, bot):
13 |         return
14 | 
15 | 
16 | class MixUpCallback(Callback):
17 |     """Assumes the first dimension is batch.
18 | 
19 |     Reference: https://github.com/fastai/fastai/blob/master/fastai/callbacks/mixup.py
20 |     """
21 | 
22 |     def __init__(self, alpha: float = 0.4, softmax_target: bool = False):
23 |         super().__init__()
24 |         self.alpha = alpha
25 |         self.softmax_target = softmax_target
26 | 
27 |     def on_batch_inputs(self, bot, input_tensors, targets):
28 |         batch = input_tensors[0]
29 |         permuted_idx = torch.randperm(batch.size(0)).to(batch.device)
30 |         lambd = np.random.beta(self.alpha, self.alpha, batch.size(0))
31 |         lambd = np.concatenate(
32 |             [lambd[:, np.newaxis], 1-lambd[:, np.newaxis]], axis=1
33 |         ).max(axis=1)
34 |         # Create the tensor and expand (for batch inputs)
35 |         lambd_tensor = batch.new(lambd).view(
36 |             -1, *[1 for _ in range(len(batch.size())-1)]
37 |         ).expand(-1, *batch.shape[1:])
38 |         # Combine input batch
39 |         new_batch = (batch * lambd_tensor +
40 |                      batch[permuted_idx] * (1-lambd_tensor))
41 |         # Create the tensor and expand (for target)
42 |         lambd_tensor = batch.new(lambd).view(
43 |             -1, *[1 for _ in range(len(targets.size())-1)]
44 |         ).expand(-1, *targets.shape[1:])
45 |         # Combine targets
46 |         if self.softmax_target:
47 |             new_targets = torch.stack([
48 |                 targets.float(), targets[permuted_idx].float(), lambd_tensor
49 |             ], dim=1)
50 |         else:
51 |             new_targets = (
52 |                 targets * lambd_tensor +
53 |                 targets[permuted_idx] * (1-lambd_tensor)
54 |             )
55 |         input_tensors[0] = new_batch
56 |         return input_tensors, new_targets
57 | 
58 | 
59 | class LearningRateSchedulerCallback(Callback):
60 |     def __init__(self, scheduler):
61 |         super().__init__()
62 |         self.scheduler = scheduler
63 | 
64 |     def on_step_ends(self, bot):
65 |         self.scheduler.step()
66 | 
67 | 
68 | class StepwiseLinearPropertyScheduler(Callback):
69 |     def __init__(self, target_obj, property_name, start_val, end_val, decay_start_step, decay):
70 |         self.target_obj = target_obj
71 |         self.property_name = property_name
72 |         self.start_val = start_val
73 |         self.end_val = end_val
74 |         self.decay_start_step = decay_start_step
75 |         self.decay = decay
76 | 
77 |     def on_step_ends(self, bot):
78 |         if bot.step % 200 == 0:
79 |             bot.logger.info(
80 |                 "%s %s %.4f",
81 | 
self.target_obj.__class__.__name__, 82 | self.property_name, 83 | getattr(self.target_obj, self.property_name)) 84 | new_val = self.get_value(bot) 85 | setattr(self.target_obj, self.property_name, new_val) 86 | 87 | def get_value(self, bot): 88 | if self.start_val == self.end_val or bot.step <= self.decay_start_step: 89 | return self.start_val 90 | change = (self.end_val - self.start_val) * min( 91 | ((bot.step - self.decay_start_step) * self.decay), 1 92 | ) 93 | return self.start_val + change 94 | -------------------------------------------------------------------------------- /imet/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | import math 3 | 4 | import cv2 5 | from PIL import Image 6 | from torchvision.transforms import ( 7 | Normalize, Compose, Resize) 8 | from albumentations import ( 9 | Compose, HorizontalFlip, Rotate, HueSaturationValue, 10 | RandomBrightness, RandomContrast, RandomGamma, JpegCompression, GaussNoise, 11 | Cutout, MedianBlur, Blur, OneOf, IAAAdditiveGaussianNoise, OpticalDistortion, 12 | GridDistortion, IAAPiecewiseAffine, ShiftScaleRotate, CenterCrop, 13 | RandomCrop, CenterCrop, Resize, PadIfNeeded, RandomScale, SmallestMaxSize 14 | ) 15 | import albumentations.augmentations.functional as F 16 | from albumentations.pytorch.transforms import ToTensor 17 | 18 | cv2.setNumThreads(0) 19 | 20 | 21 | class RandomCropIfNeeded(RandomCrop): 22 | """Take from: 23 | https://www.kaggle.com/c/imet-2019-fgvc6/discussion/94687 24 | """ 25 | 26 | def __init__(self, height, width, always_apply=False, p=1.0): 27 | super().__init__(always_apply, p) 28 | self.height = height 29 | self.width = width 30 | 31 | def apply(self, img, h_start=0, w_start=0, **params): 32 | h, w, _ = img.shape 33 | return F.random_crop(img, min(self.height, h), min(self.width, w), h_start, w_start) 34 | 35 | 36 | def get_train_transform(border_mode, size=320): 37 | return Compose([ 38 | # PadIfNeeded(256, 256, border_mode=cv2.BORDER_CONSTANT., value=0, p=1.), 39 | # ShiftScaleRotate( 40 | # shift_limit=0.0625, scale_limit=0.1, rotate_limit=30, 41 | # border_mode=cv2.BORDER_REFLECT_101, p=1.), 42 | # RandomScale(scale_limit=0.125), 43 | # HorizontalFlip(p=0.5), 44 | # RandomContrast(limit=0.2, p=0.5), 45 | # RandomGamma(gamma_limit=(80, 120), p=0.5), 46 | # RandomBrightness(limit=0.2, p=0.5), 47 | # Rotate(limit=20, border_mode=border_mode, p=1.), 48 | HorizontalFlip(p=0.5), 49 | OneOf([ 50 | RandomBrightness(0.1, p=1), 51 | RandomContrast(0.1, p=1), 52 | ], p=0.3), 53 | ShiftScaleRotate(shift_limit=0.1, scale_limit=0.0, 54 | rotate_limit=15, p=0.3), 55 | IAAAdditiveGaussianNoise(p=0.3), 56 | RandomCropIfNeeded(size * 2, size * 2), 57 | Resize(size, size), 58 | # HueSaturationValue(hue_shift_limit=5, sat_shift_limit=20, 59 | # val_shift_limit=10, p=1.), 60 | # OneOf([ 61 | # OpticalDistortion(p=0.3), 62 | # GridDistortion(p=0.1), 63 | # IAAPiecewiseAffine(p=0.3), 64 | # ], p=0.2), 65 | # OneOf([ 66 | # IAAAdditiveGaussianNoise( 67 | # loc=0, scale=(1., 6.75), per_channel=False, p=0.3), 68 | # GaussNoise(var_limit=(5.0, 20.0), p=0.6), 69 | # ], p=0.5), 70 | # Cutout(num_holes=4, max_h_size=30, max_w_size=50, p=0.75), 71 | # JpegCompression(quality_lower=50, quality_upper=100, p=0.5) 72 | ]) 73 | 74 | 75 | def get_test_transform(size=320, flip=True): 76 | transformations = [ 77 | # SmallestMaxSize(320), 78 | # RandomScale(scale_limit=0.125), 79 | # PadIfNeeded(256, 256, border_mode=cv2.BORDER_REFLECT_101, value=0, p=1.), 80 | # OneOf([ 81 | 
        #     RandomCrop(256, 256, p=0.9),
82 |         #     CenterCrop(256, 256, p=0.1),
83 |         # ], p=1.),
84 |         RandomCropIfNeeded(size * 2, size * 2),
85 |         Resize(size, size),
86 |     ]
87 |     if flip:
88 |         transformations.append(HorizontalFlip(p=1.))
89 |     return Compose(transformations)
90 | 
91 | 
92 | tensor_transform = ToTensor(normalize=dict(
93 |     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
94 | )
95 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # imet-collection-2019
2 | 
3 | A fairly generic solution to iMet Collection 2019 - FGVC6 on Kaggle
4 | 
5 | Credit: This solution is built upon [Konstantin Lopuhin's public baseline](https://github.com/lopuhin/kaggle-imet-2019).
6 | 
7 | ## Requirements
8 | 
9 | Directly taken from [requirements.txt](requirements.txt) (they're also in [setup.py](setup.py)):
10 | 
11 | * torch>=1.0.0
12 | * albumentations>=0.2.3
13 | * pretrainedmodels>=0.7.4
14 | * tqdm==4.29.1
15 | * scikit-learn>=0.21.2
16 | * pandas>=0.24.0
17 | * helperbot>=0.1.3
18 | 
19 | `helperbot` is included in this repo via `git subtree`. Install it after PyTorch and before everything else:
20 | 
21 | ```
22 | cd pytorch_helper_bot && pip install .
23 | ```
24 | 
25 | ## Environments
26 | 
27 | I trained all my models using Kaggle Kernels. Example public kernels can be found at:
28 | 
29 | * [Trainer](https://www.kaggle.com/ceshine/imet-trainer)
30 | * [Validation (with TTA)](https://www.kaggle.com/ceshine/imet-validation-kernel-public)
31 | * [Inference - 256x256](https://www.kaggle.com/ceshine/imet-inference-kernel-public?scriptVersionId=16663008) - Private score *0.614* with 3 models (already in bronze range).
32 | * [Inference - 320x320](https://www.kaggle.com/ceshine/imet-inference-kernel-public?scriptVersionId=17048642) - Private score *0.622* with 3 models.
33 | 
34 | One drawback of Kaggle Kernels is that it's hard to control the version of PyTorch. My models from the competition were trained with PyTorch 1.0, but they cannot be loaded in the post-competition kernels due to this [compatibility issue](https://github.com/pytorch/pytorch/issues/20756). (The issue was fixed in the PyTorch master branch, but had not been released yet at the time of writing.)
35 | 
36 | To avoid this kind of hassle in the future, I started to keep two versions of each trained model: one that contains the fully pickled model, saved with `torch.save(model, f'final_{fold}.pth')` to speed up experiment iteration, and one that contains only the model weights plus the name of the architecture as a failover, saved with `torch.save([args.arch, model.state_dict()], f'failover_{args.arch}_{fold}.pth')`.
37 | 
38 | ### Freezing the first three (Resnet) layers
39 | 
40 | The 10th place solution suggested that training only the last two (Resnet) layers is sufficient to get good accuracies. This technique allows us to train se-resnext101 models on 320x320 images in Kaggle Kernels faster. (Otherwise the models would be underfit and underperform relative to se-resnext50.)
41 | 
42 | The code that freezes the first three layers lives in the [*freezing*](https://github.com/ceshine/imet-collection-2019/tree/freezing) branch; a minimal sketch of the idea follows below. 
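The sketch below illustrates the idea, assuming the three-block `nn.Sequential` layout from `imet/models.py` (early layers, the last two layers, classifier head) and the `freeze_layers` helper from helperbot; it is not the exact code in the *freezing* branch.

```
from helperbot import freeze_layers

from imet.models import get_seresnet_model

# The model is an nn.Sequential with three children:
# [layer0 + layer1 + layer2 + layer3[:3], layer3[3:] + layer4, head].
model = get_seresnet_model(
    arch="se_resnext101_32x4d", n_classes=1103, pretrained=True)
# Freeze the first group; keep the last two (Resnet) layers
# and the head trainable.
freeze_layers(list(model), freeze_flags=[True, False, False])
```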
43 | 44 | * [3-model se-resnext101 Inference](https://www.kaggle.com/ceshine/imet-inference-kernel-public?scriptVersionId=17497470) - private 0.625 45 | * [8-model se-resnext101 Inference](https://www.kaggle.com/ceshine/imet-inference-kernel-public?scriptVersionId=17498665) - private 0.629 (near silver range) 46 | 47 | ## Instructions 48 | 49 | ### Making K-Fold validation sets 50 | 51 | Example: 52 | 53 | ``` 54 | python -m imet.make_folds --n-folds 10 55 | ``` 56 | 57 | This will create a `folds.pkl` that you can reuse later. 58 | 59 | ### Training model 60 | 61 | Example: 62 | 63 | ``` 64 | python -m imet.main train --batch-size 48 --epochs 11 --fold 0 --arch seresnext101 --early-stop 4 65 | ``` 66 | 67 | ### Evaluating model (with TTA) 68 | 69 | Example: 70 | 71 | ``` 72 | python -m imet.main validate --fold 0 --batch-size 256 --tta 4 --model . 73 | ``` 74 | 75 | ### Making Predictions (with TTA) 76 | 77 | Example: 78 | 79 | ``` 80 | python -m imet.main predict_test --batch-size 256 --fold 0 --tta 5 --model ./seresnext50/ 81 | ``` 82 | 83 | Then create a submission file (this one only uses predictions from three models): 84 | 85 | ``` 86 | python -m imet.make_submission test_0 test_1 test_2 --threshold 0.09 87 | ``` -------------------------------------------------------------------------------- /imet/partialconv2d.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # BSD 3-Clause License 3 | # 4 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Author & Contact: Guilin Liu (guilinl@nvidia.com) 7 | ############################################################################### 8 | 9 | import torch 10 | import torch.nn.functional as F 11 | from torch import nn, cuda 12 | from torch.autograd import Variable 13 | 14 | 15 | class PartialConv2d(nn.Conv2d): 16 | def __init__(self, *args, **kwargs): 17 | 18 | # whether the mask is multi-channel or not 19 | if 'multi_channel' in kwargs: 20 | self.multi_channel = kwargs['multi_channel'] 21 | kwargs.pop('multi_channel') 22 | else: 23 | self.multi_channel = False 24 | 25 | if 'return_mask' in kwargs: 26 | self.return_mask = kwargs['return_mask'] 27 | kwargs.pop('return_mask') 28 | else: 29 | self.return_mask = False 30 | 31 | super(PartialConv2d, self).__init__(*args, **kwargs) 32 | 33 | if self.multi_channel: 34 | self.weight_maskUpdater = torch.ones( 35 | self.out_channels, self.in_channels, self.kernel_size[0], self.kernel_size[1]) 36 | else: 37 | self.weight_maskUpdater = torch.ones( 38 | 1, 1, self.kernel_size[0], self.kernel_size[1]) 39 | 40 | self.slide_winsize = self.weight_maskUpdater.shape[1] * \ 41 | self.weight_maskUpdater.shape[2] * self.weight_maskUpdater.shape[3] 42 | 43 | self.last_size = (None, None, None, None) 44 | self.update_mask = None 45 | self.mask_ratio = None 46 | 47 | def forward(self, input, mask_in=None): 48 | assert len(input.shape) == 4 49 | if mask_in is not None or self.last_size != tuple(input.shape): 50 | self.last_size = tuple(input.shape) 51 | 52 | with torch.no_grad(): 53 | if self.weight_maskUpdater.type() != input.type(): 54 | self.weight_maskUpdater = self.weight_maskUpdater.to(input) 55 | 56 | if mask_in is None: 57 | # if mask is not provided, create a mask 58 | if self.multi_channel: 59 | mask = torch.ones( 60 | input.data.shape[0], input.data.shape[1], input.data.shape[2], input.data.shape[3]).to(input) 61 | else: 62 | mask = torch.ones( 63 | 1, 1, 
input.data.shape[2], input.data.shape[3]).to(input) 64 | else: 65 | mask = mask_in 66 | 67 | self.update_mask = F.conv2d(mask, self.weight_maskUpdater, bias=None, 68 | stride=self.stride, padding=self.padding, dilation=self.dilation, groups=1) 69 | 70 | self.mask_ratio = self.slide_winsize/(self.update_mask + 1e-8) 71 | # self.mask_ratio = torch.max(self.update_mask)/(self.update_mask + 1e-8) 72 | self.update_mask = torch.clamp(self.update_mask, 0, 1) 73 | self.mask_ratio = torch.mul(self.mask_ratio, self.update_mask) 74 | 75 | # if self.update_mask.type() != input.type() or self.mask_ratio.type() != input.type(): 76 | # self.update_mask.to(input) 77 | # self.mask_ratio.to(input) 78 | 79 | raw_out = super(PartialConv2d, self).forward( 80 | torch.mul(input, mask) if mask_in is not None else input) 81 | 82 | if self.bias is not None: 83 | bias_view = self.bias.view(1, self.out_channels, 1, 1) 84 | output = torch.mul(raw_out - bias_view, 85 | self.mask_ratio) + bias_view 86 | output = torch.mul(output, self.update_mask) 87 | else: 88 | output = torch.mul(raw_out, self.mask_ratio) 89 | 90 | if self.return_mask: 91 | return output, self.update_mask 92 | else: 93 | return output 94 | -------------------------------------------------------------------------------- /pytorch_helper_bot/helperbot/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from torch.optim.lr_scheduler import _LRScheduler 2 | from torch.optim import Optimizer 3 | 4 | 5 | class BaseLRScheduler(_LRScheduler): 6 | def __init__(self, optimizer, last_epoch=-1): 7 | """Intentionally not calling super().__init__()""" 8 | if not isinstance(optimizer, Optimizer): 9 | flag = False 10 | try: 11 | from apex.fp16_utils.fp16_optimizer import FP16_Optimizer 12 | if isinstance(optimizer, FP16_Optimizer): 13 | flag = True 14 | except ModuleNotFoundError: 15 | pass 16 | if not flag: 17 | raise TypeError('{} is not an Optimizer'.format( 18 | type(optimizer).__name__)) 19 | self.optimizer = optimizer 20 | if last_epoch == -1: 21 | for group in optimizer.param_groups: 22 | group.setdefault('initial_lr', group['lr']) 23 | else: 24 | for i, group in enumerate(optimizer.param_groups): 25 | if 'initial_lr' not in group: 26 | raise KeyError("param 'initial_lr' is not specified " 27 | "in param_groups[{}] when resuming an optimizer".format(i)) 28 | self.base_lrs = list( 29 | map(lambda group: group['initial_lr'], optimizer.param_groups)) 30 | self.step(last_epoch + 1) 31 | self.last_epoch = last_epoch 32 | 33 | 34 | class TriangularLR(BaseLRScheduler): 35 | def __init__(self, optimizer, max_mul, ratio, steps_per_cycle, decay=1, last_epoch=-1): 36 | self.max_mul = max_mul - 1 37 | self.turning_point = steps_per_cycle // (ratio + 1) 38 | self.steps_per_cycle = steps_per_cycle 39 | self.decay = decay 40 | self.history = [] 41 | super().__init__(optimizer, last_epoch) 42 | 43 | def get_lr(self): 44 | residual = self.last_epoch % self.steps_per_cycle 45 | multiplier = self.decay ** (self.last_epoch // self.steps_per_cycle) 46 | if residual <= self.turning_point: 47 | multiplier *= self.max_mul * (residual / self.turning_point) 48 | else: 49 | multiplier *= self.max_mul * ( 50 | (self.steps_per_cycle - residual) / 51 | (self.steps_per_cycle - self.turning_point)) 52 | new_lr = [ 53 | lr * (1 + multiplier) / (self.max_mul + 1) for lr in self.base_lrs] 54 | self.history.append(new_lr) 55 | return new_lr 56 | 57 | 58 | class GradualWarmupScheduler(BaseLRScheduler): 59 | """ Gradually warm-up(increasing) 
learning rate in optimizer. 60 | Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'. 61 | Source: https://github.com/ildoonet/pytorch-gradual-warmup-lr/blob/master/warmup_scheduler/scheduler.py 62 | Args: 63 | optimizer (Optimizer): Wrapped optimizer. 64 | multiplier: target learning rate = base lr * multiplier 65 | total_epoch: target learning rate is reached at total_epoch, gradually 66 | after_scheduler: after target_epoch, use this scheduler(eg. ReduceLROnPlateau) 67 | """ 68 | 69 | def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None): 70 | self.multiplier = multiplier 71 | if self.multiplier <= 1.: 72 | raise ValueError('multiplier should be greater than 1.') 73 | self.total_epoch = total_epoch 74 | self.after_scheduler = after_scheduler 75 | self.finished = False 76 | super().__init__(optimizer) 77 | 78 | def get_lr(self): 79 | if self.last_epoch > self.total_epoch: 80 | if self.after_scheduler: 81 | if not self.finished: 82 | self.after_scheduler.base_lrs = self.base_lrs 83 | self.finished = True 84 | return self.after_scheduler.get_lr() 85 | return self.base_lrs 86 | return [base_lr / self.multiplier * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs] 87 | 88 | def step(self, epoch=None): 89 | if self.finished and self.after_scheduler: 90 | return self.after_scheduler.step(epoch) 91 | else: 92 | return super(GradualWarmupScheduler, self).step(epoch) 93 | -------------------------------------------------------------------------------- /imet/models.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import torch 4 | import numpy as np 5 | import pretrainedmodels 6 | from torch import nn 7 | from torch.nn import functional as F 8 | import torchvision.models as M 9 | # from efficientnet_pytorch import EfficientNet 10 | 11 | from . 
import seresnet_partial 12 | from .utils import ON_KAGGLE 13 | 14 | 15 | class Flatten(nn.Module): 16 | def forward(self, x): 17 | return x.view(x.size(0), -1) 18 | 19 | 20 | def create_net(net_cls, pretrained: bool): 21 | if ON_KAGGLE and pretrained: 22 | net = net_cls() 23 | model_name = net_cls.__name__ 24 | weights_path = f'../input/{model_name}/{model_name}.pth' 25 | net.load_state_dict(torch.load(weights_path)) 26 | else: 27 | net = net_cls(pretrained=pretrained) 28 | return net 29 | 30 | 31 | def get_head(nf: int, n_classes): 32 | model = nn.Sequential( 33 | nn.ReLU(), 34 | nn.AdaptiveAvgPool2d(1), 35 | Flatten(), 36 | # nn.BatchNorm1d(nf), 37 | nn.Dropout(p=0.25), 38 | nn.Linear(nf, n_classes) 39 | # nn.BatchNorm1d(nf), 40 | # nn.Dropout(p=0.25), 41 | # nn.Linear(nf, 1024), 42 | # nn.BatchNorm1d(1024), 43 | # nn.Dropout(p=0.25), 44 | # nn.Linear(1024, n_classes) 45 | ) 46 | for i, module in enumerate(model): 47 | if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d)): 48 | if module.weight is not None: 49 | nn.init.uniform_(module.weight) 50 | if module.bias is not None: 51 | nn.init.constant_(module.bias, 0) 52 | if isinstance(module, nn.Linear): 53 | if getattr(module, "weight_v", None) is not None: 54 | print("Initing linear with weight normalization") 55 | assert model[i].weight_g is not None 56 | else: 57 | nn.init.kaiming_normal_(module.weight) 58 | print("Initing linear") 59 | if module.bias is not None: 60 | nn.init.constant_(module.bias, 0) 61 | return model 62 | 63 | 64 | def get_seresnet_model(arch: str = "se_resnext101_32x4d", n_classes: int = 1103, pretrained=True): 65 | full = pretrainedmodels.__dict__[arch]( 66 | pretrained='imagenet' if pretrained else None) 67 | model = nn.Sequential( 68 | nn.Sequential(full.layer0, full.layer1, full.layer2, full.layer3[:3]), 69 | nn.Sequential(full.layer3[3:], full.layer4), 70 | get_head(2048, n_classes)) 71 | print(" | ".join([ 72 | "{:,d}".format(np.sum([p.numel() for p in x.parameters()])) for x in model])) 73 | return model 74 | 75 | 76 | def get_seresnet_partial_model(arch: str = "se_resnext101_32x4d", n_classes: int = 1103, pretrained=True): 77 | full = seresnet_partial.__dict__[arch]( 78 | pretrained='imagenet' if pretrained else None) 79 | model = nn.Sequential( 80 | nn.Sequential(full.layer0, full.layer1, full.layer2, full.layer3[:3]), 81 | nn.Sequential(full.layer3[3:], full.layer4), 82 | get_head(2048, n_classes)) 83 | print(" | ".join([ 84 | "{:,d}".format(np.sum([p.numel() for p in x.parameters()])) for x in model])) 85 | return model 86 | 87 | 88 | def get_densenet_model(arch: str = "densenet169", n_classes: int = 1103, pretrained=True): 89 | full = pretrainedmodels.__dict__[arch]( 90 | pretrained='imagenet' if pretrained else None) 91 | print(len(full.features)) 92 | model = nn.Sequential( 93 | nn.Sequential(*full.features[:8]), 94 | nn.Sequential(*full.features[8:]), 95 | get_head(full.features[-1].num_features, n_classes)) 96 | print(" | ".join([ 97 | "{:,d}".format(np.sum([p.numel() for p in x.parameters()])) for x in model])) 98 | return model 99 | 100 | 101 | class Swish(nn.Module): 102 | def forward(self, x): 103 | """ Swish activation function """ 104 | return x * torch.sigmoid(x) 105 | 106 | 107 | # def get_efficientnet(arch: str = "efficientnet-b3", n_classes: int = 1103, pretrained=True): 108 | # if pretrained == True: 109 | # base_model = EfficientNet.from_pretrained(arch) 110 | # else: 111 | # base_model = EfficientNet.from_name(arch) 112 | # # print(base_model) 113 | # print(len(base_model._blocks)) 
114 | # model = nn.Sequential( 115 | # nn.Sequential( 116 | # base_model._conv_stem, 117 | # base_model._bn0, 118 | # Swish(), 119 | # *base_model._blocks[:20] 120 | # ), 121 | # nn.Sequential(*base_model._blocks[20:]), 122 | # nn.Sequential( 123 | # base_model._conv_head, 124 | # base_model._bn1, 125 | # Swish(), 126 | # *get_head(base_model._fc.in_features, n_classes)[1:], 127 | # ) 128 | # ) 129 | # print(" | ".join([ 130 | # "{:,d}".format(np.sum([p.numel() for p in x.parameters()])) for x in model])) 131 | # return model 132 | -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from pathlib import Path 4 | from dataclasses import dataclass 5 | 6 | import pandas as pd 7 | import torch 8 | from torch import nn, cuda 9 | from torch.utils.data import DataLoader 10 | from torch.optim.lr_scheduler import CosineAnnealingLR 11 | from helperbot import ( 12 | BaseBot, WeightDecayOptimizerWrapper, TriangularLR, 13 | GradualWarmupScheduler, LearningRateSchedulerCallback, 14 | MixUpCallback, Top1Accuracy, TopKAccuracy 15 | ) 16 | from helperbot.loss import MixUpSoftmaxLoss 17 | 18 | from models import get_seresnet_model, get_densenet_model 19 | from dataset import TrainDataset, N_CLASSES, DATA_ROOT, build_dataframe_from_folder 20 | from transforms import train_transform, test_transform 21 | 22 | try: 23 | from apex import amp 24 | APEX_AVAILABLE = True 25 | except ModuleNotFoundError: 26 | APEX_AVAILABLE = False 27 | 28 | CACHE_DIR = Path('./data/cache/') 29 | CACHE_DIR.mkdir(exist_ok=True, parents=True) 30 | MODEL_DIR = Path('./data/cache/') 31 | MODEL_DIR.mkdir(exist_ok=True, parents=True) 32 | 33 | NO_DECAY = [ 34 | 'bias', 'bn1.weight', 'bn2.weight', 'bn3.weight' 35 | ] 36 | 37 | 38 | def make_loader(args, ds_class, df: pd.DataFrame, image_transform, drop_last=False, shuffle=False) -> DataLoader: 39 | return DataLoader( 40 | ds_class(df, image_transform, debug=args.debug), 41 | shuffle=shuffle, 42 | batch_size=args.batch_size, 43 | num_workers=args.workers, 44 | drop_last=drop_last 45 | ) 46 | 47 | 48 | @dataclass 49 | class ImageClassificationBot(BaseBot): 50 | checkpoint_dir: Path = CACHE_DIR / "model_cache/" 51 | log_dir: Path = MODEL_DIR / "logs/" 52 | 53 | def __post_init__(self): 54 | super().__post_init__() 55 | self.loss_format = "%.6f" 56 | self.metrics = (Top1Accuracy(), TopKAccuracy(k=3)) 57 | self.monitor_metric = "accuracy" 58 | 59 | def extract_prediction(self, x): 60 | return x 61 | 62 | 63 | def train_from_scratch(args, model, train_loader, valid_loader, criterion): 64 | n_steps = len(train_loader) * args.epochs 65 | optimizer = WeightDecayOptimizerWrapper( 66 | torch.optim.Adam( 67 | [ 68 | { 69 | 'params': [p for n, p in model.named_parameters() 70 | if not any(nd in n for nd in NO_DECAY)], 71 | }, 72 | { 73 | 'params': [p for n, p in model.named_parameters() 74 | if any(nd in n for nd in NO_DECAY)], 75 | } 76 | ], 77 | weight_decay=0, 78 | lr=args.lr 79 | ), 80 | weight_decay=[1e-1, 0], 81 | change_with_lr=True 82 | ) 83 | if args.debug: 84 | print( 85 | "No decay:", 86 | [n for n, p in model.named_parameters() 87 | if any(nd in n for nd in NO_DECAY)] 88 | ) 89 | if args.amp: 90 | if not APEX_AVAILABLE: 91 | raise ValueError("Apex is not installed!") 92 | model, optimizer = amp.initialize( 93 | model, optimizer, opt_level=args.amp 94 | ) 95 | 96 | callbacks = [ 97 | 
LearningRateSchedulerCallback( 98 | # TriangularLR( 99 | # optimizer, 100, ratio=4, steps_per_cycle=n_steps 100 | # ) 101 | GradualWarmupScheduler( 102 | optimizer, 100, len(train_loader), 103 | after_scheduler=CosineAnnealingLR( 104 | optimizer, n_steps - len(train_loader) 105 | ) 106 | ) 107 | ) 108 | ] 109 | if args.mixup_alpha: 110 | callbacks.append(MixUpCallback( 111 | alpha=args.mixup_alpha, softmax_target=True)) 112 | bot = ImageClassificationBot( 113 | model=model, train_loader=train_loader, 114 | val_loader=valid_loader, clip_grad=10., 115 | optimizer=optimizer, echo=True, 116 | criterion=criterion, 117 | avg_window=len(train_loader) // 5, 118 | callbacks=callbacks, 119 | pbar=True, use_tensorboard=True, 120 | use_amp=(args.amp != '') 121 | ) 122 | bot.train( 123 | n_steps, 124 | log_interval=len(train_loader) // 6, 125 | snapshot_interval=len(train_loader) // 2, 126 | # early_stopping_cnt=8, 127 | min_improv=1e-2, 128 | keep_n_snapshots=1 129 | ) 130 | bot.remove_checkpoints(keep=1) 131 | bot.load_model(bot.best_performers[0][1]) 132 | torch.save(bot.model.state_dict(), CACHE_DIR / 133 | f"final_weights.pth") 134 | bot.remove_checkpoints(keep=0) 135 | 136 | 137 | def main(): 138 | parser = argparse.ArgumentParser() 139 | arg = parser.add_argument 140 | arg('--batch-size', type=int, default=32) 141 | arg('--lr', type=float, default=2e-3) 142 | arg('--workers', type=int, default=4) 143 | arg('--epochs', type=int, default=5) 144 | arg('--mixup-alpha', type=float, default=0) 145 | arg('--arch', type=str, default='seresnext50') 146 | arg('--amp', type=str, default='') 147 | arg('--debug', action='store_true') 148 | args = parser.parse_args() 149 | 150 | train_dir = DATA_ROOT / 'train' 151 | valid_dir = DATA_ROOT / 'val' 152 | 153 | use_cuda = cuda.is_available() 154 | if args.arch == 'seresnext50': 155 | model = get_seresnet_model( 156 | arch="se_resnext50_32x4d", 157 | n_classes=N_CLASSES, pretrained=False) 158 | elif args.arch == 'seresnext101': 159 | model = get_seresnet_model( 160 | arch="se_resnext101_32x4d", 161 | n_classes=N_CLASSES, pretrained=False) 162 | elif args.arch.startswith("densenet"): 163 | model = get_densenet_model(arch=args.arch) 164 | else: 165 | raise ValueError("No such model") 166 | if use_cuda: 167 | model = model.cuda() 168 | criterion = MixUpSoftmaxLoss(nn.CrossEntropyLoss()) 169 | (CACHE_DIR / 'params.json').write_text( 170 | json.dumps(vars(args), indent=4, sort_keys=True)) 171 | 172 | df_train, class_map = build_dataframe_from_folder(train_dir) 173 | df_valid = build_dataframe_from_folder(valid_dir, class_map) 174 | 175 | train_loader = make_loader( 176 | args, TrainDataset, df_train, train_transform, drop_last=True, shuffle=True) 177 | valid_loader = make_loader( 178 | args, TrainDataset, df_valid, test_transform, shuffle=False) 179 | 180 | print(f'{len(train_loader.dataset):,} items in train, ' 181 | f'{len(valid_loader.dataset):,} in valid') 182 | 183 | train_from_scratch(args, model, train_loader, valid_loader, criterion) 184 | 185 | 186 | if __name__ == '__main__': 187 | main() 188 | -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/colab_o0_bs64_e5.txt: -------------------------------------------------------------------------------- 1 | Initing linear 2 | 5,610,928 | 19,899,968 | 2,260,047 3 | 12,894 items in train, 500 in valid 4 | [[06/22/2019 11:23:00 AM]] SEED: 9293 5 | [[06/22/2019 11:23:00 AM]] # of parameters: 27,770,943 6 | [[06/22/2019 11:23:00 AM]] # of trainable 
parameters: 27,770,943 7 | [[06/22/2019 11:23:00 AM]] Optimizer Adam ( 8 | Parameter Group 0 9 | amsgrad: False 10 | betas: (0.9, 0.999) 11 | eps: 1e-08 12 | initial_lr: 0.005 13 | lr: 5e-05 14 | weight_decay: 0 15 | 16 | Parameter Group 1 17 | amsgrad: False 18 | betas: (0.9, 0.999) 19 | eps: 1e-08 20 | initial_lr: 0.005 21 | lr: 5e-05 22 | weight_decay: 0 23 | ) 24 | [[06/22/2019 11:23:00 AM]] Batches per epoch: 201 25 | [[06/22/2019 11:23:00 AM]] ====================Epoch 1==================== 26 | [[06/22/2019 11:23:25 AM]] Step 33: train 3.692032 lr: 8.134e-04 27 | [[06/22/2019 11:23:48 AM]] Step 66: train 1.930618 lr: 1.626e-03 28 | [[06/22/2019 11:24:12 AM]] Step 99: train 1.695598 lr: 2.439e-03 29 | 100% 8/8 [00:03<00:00, 2.77it/s] 30 | [[06/22/2019 11:24:16 AM]] Criterion loss: 2.293814 31 | [[06/22/2019 11:24:16 AM]] accuracy: 38.40% 32 | [[06/22/2019 11:24:16 AM]] top_3_accuracy: 67.00% 33 | [[06/22/2019 11:24:16 AM]] Snapshot metric -0.38400000 34 | [[06/22/2019 11:24:16 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.38400000_100.pth... 35 | [[06/22/2019 11:24:16 AM]] New low 36 | 37 | [[06/22/2019 11:24:39 AM]] Step 132: train 1.584172 lr: 3.251e-03 38 | [[06/22/2019 11:25:03 AM]] Step 165: train 1.500142 lr: 4.064e-03 39 | [[06/22/2019 11:25:27 AM]] Step 198: train 1.571352 lr: 4.877e-03 40 | 100% 8/8 [00:03<00:00, 2.86it/s] 41 | [[06/22/2019 11:25:32 AM]] Criterion loss: 2.362044 42 | [[06/22/2019 11:25:32 AM]] accuracy: 36.40% 43 | [[06/22/2019 11:25:32 AM]] top_3_accuracy: 70.40% 44 | [[06/22/2019 11:25:32 AM]] Snapshot metric -0.36400000 45 | [[06/22/2019 11:25:32 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.36400000_200.pth... 46 | [[06/22/2019 11:25:33 AM]] ====================Epoch 2==================== 47 | [[06/22/2019 11:25:56 AM]] Step 231: train 1.433132 lr: 4.986e-03 48 | [[06/22/2019 11:26:20 AM]] Step 264: train 1.347544 lr: 4.932e-03 49 | [[06/22/2019 11:26:44 AM]] Step 297: train 1.336156 lr: 4.837e-03 50 | 100% 8/8 [00:03<00:00, 2.94it/s] 51 | [[06/22/2019 11:26:50 AM]] Criterion loss: 1.654578 52 | [[06/22/2019 11:26:50 AM]] accuracy: 46.40% 53 | [[06/22/2019 11:26:50 AM]] top_3_accuracy: 79.40% 54 | [[06/22/2019 11:26:50 AM]] Snapshot metric -0.46400000 55 | [[06/22/2019 11:26:50 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.46400000_300.pth... 56 | [[06/22/2019 11:26:50 AM]] New low 57 | 58 | [[06/22/2019 11:27:12 AM]] Step 330: train 1.203578 lr: 4.703e-03 59 | [[06/22/2019 11:27:37 AM]] Step 363: train 1.156871 lr: 4.533e-03 60 | [[06/22/2019 11:28:01 AM]] Step 396: train 1.103429 lr: 4.329e-03 61 | 100% 8/8 [00:03<00:00, 2.88it/s] 62 | [[06/22/2019 11:28:08 AM]] Criterion loss: 0.921457 63 | [[06/22/2019 11:28:08 AM]] accuracy: 68.60% 64 | [[06/22/2019 11:28:08 AM]] top_3_accuracy: 90.80% 65 | [[06/22/2019 11:28:08 AM]] Snapshot metric -0.68600000 66 | [[06/22/2019 11:28:08 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.68600000_400.pth... 
67 | [[06/22/2019 11:28:08 AM]] New low 68 | 69 | [[06/22/2019 11:28:09 AM]] ====================Epoch 3==================== 70 | [[06/22/2019 11:28:31 AM]] Step 429: train 1.057488 lr: 4.094e-03 71 | [[06/22/2019 11:28:55 AM]] Step 462: train 1.021254 lr: 3.834e-03 72 | [[06/22/2019 11:29:20 AM]] Step 495: train 1.042827 lr: 3.551e-03 73 | 100% 8/8 [00:03<00:00, 2.89it/s] 74 | [[06/22/2019 11:29:27 AM]] Criterion loss: 1.046612 75 | [[06/22/2019 11:29:27 AM]] accuracy: 66.20% 76 | [[06/22/2019 11:29:27 AM]] top_3_accuracy: 90.00% 77 | [[06/22/2019 11:29:27 AM]] Snapshot metric -0.66200000 78 | [[06/22/2019 11:29:27 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.66200000_500.pth... 79 | [[06/22/2019 11:29:48 AM]] Step 528: train 0.999482 lr: 3.250e-03 80 | [[06/22/2019 11:30:13 AM]] Step 561: train 0.969348 lr: 2.937e-03 81 | [[06/22/2019 11:30:38 AM]] Step 594: train 0.918918 lr: 2.617e-03 82 | 100% 8/8 [00:03<00:00, 2.81it/s] 83 | [[06/22/2019 11:30:45 AM]] Criterion loss: 0.816670 84 | [[06/22/2019 11:30:45 AM]] accuracy: 73.40% 85 | [[06/22/2019 11:30:45 AM]] top_3_accuracy: 92.00% 86 | [[06/22/2019 11:30:45 AM]] Snapshot metric -0.73400000 87 | [[06/22/2019 11:30:45 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.73400000_600.pth... 88 | [[06/22/2019 11:30:46 AM]] New low 89 | 90 | [[06/22/2019 11:30:48 AM]] ====================Epoch 4==================== 91 | [[06/22/2019 11:31:07 AM]] Step 627: train 0.849912 lr: 2.295e-03 92 | [[06/22/2019 11:31:32 AM]] Step 660: train 0.829518 lr: 1.976e-03 93 | [[06/22/2019 11:31:57 AM]] Step 693: train 0.766648 lr: 1.666e-03 94 | 100% 8/8 [00:03<00:00, 2.88it/s] 95 | [[06/22/2019 11:32:05 AM]] Criterion loss: 0.635795 96 | [[06/22/2019 11:32:05 AM]] accuracy: 79.80% 97 | [[06/22/2019 11:32:05 AM]] top_3_accuracy: 95.00% 98 | [[06/22/2019 11:32:05 AM]] Snapshot metric -0.79800000 99 | [[06/22/2019 11:32:05 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.79800000_700.pth... 100 | [[06/22/2019 11:32:05 AM]] New low 101 | 102 | [[06/22/2019 11:32:25 AM]] Step 726: train 0.761491 lr: 1.370e-03 103 | [[06/22/2019 11:32:50 AM]] Step 759: train 0.715271 lr: 1.093e-03 104 | [[06/22/2019 11:33:15 AM]] Step 792: train 0.683347 lr: 8.388e-04 105 | 100% 8/8 [00:03<00:00, 2.81it/s] 106 | [[06/22/2019 11:33:24 AM]] Criterion loss: 0.546431 107 | [[06/22/2019 11:33:24 AM]] accuracy: 84.00% 108 | [[06/22/2019 11:33:24 AM]] top_3_accuracy: 96.00% 109 | [[06/22/2019 11:33:24 AM]] Snapshot metric -0.84000000 110 | [[06/22/2019 11:33:24 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84000000_800.pth... 111 | [[06/22/2019 11:33:24 AM]] New low 112 | 113 | [[06/22/2019 11:33:27 AM]] ====================Epoch 5==================== 114 | [[06/22/2019 11:33:44 AM]] Step 825: train 0.663847 lr: 6.124e-04 115 | [[06/22/2019 11:34:09 AM]] Step 858: train 0.629520 lr: 4.173e-04 116 | [[06/22/2019 11:34:34 AM]] Step 891: train 0.604525 lr: 2.567e-04 117 | 100% 8/8 [00:03<00:00, 2.71it/s] 118 | [[06/22/2019 11:34:44 AM]] Criterion loss: 0.488755 119 | [[06/22/2019 11:34:44 AM]] accuracy: 85.60% 120 | [[06/22/2019 11:34:44 AM]] top_3_accuracy: 96.40% 121 | [[06/22/2019 11:34:44 AM]] Snapshot metric -0.85600000 122 | [[06/22/2019 11:34:44 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.85600000_900.pth... 
123 | [[06/22/2019 11:34:44 AM]] New low 124 | 125 | [[06/22/2019 11:35:02 AM]] Step 924: train 0.575820 lr: 1.335e-04 126 | [[06/22/2019 11:35:27 AM]] Step 957: train 0.563364 lr: 4.948e-05 127 | [[06/22/2019 11:35:52 AM]] Step 990: train 0.548493 lr: 6.181e-06 128 | 100% 8/8 [00:03<00:00, 2.80it/s] 129 | [[06/22/2019 11:36:03 AM]] Criterion loss: 0.468685 130 | [[06/22/2019 11:36:03 AM]] accuracy: 85.20% 131 | [[06/22/2019 11:36:03 AM]] top_3_accuracy: 96.60% 132 | [[06/22/2019 11:36:03 AM]] Snapshot metric -0.85200000 133 | [[06/22/2019 11:36:03 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.85200000_1000.pth... 134 | CPU times: user 3.45 s, sys: 413 ms, total: 3.86 s 135 | Wall time: 13min 18s -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/colab_o1_bs64_e5_2.txt: -------------------------------------------------------------------------------- 1 | 5,610,928 | 19,899,968 | 2,260,047 2 | 12,894 items in train, 500 in valid 3 | Selected optimization level O1: Insert automatic casts around Pytorch functions and Tensor methods. 4 | 5 | Defaults for this optimization level are: 6 | enabled : True 7 | opt_level : O1 8 | cast_model_type : None 9 | patch_torch_functions : True 10 | keep_batchnorm_fp32 : None 11 | master_weights : None 12 | loss_scale : dynamic 13 | Processing user overrides (additional kwargs that are not None)... 14 | After processing overrides, optimization options are: 15 | enabled : True 16 | opt_level : O1 17 | cast_model_type : None 18 | patch_torch_functions : True 19 | keep_batchnorm_fp32 : None 20 | master_weights : None 21 | loss_scale : dynamic 22 | [[06/23/2019 03:02:37 AM]] SEED: 9293 23 | [[06/23/2019 03:02:37 AM]] # of parameters: 27,770,943 24 | [[06/23/2019 03:02:37 AM]] # of trainable parameters: 27,770,943 25 | [[06/23/2019 03:02:37 AM]] Optimizer Adam ( 26 | Parameter Group 0 27 | amsgrad: False 28 | betas: (0.9, 0.999) 29 | eps: 1e-08 30 | initial_lr: 0.005 31 | lr: 5e-05 32 | weight_decay: 0 33 | 34 | Parameter Group 1 35 | amsgrad: False 36 | betas: (0.9, 0.999) 37 | eps: 1e-08 38 | initial_lr: 0.005 39 | lr: 5e-05 40 | weight_decay: 0 41 | ) 42 | [[06/23/2019 03:02:37 AM]] Batches per epoch: 201 43 | [[06/23/2019 03:02:37 AM]] ====================Epoch 1==================== 44 | [[06/23/2019 03:02:55 AM]] Step 33: train 3.570696 lr: 8.134e-04 45 | [[06/23/2019 03:03:13 AM]] Step 66: train 1.898845 lr: 1.626e-03 46 | Gradient overflow. Skipping step, loss scaler 0 reducing loss scale to 32768.0 47 | [[06/23/2019 03:03:30 AM]] Step 99: train 1.691594 lr: 2.439e-03 48 | 100% 8/8 [00:03<00:00, 2.31it/s] 49 | [[06/23/2019 03:03:34 AM]] Criterion loss: 1.909745 50 | [[06/23/2019 03:03:34 AM]] accuracy: 36.00% 51 | [[06/23/2019 03:03:34 AM]] top_3_accuracy: 70.80% 52 | [[06/23/2019 03:03:34 AM]] Snapshot metric -0.36000000 53 | [[06/23/2019 03:03:34 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.36000000_100.pth... 
54 | [[06/23/2019 03:03:35 AM]] New low 55 | 56 | [[06/23/2019 03:03:51 AM]] Step 132: train 1.606339 lr: 3.251e-03 57 | [[06/23/2019 03:04:09 AM]] Step 165: train 1.566232 lr: 4.064e-03 58 | [[06/23/2019 03:04:26 AM]] Step 198: train 1.537102 lr: 4.877e-03 59 | 100% 8/8 [00:03<00:00, 2.47it/s] 60 | [[06/23/2019 03:04:30 AM]] Criterion loss: 1.674237 61 | [[06/23/2019 03:04:30 AM]] accuracy: 47.60% 62 | [[06/23/2019 03:04:30 AM]] top_3_accuracy: 78.80% 63 | [[06/23/2019 03:04:30 AM]] Snapshot metric -0.47600000 64 | [[06/23/2019 03:04:30 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.47600000_200.pth... 65 | [[06/23/2019 03:04:30 AM]] New low 66 | 67 | [[06/23/2019 03:04:31 AM]] ====================Epoch 2==================== 68 | [[06/23/2019 03:04:48 AM]] Step 231: train 1.502071 lr: 4.986e-03 69 | [[06/23/2019 03:05:05 AM]] Step 264: train 1.334250 lr: 4.932e-03 70 | [[06/23/2019 03:05:23 AM]] Step 297: train 1.301363 lr: 4.837e-03 71 | 100% 8/8 [00:03<00:00, 2.57it/s] 72 | [[06/23/2019 03:05:28 AM]] Criterion loss: 1.491104 73 | [[06/23/2019 03:05:28 AM]] accuracy: 54.40% 74 | [[06/23/2019 03:05:28 AM]] top_3_accuracy: 85.40% 75 | [[06/23/2019 03:05:28 AM]] Snapshot metric -0.54400000 76 | [[06/23/2019 03:05:28 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.54400000_300.pth... 77 | [[06/23/2019 03:05:29 AM]] New low 78 | 79 | [[06/23/2019 03:05:44 AM]] Step 330: train 1.177583 lr: 4.703e-03 80 | [[06/23/2019 03:06:02 AM]] Step 363: train 1.276057 lr: 4.533e-03 81 | [[06/23/2019 03:06:19 AM]] Step 396: train 1.165876 lr: 4.329e-03 82 | 100% 8/8 [00:03<00:00, 2.61it/s] 83 | [[06/23/2019 03:06:24 AM]] Criterion loss: 1.189894 84 | [[06/23/2019 03:06:24 AM]] accuracy: 63.80% 85 | [[06/23/2019 03:06:24 AM]] top_3_accuracy: 87.80% 86 | [[06/23/2019 03:06:24 AM]] Snapshot metric -0.63800000 87 | [[06/23/2019 03:06:24 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.63800000_400.pth... 88 | [[06/23/2019 03:06:25 AM]] New low 89 | 90 | [[06/23/2019 03:06:26 AM]] ====================Epoch 3==================== 91 | [[06/23/2019 03:06:41 AM]] Step 429: train 1.088577 lr: 4.094e-03 92 | [[06/23/2019 03:06:59 AM]] Step 462: train 1.025925 lr: 3.834e-03 93 | [[06/23/2019 03:07:16 AM]] Step 495: train 1.032965 lr: 3.551e-03 94 | 100% 8/8 [00:03<00:00, 2.09it/s] 95 | [[06/23/2019 03:07:23 AM]] Criterion loss: 1.222470 96 | [[06/23/2019 03:07:23 AM]] accuracy: 61.40% 97 | [[06/23/2019 03:07:23 AM]] top_3_accuracy: 86.40% 98 | [[06/23/2019 03:07:23 AM]] Snapshot metric -0.61400000 99 | [[06/23/2019 03:07:23 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.61400000_500.pth... 100 | [[06/23/2019 03:07:38 AM]] Step 528: train 0.993605 lr: 3.250e-03 101 | [[06/23/2019 03:07:55 AM]] Step 561: train 0.962152 lr: 2.937e-03 102 | [[06/23/2019 03:08:13 AM]] Step 594: train 0.881560 lr: 2.617e-03 103 | 100% 8/8 [00:02<00:00, 3.14it/s] 104 | [[06/23/2019 03:08:19 AM]] Criterion loss: 0.784809 105 | [[06/23/2019 03:08:19 AM]] accuracy: 75.60% 106 | [[06/23/2019 03:08:19 AM]] top_3_accuracy: 92.20% 107 | [[06/23/2019 03:08:19 AM]] Snapshot metric -0.75600000 108 | [[06/23/2019 03:08:19 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.75600000_600.pth... 
109 | [[06/23/2019 03:08:20 AM]] New low 110 | 111 | [[06/23/2019 03:08:21 AM]] ====================Epoch 4==================== 112 | [[06/23/2019 03:08:35 AM]] Step 627: train 0.827620 lr: 2.295e-03 113 | [[06/23/2019 03:08:53 AM]] Step 660: train 0.838944 lr: 1.976e-03 114 | [[06/23/2019 03:09:10 AM]] Step 693: train 0.801387 lr: 1.666e-03 115 | 100% 8/8 [00:03<00:00, 2.29it/s] 116 | [[06/23/2019 03:09:17 AM]] Criterion loss: 0.810328 117 | [[06/23/2019 03:09:17 AM]] accuracy: 72.20% 118 | [[06/23/2019 03:09:17 AM]] top_3_accuracy: 92.60% 119 | [[06/23/2019 03:09:18 AM]] Snapshot metric -0.72200000 120 | [[06/23/2019 03:09:18 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.72200000_700.pth... 121 | [[06/23/2019 03:09:31 AM]] Step 726: train 0.734263 lr: 1.370e-03 122 | [[06/23/2019 03:09:49 AM]] Step 759: train 0.728053 lr: 1.093e-03 123 | [[06/23/2019 03:10:07 AM]] Step 792: train 0.709520 lr: 8.388e-04 124 | 100% 8/8 [00:03<00:00, 2.16it/s] 125 | [[06/23/2019 03:10:15 AM]] Criterion loss: 0.518678 126 | [[06/23/2019 03:10:15 AM]] accuracy: 84.00% 127 | [[06/23/2019 03:10:15 AM]] top_3_accuracy: 96.00% 128 | [[06/23/2019 03:10:15 AM]] Snapshot metric -0.84000000 129 | [[06/23/2019 03:10:15 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84000000_800.pth... 130 | [[06/23/2019 03:10:15 AM]] New low 131 | 132 | [[06/23/2019 03:10:17 AM]] ====================Epoch 5==================== 133 | [[06/23/2019 03:10:29 AM]] Step 825: train 0.672310 lr: 6.124e-04 134 | [[06/23/2019 03:10:47 AM]] Step 858: train 0.607981 lr: 4.173e-04 135 | [[06/23/2019 03:11:05 AM]] Step 891: train 0.629262 lr: 2.567e-04 136 | 100% 8/8 [00:03<00:00, 2.90it/s] 137 | [[06/23/2019 03:11:13 AM]] Criterion loss: 0.509262 138 | [[06/23/2019 03:11:13 AM]] accuracy: 83.60% 139 | [[06/23/2019 03:11:13 AM]] top_3_accuracy: 96.80% 140 | [[06/23/2019 03:11:13 AM]] Snapshot metric -0.83600000 141 | [[06/23/2019 03:11:13 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83600000_900.pth... 142 | [[06/23/2019 03:11:26 AM]] Step 924: train 0.587658 lr: 1.335e-04 143 | [[06/23/2019 03:11:43 AM]] Step 957: train 0.581012 lr: 4.948e-05 144 | [[06/23/2019 03:12:01 AM]] Step 990: train 0.579771 lr: 6.181e-06 145 | 100% 8/8 [00:03<00:00, 2.93it/s] 146 | [[06/23/2019 03:12:10 AM]] Criterion loss: 0.479807 147 | [[06/23/2019 03:12:10 AM]] accuracy: 85.80% 148 | [[06/23/2019 03:12:10 AM]] top_3_accuracy: 96.80% 149 | [[06/23/2019 03:12:10 AM]] Snapshot metric -0.85800000 150 | [[06/23/2019 03:12:10 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.85800000_1000.pth... 151 | [[06/23/2019 03:12:10 AM]] New low 152 | 153 | CPU times: user 2.93 s, sys: 345 ms, total: 3.28 s 154 | Wall time: 9min 47s -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/colab_o2_bs64_e5.txt: -------------------------------------------------------------------------------- 1 | 5,610,928 | 19,899,968 | 2,260,047 2 | 12,894 items in train, 500 in valid 3 | Selected optimization level O2: FP16 training with FP32 batchnorm and FP32 master weights. 4 | 5 | Defaults for this optimization level are: 6 | enabled : True 7 | opt_level : O2 8 | cast_model_type : torch.float16 9 | patch_torch_functions : False 10 | keep_batchnorm_fp32 : True 11 | master_weights : True 12 | loss_scale : dynamic 13 | Processing user overrides (additional kwargs that are not None)... 
14 | After processing overrides, optimization options are: 15 | enabled : True 16 | opt_level : O2 17 | cast_model_type : torch.float16 18 | patch_torch_functions : False 19 | keep_batchnorm_fp32 : True 20 | master_weights : True 21 | loss_scale : dynamic 22 | [[06/23/2019 03:24:49 AM]] SEED: 9293 23 | [[06/23/2019 03:24:49 AM]] # of parameters: 27,770,943 24 | [[06/23/2019 03:24:49 AM]] # of trainable parameters: 27,770,943 25 | [[06/23/2019 03:24:49 AM]] Optimizer Adam ( 26 | Parameter Group 0 27 | amsgrad: False 28 | betas: (0.9, 0.999) 29 | eps: 1e-08 30 | initial_lr: 0.005 31 | lr: 5e-05 32 | weight_decay: 0 33 | 34 | Parameter Group 1 35 | amsgrad: False 36 | betas: (0.9, 0.999) 37 | eps: 1e-08 38 | initial_lr: 0.005 39 | lr: 5e-05 40 | weight_decay: 0 41 | ) 42 | [[06/23/2019 03:24:49 AM]] Batches per epoch: 201 43 | [[06/23/2019 03:24:49 AM]] ====================Epoch 1==================== 44 | Gradient overflow. Skipping step, loss scaler 0 reducing loss scale to 32768.0 45 | [[06/23/2019 03:25:07 AM]] Step 33: train 3.563893 lr: 8.134e-04 46 | [[06/23/2019 03:25:24 AM]] Step 66: train 1.888989 lr: 1.626e-03 47 | [[06/23/2019 03:25:42 AM]] Step 99: train 1.706567 lr: 2.439e-03 48 | 100% 8/8 [00:03<00:00, 2.11it/s] 49 | [[06/23/2019 03:25:46 AM]] Criterion loss: 3.025426 50 | [[06/23/2019 03:25:46 AM]] accuracy: 32.20% 51 | [[06/23/2019 03:25:46 AM]] top_3_accuracy: 64.60% 52 | [[06/23/2019 03:25:46 AM]] Snapshot metric -0.32200000 53 | [[06/23/2019 03:25:46 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.32200000_100.pth... 54 | [[06/23/2019 03:25:46 AM]] New low 55 | 56 | [[06/23/2019 03:26:02 AM]] Step 132: train 1.597432 lr: 3.251e-03 57 | [[06/23/2019 03:26:19 AM]] Step 165: train 1.581918 lr: 4.064e-03 58 | [[06/23/2019 03:26:36 AM]] Step 198: train 1.577245 lr: 4.877e-03 59 | 100% 8/8 [00:02<00:00, 2.73it/s] 60 | [[06/23/2019 03:26:40 AM]] Criterion loss: 2.104764 61 | [[06/23/2019 03:26:40 AM]] accuracy: 39.40% 62 | [[06/23/2019 03:26:40 AM]] top_3_accuracy: 73.40% 63 | [[06/23/2019 03:26:40 AM]] Snapshot metric -0.39400000 64 | [[06/23/2019 03:26:40 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.39400000_200.pth... 65 | [[06/23/2019 03:26:40 AM]] New low 66 | 67 | [[06/23/2019 03:26:41 AM]] ====================Epoch 2==================== 68 | [[06/23/2019 03:26:57 AM]] Step 231: train 1.530420 lr: 4.986e-03 69 | [[06/23/2019 03:27:15 AM]] Step 264: train 1.378344 lr: 4.932e-03 70 | [[06/23/2019 03:27:32 AM]] Step 297: train 1.330413 lr: 4.837e-03 71 | 100% 8/8 [00:04<00:00, 1.97it/s] 72 | [[06/23/2019 03:27:37 AM]] Criterion loss: 1.163028 73 | [[06/23/2019 03:27:37 AM]] accuracy: 61.80% 74 | [[06/23/2019 03:27:37 AM]] top_3_accuracy: 85.00% 75 | [[06/23/2019 03:27:37 AM]] Snapshot metric -0.61800000 76 | [[06/23/2019 03:27:37 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.61800000_300.pth... 77 | [[06/23/2019 03:27:38 AM]] New low 78 | 79 | [[06/23/2019 03:27:53 AM]] Step 330: train 1.229342 lr: 4.703e-03 80 | [[06/23/2019 03:28:10 AM]] Step 363: train 1.240462 lr: 4.533e-03 81 | [[06/23/2019 03:28:27 AM]] Step 396: train 1.153345 lr: 4.329e-03 82 | 100% 8/8 [00:03<00:00, 2.66it/s] 83 | [[06/23/2019 03:28:32 AM]] Criterion loss: 0.839671 84 | [[06/23/2019 03:28:32 AM]] accuracy: 73.40% 85 | [[06/23/2019 03:28:32 AM]] top_3_accuracy: 92.80% 86 | [[06/23/2019 03:28:32 AM]] Snapshot metric -0.73400000 87 | [[06/23/2019 03:28:32 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.73400000_400.pth... 
88 | [[06/23/2019 03:28:32 AM]] New low 89 | 90 | [[06/23/2019 03:28:33 AM]] ====================Epoch 3==================== 91 | [[06/23/2019 03:28:48 AM]] Step 429: train 1.091728 lr: 4.094e-03 92 | [[06/23/2019 03:29:06 AM]] Step 462: train 1.046135 lr: 3.834e-03 93 | [[06/23/2019 03:29:23 AM]] Step 495: train 1.024378 lr: 3.551e-03 94 | 100% 8/8 [00:03<00:00, 2.75it/s] 95 | [[06/23/2019 03:29:29 AM]] Criterion loss: 0.782142 96 | [[06/23/2019 03:29:29 AM]] accuracy: 74.40% 97 | [[06/23/2019 03:29:29 AM]] top_3_accuracy: 92.00% 98 | [[06/23/2019 03:29:29 AM]] Snapshot metric -0.74400000 99 | [[06/23/2019 03:29:29 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.74400000_500.pth... 100 | [[06/23/2019 03:29:44 AM]] Step 528: train 1.027445 lr: 3.250e-03 101 | [[06/23/2019 03:30:01 AM]] Step 561: train 0.966508 lr: 2.937e-03 102 | [[06/23/2019 03:30:18 AM]] Step 594: train 0.886668 lr: 2.617e-03 103 | 100% 8/8 [00:03<00:00, 2.16it/s] 104 | [[06/23/2019 03:30:24 AM]] Criterion loss: 0.823883 105 | [[06/23/2019 03:30:24 AM]] accuracy: 73.20% 106 | [[06/23/2019 03:30:24 AM]] top_3_accuracy: 91.20% 107 | [[06/23/2019 03:30:24 AM]] Snapshot metric -0.73200000 108 | [[06/23/2019 03:30:24 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.73200000_600.pth... 109 | [[06/23/2019 03:30:26 AM]] ====================Epoch 4==================== 110 | [[06/23/2019 03:30:40 AM]] Step 627: train 0.816750 lr: 2.295e-03 111 | [[06/23/2019 03:30:57 AM]] Step 660: train 0.861780 lr: 1.976e-03 112 | [[06/23/2019 03:31:15 AM]] Step 693: train 0.810774 lr: 1.666e-03 113 | 100% 8/8 [00:03<00:00, 2.19it/s] 114 | [[06/23/2019 03:31:22 AM]] Criterion loss: 0.706307 115 | [[06/23/2019 03:31:22 AM]] accuracy: 77.20% 116 | [[06/23/2019 03:31:22 AM]] top_3_accuracy: 93.00% 117 | [[06/23/2019 03:31:22 AM]] Snapshot metric -0.77200000 118 | [[06/23/2019 03:31:22 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.77200000_700.pth... 119 | [[06/23/2019 03:31:22 AM]] New low 120 | 121 | [[06/23/2019 03:31:35 AM]] Step 726: train 0.774412 lr: 1.370e-03 122 | [[06/23/2019 03:31:53 AM]] Step 759: train 0.753918 lr: 1.093e-03 123 | [[06/23/2019 03:32:10 AM]] Step 792: train 0.728576 lr: 8.388e-04 124 | 100% 8/8 [00:03<00:00, 2.04it/s] 125 | [[06/23/2019 03:32:18 AM]] Criterion loss: 0.544742 126 | [[06/23/2019 03:32:18 AM]] accuracy: 83.40% 127 | [[06/23/2019 03:32:18 AM]] top_3_accuracy: 95.80% 128 | [[06/23/2019 03:32:18 AM]] Snapshot metric -0.83400000 129 | [[06/23/2019 03:32:18 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83400000_800.pth... 130 | [[06/23/2019 03:32:18 AM]] New low 131 | 132 | [[06/23/2019 03:32:20 AM]] ====================Epoch 5==================== 133 | [[06/23/2019 03:32:32 AM]] Step 825: train 0.683776 lr: 6.124e-04 134 | [[06/23/2019 03:32:49 AM]] Step 858: train 0.616099 lr: 4.173e-04 135 | [[06/23/2019 03:33:07 AM]] Step 891: train 0.637941 lr: 2.567e-04 136 | 100% 8/8 [00:03<00:00, 2.76it/s] 137 | [[06/23/2019 03:33:15 AM]] Criterion loss: 0.504005 138 | [[06/23/2019 03:33:15 AM]] accuracy: 82.80% 139 | [[06/23/2019 03:33:15 AM]] top_3_accuracy: 96.20% 140 | [[06/23/2019 03:33:15 AM]] Snapshot metric -0.82800000 141 | [[06/23/2019 03:33:15 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.82800000_900.pth... 
142 | [[06/23/2019 03:33:28 AM]] Step 924: train 0.592936 lr: 1.335e-04 143 | [[06/23/2019 03:33:45 AM]] Step 957: train 0.582012 lr: 4.948e-05 144 | [[06/23/2019 03:34:02 AM]] Step 990: train 0.588021 lr: 6.181e-06 145 | 100% 8/8 [00:03<00:00, 2.70it/s] 146 | [[06/23/2019 03:34:11 AM]] Criterion loss: 0.487183 147 | [[06/23/2019 03:34:11 AM]] accuracy: 85.40% 148 | [[06/23/2019 03:34:11 AM]] top_3_accuracy: 96.20% 149 | [[06/23/2019 03:34:11 AM]] Snapshot metric -0.85400000 150 | [[06/23/2019 03:34:11 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.85400000_1000.pth... 151 | [[06/23/2019 03:34:11 AM]] New low 152 | 153 | CPU times: user 2.89 s, sys: 358 ms, total: 3.25 s 154 | Wall time: 9min 35s -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/colab_o1_bs64_e5.txt: -------------------------------------------------------------------------------- 1 | Initing linear 2 | 5,610,928 | 19,899,968 | 2,260,047 3 | 12,894 items in train, 500 in valid 4 | Selected optimization level O1: Insert automatic casts around Pytorch functions and Tensor methods. 5 | 6 | Defaults for this optimization level are: 7 | enabled : True 8 | opt_level : O1 9 | cast_model_type : None 10 | patch_torch_functions : True 11 | keep_batchnorm_fp32 : None 12 | master_weights : None 13 | loss_scale : dynamic 14 | Processing user overrides (additional kwargs that are not None)... 15 | After processing overrides, optimization options are: 16 | enabled : True 17 | opt_level : O1 18 | cast_model_type : None 19 | patch_torch_functions : True 20 | keep_batchnorm_fp32 : None 21 | master_weights : None 22 | loss_scale : dynamic 23 | [[06/22/2019 11:07:14 AM]] SEED: 9293 24 | [[06/22/2019 11:07:14 AM]] # of parameters: 27,770,943 25 | [[06/22/2019 11:07:14 AM]] # of trainable parameters: 27,770,943 26 | [[06/22/2019 11:07:14 AM]] Optimizer Adam ( 27 | Parameter Group 0 28 | amsgrad: False 29 | betas: (0.9, 0.999) 30 | eps: 1e-08 31 | initial_lr: 0.005 32 | lr: 5e-05 33 | weight_decay: 0 34 | 35 | Parameter Group 1 36 | amsgrad: False 37 | betas: (0.9, 0.999) 38 | eps: 1e-08 39 | initial_lr: 0.005 40 | lr: 5e-05 41 | weight_decay: 0 42 | ) 43 | [[06/22/2019 11:07:14 AM]] Batches per epoch: 201 44 | [[06/22/2019 11:07:14 AM]] ====================Epoch 1==================== 45 | Gradient overflow. Skipping step, loss scaler 0 reducing loss scale to 32768.0 46 | [[06/22/2019 11:07:33 AM]] Step 33: train 3.761103 lr: 8.134e-04 47 | [[06/22/2019 11:07:50 AM]] Step 66: train 1.999741 lr: 1.626e-03 48 | [[06/22/2019 11:08:08 AM]] Step 99: train 1.745328 lr: 2.439e-03 49 | 100% 8/8 [00:03<00:00, 2.54it/s] 50 | [[06/22/2019 11:08:12 AM]] Criterion loss: 1.965406 51 | [[06/22/2019 11:08:12 AM]] accuracy: 40.80% 52 | [[06/22/2019 11:08:12 AM]] top_3_accuracy: 69.40% 53 | [[06/22/2019 11:08:12 AM]] Snapshot metric -0.40800000 54 | [[06/22/2019 11:08:12 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.40800000_100.pth... 
55 | [[06/22/2019 11:08:12 AM]] New low 56 | 57 | [[06/22/2019 11:08:28 AM]] Step 132: train 1.582526 lr: 3.251e-03 58 | [[06/22/2019 11:08:46 AM]] Step 165: train 1.515693 lr: 4.064e-03 59 | [[06/22/2019 11:09:04 AM]] Step 198: train 1.504430 lr: 4.877e-03 60 | 100% 8/8 [00:03<00:00, 2.60it/s] 61 | [[06/22/2019 11:09:08 AM]] Criterion loss: 1.618016 62 | [[06/22/2019 11:09:08 AM]] accuracy: 48.60% 63 | [[06/22/2019 11:09:08 AM]] top_3_accuracy: 79.00% 64 | [[06/22/2019 11:09:08 AM]] Snapshot metric -0.48600000 65 | [[06/22/2019 11:09:08 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.48600000_200.pth... 66 | [[06/22/2019 11:09:08 AM]] New low 67 | 68 | [[06/22/2019 11:09:08 AM]] ====================Epoch 2==================== 69 | [[06/22/2019 11:09:25 AM]] Step 231: train 1.415068 lr: 4.986e-03 70 | [[06/22/2019 11:09:43 AM]] Step 264: train 1.320727 lr: 4.932e-03 71 | [[06/22/2019 11:10:01 AM]] Step 297: train 1.312385 lr: 4.837e-03 72 | 100% 8/8 [00:03<00:00, 2.21it/s] 73 | [[06/22/2019 11:10:06 AM]] Criterion loss: 1.416207 74 | [[06/22/2019 11:10:06 AM]] accuracy: 54.80% 75 | [[06/22/2019 11:10:06 AM]] top_3_accuracy: 82.80% 76 | [[06/22/2019 11:10:06 AM]] Snapshot metric -0.54800000 77 | [[06/22/2019 11:10:06 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.54800000_300.pth... 78 | [[06/22/2019 11:10:07 AM]] New low 79 | 80 | [[06/22/2019 11:10:22 AM]] Step 330: train 1.171946 lr: 4.703e-03 81 | [[06/22/2019 11:10:40 AM]] Step 363: train 1.107085 lr: 4.533e-03 82 | [[06/22/2019 11:10:58 AM]] Step 396: train 1.065447 lr: 4.329e-03 83 | 100% 8/8 [00:03<00:00, 2.49it/s] 84 | [[06/22/2019 11:11:03 AM]] Criterion loss: 0.983896 85 | [[06/22/2019 11:11:03 AM]] accuracy: 68.20% 86 | [[06/22/2019 11:11:03 AM]] top_3_accuracy: 89.60% 87 | [[06/22/2019 11:11:03 AM]] Snapshot metric -0.68200000 88 | [[06/22/2019 11:11:03 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.68200000_400.pth... 89 | [[06/22/2019 11:11:03 AM]] New low 90 | 91 | [[06/22/2019 11:11:04 AM]] ====================Epoch 3==================== 92 | [[06/22/2019 11:11:20 AM]] Step 429: train 1.057125 lr: 4.094e-03 93 | [[06/22/2019 11:11:39 AM]] Step 462: train 1.027017 lr: 3.834e-03 94 | [[06/22/2019 11:11:57 AM]] Step 495: train 1.018028 lr: 3.551e-03 95 | 100% 8/8 [00:03<00:00, 2.77it/s] 96 | [[06/22/2019 11:12:03 AM]] Criterion loss: 0.961748 97 | [[06/22/2019 11:12:03 AM]] accuracy: 69.00% 98 | [[06/22/2019 11:12:03 AM]] top_3_accuracy: 91.00% 99 | [[06/22/2019 11:12:03 AM]] Snapshot metric -0.69000000 100 | [[06/22/2019 11:12:03 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.69000000_500.pth... 101 | [[06/22/2019 11:12:18 AM]] Step 528: train 0.942188 lr: 3.250e-03 102 | [[06/22/2019 11:12:36 AM]] Step 561: train 0.954713 lr: 2.937e-03 103 | [[06/22/2019 11:12:54 AM]] Step 594: train 0.911246 lr: 2.617e-03 104 | 100% 8/8 [00:03<00:00, 2.27it/s] 105 | [[06/22/2019 11:13:00 AM]] Criterion loss: 0.943144 106 | [[06/22/2019 11:13:00 AM]] accuracy: 70.00% 107 | [[06/22/2019 11:13:00 AM]] top_3_accuracy: 90.00% 108 | [[06/22/2019 11:13:00 AM]] Snapshot metric -0.70000000 109 | [[06/22/2019 11:13:00 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.70000000_600.pth... 
110 | [[06/22/2019 11:13:00 AM]] New low 111 | 112 | [[06/22/2019 11:13:02 AM]] ====================Epoch 4==================== 113 | [[06/22/2019 11:13:16 AM]] Step 627: train 0.844525 lr: 2.295e-03 114 | [[06/22/2019 11:13:34 AM]] Step 660: train 0.806685 lr: 1.976e-03 115 | [[06/22/2019 11:13:52 AM]] Step 693: train 0.746364 lr: 1.666e-03 116 | 100% 8/8 [00:04<00:00, 1.98it/s] 117 | [[06/22/2019 11:14:00 AM]] Criterion loss: 0.556791 118 | [[06/22/2019 11:14:00 AM]] accuracy: 83.00% 119 | [[06/22/2019 11:14:00 AM]] top_3_accuracy: 95.60% 120 | [[06/22/2019 11:14:00 AM]] Snapshot metric -0.83000000 121 | [[06/22/2019 11:14:00 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83000000_700.pth... 122 | [[06/22/2019 11:14:00 AM]] New low 123 | 124 | [[06/22/2019 11:14:14 AM]] Step 726: train 0.754921 lr: 1.370e-03 125 | [[06/22/2019 11:14:31 AM]] Step 759: train 0.711394 lr: 1.093e-03 126 | [[06/22/2019 11:14:49 AM]] Step 792: train 0.662325 lr: 8.388e-04 127 | 100% 8/8 [00:03<00:00, 2.09it/s] 128 | [[06/22/2019 11:14:57 AM]] Criterion loss: 0.540010 129 | [[06/22/2019 11:14:57 AM]] accuracy: 83.20% 130 | [[06/22/2019 11:14:57 AM]] top_3_accuracy: 95.20% 131 | [[06/22/2019 11:14:57 AM]] Snapshot metric -0.83200000 132 | [[06/22/2019 11:14:57 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83200000_800.pth... 133 | [[06/22/2019 11:14:59 AM]] ====================Epoch 5==================== 134 | [[06/22/2019 11:15:12 AM]] Step 825: train 0.657531 lr: 6.124e-04 135 | [[06/22/2019 11:15:31 AM]] Step 858: train 0.639147 lr: 4.173e-04 136 | [[06/22/2019 11:15:48 AM]] Step 891: train 0.591341 lr: 2.567e-04 137 | 100% 8/8 [00:03<00:00, 2.73it/s] 138 | [[06/22/2019 11:15:57 AM]] Criterion loss: 0.488692 139 | [[06/22/2019 11:15:57 AM]] accuracy: 84.20% 140 | [[06/22/2019 11:15:57 AM]] top_3_accuracy: 96.00% 141 | [[06/22/2019 11:15:57 AM]] Snapshot metric -0.84200000 142 | [[06/22/2019 11:15:57 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84200000_900.pth... 143 | [[06/22/2019 11:15:57 AM]] New low 144 | 145 | [[06/22/2019 11:16:09 AM]] Step 924: train 0.566965 lr: 1.335e-04 146 | [[06/22/2019 11:16:27 AM]] Step 957: train 0.551050 lr: 4.948e-05 147 | [[06/22/2019 11:16:45 AM]] Step 990: train 0.532422 lr: 6.181e-06 148 | 100% 8/8 [00:03<00:00, 2.18it/s] 149 | [[06/22/2019 11:16:54 AM]] Criterion loss: 0.471129 150 | [[06/22/2019 11:16:54 AM]] accuracy: 84.00% 151 | [[06/22/2019 11:16:54 AM]] top_3_accuracy: 96.60% 152 | [[06/22/2019 11:16:54 AM]] Snapshot metric -0.84000000 153 | [[06/22/2019 11:16:54 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84000000_1000.pth... 
154 | CPU times: user 2.62 s, sys: 334 ms, total: 2.95 s 155 | Wall time: 9min 59s -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/bs32_8460.txt: -------------------------------------------------------------------------------- 1 | [[06/22/2019 02:17:52 PM]] SEED: 9293 2 | [[06/22/2019 02:17:52 PM]] # of parameters: 27,770,943 3 | [[06/22/2019 02:17:52 PM]] # of trainable parameters: 27,770,943 4 | [[06/22/2019 02:17:52 PM]] Optimizer Adam ( 5 | Parameter Group 0 6 | amsgrad: False 7 | betas: (0.9, 0.999) 8 | eps: 1e-08 9 | initial_lr: 0.002 10 | lr: 2e-05 11 | weight_decay: 0 12 | 13 | Parameter Group 1 14 | amsgrad: False 15 | betas: (0.9, 0.999) 16 | eps: 1e-08 17 | initial_lr: 0.002 18 | lr: 2e-05 19 | weight_decay: 0 20 | ) 21 | [[06/22/2019 02:17:52 PM]] Batches per epoch: 402 22 | [[06/22/2019 02:17:52 PM]] ====================Epoch 1==================== 23 | [[06/22/2019 02:18:10 PM]] Step 40: train 3.956017 lr: 2.072e-04 24 | [[06/22/2019 02:18:27 PM]] Step 80: train 2.060594 lr: 4.042e-04 25 | [[06/22/2019 02:18:44 PM]] Step 120: train 1.976278 lr: 6.012e-04 26 | [[06/22/2019 02:19:02 PM]] Step 160: train 1.772698 lr: 7.982e-04 27 | [[06/22/2019 02:19:19 PM]] Step 200: train 1.656106 lr: 9.952e-04 28 | [[06/22/2019 02:19:22 PM]] Criterion loss: 1.560624 29 | [[06/22/2019 02:19:22 PM]] accuracy: 49.80% 30 | [[06/22/2019 02:19:22 PM]] top_3_accuracy: 75.60% 31 | [[06/22/2019 02:19:22 PM]] Snapshot metric -0.49800000 32 | [[06/22/2019 02:19:22 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.49800000_201.pth... 33 | [[06/22/2019 02:19:22 PM]] New low 34 | 35 | [[06/22/2019 02:19:39 PM]] Step 240: train 1.619188 lr: 1.192e-03 36 | [[06/22/2019 02:19:56 PM]] Step 280: train 1.540767 lr: 1.389e-03 37 | [[06/22/2019 02:20:13 PM]] Step 320: train 1.467717 lr: 1.586e-03 38 | [[06/22/2019 02:20:31 PM]] Step 360: train 1.525094 lr: 1.783e-03 39 | [[06/22/2019 02:20:48 PM]] Step 400: train 1.454986 lr: 1.980e-03 40 | [[06/22/2019 02:20:51 PM]] Criterion loss: 1.493544 41 | [[06/22/2019 02:20:51 PM]] accuracy: 53.80% 42 | [[06/22/2019 02:20:51 PM]] top_3_accuracy: 78.00% 43 | [[06/22/2019 02:20:51 PM]] Snapshot metric -0.53800000 44 | [[06/22/2019 02:20:51 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.53800000_402.pth... 45 | [[06/22/2019 02:20:52 PM]] New low 46 | 47 | [[06/22/2019 02:20:52 PM]] ====================Epoch 2==================== 48 | [[06/22/2019 02:21:08 PM]] Step 440: train 1.488533 lr: 1.998e-03 49 | [[06/22/2019 02:21:26 PM]] Step 480: train 1.380048 lr: 1.990e-03 50 | [[06/22/2019 02:21:43 PM]] Step 520: train 1.430035 lr: 1.975e-03 51 | [[06/22/2019 02:22:00 PM]] Step 560: train 1.249746 lr: 1.955e-03 52 | [[06/22/2019 02:22:17 PM]] Step 600: train 1.240363 lr: 1.929e-03 53 | [[06/22/2019 02:22:21 PM]] Criterion loss: 0.982757 54 | [[06/22/2019 02:22:21 PM]] accuracy: 66.00% 55 | [[06/22/2019 02:22:21 PM]] top_3_accuracy: 92.20% 56 | [[06/22/2019 02:22:21 PM]] Snapshot metric -0.66000000 57 | [[06/22/2019 02:22:21 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.66000000_603.pth... 
58 | [[06/22/2019 02:22:21 PM]] New low 59 | 60 | [[06/22/2019 02:22:37 PM]] Step 640: train 1.268782 lr: 1.897e-03 61 | [[06/22/2019 02:22:55 PM]] Step 680: train 1.264315 lr: 1.860e-03 62 | [[06/22/2019 02:23:12 PM]] Step 720: train 1.153008 lr: 1.818e-03 63 | [[06/22/2019 02:23:29 PM]] Step 760: train 1.135256 lr: 1.770e-03 64 | [[06/22/2019 02:23:46 PM]] Step 800: train 1.113838 lr: 1.718e-03 65 | [[06/22/2019 02:23:50 PM]] Criterion loss: 0.893659 66 | [[06/22/2019 02:23:50 PM]] accuracy: 70.20% 67 | [[06/22/2019 02:23:50 PM]] top_3_accuracy: 93.20% 68 | [[06/22/2019 02:23:50 PM]] Snapshot metric -0.70200000 69 | [[06/22/2019 02:23:50 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.70200000_804.pth... 70 | [[06/22/2019 02:23:51 PM]] New low 71 | 72 | [[06/22/2019 02:23:51 PM]] ====================Epoch 3==================== 73 | [[06/22/2019 02:24:07 PM]] Step 840: train 1.078236 lr: 1.662e-03 74 | [[06/22/2019 02:24:24 PM]] Step 880: train 1.151317 lr: 1.601e-03 75 | [[06/22/2019 02:24:43 PM]] Step 920: train 1.030114 lr: 1.537e-03 76 | [[06/22/2019 02:25:02 PM]] Step 960: train 1.032722 lr: 1.469e-03 77 | [[06/22/2019 02:25:20 PM]] Step 1000: train 1.093392 lr: 1.399e-03 78 | [[06/22/2019 02:25:26 PM]] Criterion loss: 0.766180 79 | [[06/22/2019 02:25:26 PM]] accuracy: 76.20% 80 | [[06/22/2019 02:25:26 PM]] top_3_accuracy: 94.60% 81 | [[06/22/2019 02:25:26 PM]] Snapshot metric -0.76200000 82 | [[06/22/2019 02:25:26 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.76200000_1005.pth... 83 | [[06/22/2019 02:25:26 PM]] New low 84 | 85 | [[06/22/2019 02:25:42 PM]] Step 1040: train 1.119177 lr: 1.326e-03 86 | [[06/22/2019 02:26:01 PM]] Step 1080: train 0.856363 lr: 1.251e-03 87 | [[06/22/2019 02:26:20 PM]] Step 1120: train 0.873459 lr: 1.175e-03 88 | [[06/22/2019 02:26:39 PM]] Step 1160: train 0.928308 lr: 1.098e-03 89 | [[06/22/2019 02:26:57 PM]] Step 1200: train 0.913717 lr: 1.020e-03 90 | [[06/22/2019 02:27:02 PM]] Criterion loss: 0.745631 91 | [[06/22/2019 02:27:02 PM]] accuracy: 76.80% 92 | [[06/22/2019 02:27:02 PM]] top_3_accuracy: 93.40% 93 | [[06/22/2019 02:27:02 PM]] Snapshot metric -0.76800000 94 | [[06/22/2019 02:27:02 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.76800000_1206.pth... 95 | [[06/22/2019 02:27:02 PM]] New low 96 | 97 | [[06/22/2019 02:27:02 PM]] ====================Epoch 4==================== 98 | [[06/22/2019 02:27:18 PM]] Step 1240: train 0.906149 lr: 9.414e-04 99 | [[06/22/2019 02:27:36 PM]] Step 1280: train 0.858262 lr: 8.637e-04 100 | [[06/22/2019 02:27:54 PM]] Step 1320: train 0.795978 lr: 7.867e-04 101 | [[06/22/2019 02:28:12 PM]] Step 1360: train 0.783649 lr: 7.111e-04 102 | [[06/22/2019 02:28:30 PM]] Step 1400: train 0.849409 lr: 6.373e-04 103 | [[06/22/2019 02:28:35 PM]] Criterion loss: 0.626640 104 | [[06/22/2019 02:28:35 PM]] accuracy: 80.60% 105 | [[06/22/2019 02:28:35 PM]] top_3_accuracy: 95.40% 106 | [[06/22/2019 02:28:35 PM]] Snapshot metric -0.80600000 107 | [[06/22/2019 02:28:35 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.80600000_1407.pth... 
108 | [[06/22/2019 02:28:36 PM]] New low 109 | 110 | [[06/22/2019 02:28:50 PM]] Step 1440: train 0.748868 lr: 5.656e-04 111 | [[06/22/2019 02:29:08 PM]] Step 1480: train 0.772972 lr: 4.966e-04 112 | [[06/22/2019 02:29:26 PM]] Step 1520: train 0.767017 lr: 4.307e-04 113 | [[06/22/2019 02:29:43 PM]] Step 1560: train 0.733338 lr: 3.683e-04 114 | [[06/22/2019 02:30:01 PM]] Step 1600: train 0.687413 lr: 3.097e-04 115 | [[06/22/2019 02:30:07 PM]] Criterion loss: 0.540119 116 | [[06/22/2019 02:30:07 PM]] accuracy: 84.60% 117 | [[06/22/2019 02:30:07 PM]] top_3_accuracy: 94.60% 118 | [[06/22/2019 02:30:07 PM]] Snapshot metric -0.84600000 119 | [[06/22/2019 02:30:07 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84600000_1608.pth... 120 | [[06/22/2019 02:30:07 PM]] New low 121 | 122 | [[06/22/2019 02:30:07 PM]] ====================Epoch 5==================== 123 | [[06/22/2019 02:30:22 PM]] Step 1640: train 0.686983 lr: 2.553e-04 124 | [[06/22/2019 02:30:39 PM]] Step 1680: train 0.700227 lr: 2.055e-04 125 | [[06/22/2019 02:30:56 PM]] Step 1720: train 0.620291 lr: 1.605e-04 126 | [[06/22/2019 02:31:14 PM]] Step 1760: train 0.587356 lr: 1.206e-04 127 | [[06/22/2019 02:31:31 PM]] Step 1800: train 0.602121 lr: 8.614e-05 128 | [[06/22/2019 02:31:38 PM]] Criterion loss: 0.498786 129 | [[06/22/2019 02:31:38 PM]] accuracy: 84.60% 130 | [[06/22/2019 02:31:38 PM]] top_3_accuracy: 95.80% 131 | [[06/22/2019 02:31:38 PM]] Snapshot metric -0.84600000 132 | [[06/22/2019 02:31:38 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84600000_1809.pth... 133 | [[06/22/2019 02:31:52 PM]] Step 1840: train 0.621550 lr: 5.723e-05 134 | [[06/22/2019 02:32:10 PM]] Step 1880: train 0.559325 lr: 3.407e-05 135 | [[06/22/2019 02:32:28 PM]] Step 1920: train 0.624009 lr: 1.682e-05 136 | [[06/22/2019 02:32:45 PM]] Step 1960: train 0.610800 lr: 5.560e-06 137 | [[06/22/2019 02:33:03 PM]] Step 2000: train 0.585742 lr: 3.740e-07 138 | [[06/22/2019 02:33:10 PM]] Criterion loss: 0.506756 139 | [[06/22/2019 02:33:10 PM]] accuracy: 83.40% 140 | [[06/22/2019 02:33:10 PM]] top_3_accuracy: 95.80% 141 | [[06/22/2019 02:33:10 PM]] Snapshot metric -0.83400000 142 | [[06/22/2019 02:33:10 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83400000_2010.pth... 
143 | -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/bs64_mixup02_8600.txt: -------------------------------------------------------------------------------- 1 | [[06/22/2019 02:59:53 PM]] SEED: 231 2 | [[06/22/2019 02:59:53 PM]] # of parameters: 27,770,943 3 | [[06/22/2019 02:59:53 PM]] # of trainable parameters: 27,770,943 4 | [[06/22/2019 02:59:53 PM]] Optimizer Adam ( 5 | Parameter Group 0 6 | amsgrad: False 7 | betas: (0.9, 0.999) 8 | eps: 1e-08 9 | initial_lr: 0.005 10 | lr: 5e-05 11 | weight_decay: 0 12 | 13 | Parameter Group 1 14 | amsgrad: False 15 | betas: (0.9, 0.999) 16 | eps: 1e-08 17 | initial_lr: 0.005 18 | lr: 5e-05 19 | weight_decay: 0 20 | ) 21 | [[06/22/2019 02:59:53 PM]] Batches per epoch: 201 22 | [[06/22/2019 02:59:53 PM]] ====================Epoch 1==================== 23 | [[06/22/2019 03:00:10 PM]] Step 20: train 3.708626 lr: 4.933e-04 24 | [[06/22/2019 03:00:27 PM]] Step 40: train 2.107206 lr: 9.858e-04 25 | [[06/22/2019 03:00:43 PM]] Step 60: train 2.039582 lr: 1.478e-03 26 | [[06/22/2019 03:01:00 PM]] Step 80: train 1.885562 lr: 1.971e-03 27 | [[06/22/2019 03:01:16 PM]] Step 100: train 1.781235 lr: 2.463e-03 28 | [[06/22/2019 03:01:19 PM]] Criterion loss: 3.208526 29 | [[06/22/2019 03:01:19 PM]] accuracy: 31.00% 30 | [[06/22/2019 03:01:19 PM]] top_3_accuracy: 51.40% 31 | [[06/22/2019 03:01:19 PM]] Snapshot metric -0.31000000 32 | [[06/22/2019 03:01:19 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.31000000_100.pth... 33 | [[06/22/2019 03:01:19 PM]] New low 34 | 35 | [[06/22/2019 03:01:36 PM]] Step 120: train 1.711841 lr: 2.956e-03 36 | [[06/22/2019 03:01:52 PM]] Step 140: train 1.779275 lr: 3.449e-03 37 | [[06/22/2019 03:02:09 PM]] Step 160: train 1.782424 lr: 3.941e-03 38 | [[06/22/2019 03:02:25 PM]] Step 180: train 1.643625 lr: 4.434e-03 39 | [[06/22/2019 03:02:42 PM]] Step 200: train 1.690528 lr: 4.926e-03 40 | [[06/22/2019 03:02:45 PM]] Criterion loss: 1.394506 41 | [[06/22/2019 03:02:45 PM]] accuracy: 55.80% 42 | [[06/22/2019 03:02:45 PM]] top_3_accuracy: 81.20% 43 | [[06/22/2019 03:02:45 PM]] Snapshot metric -0.55800000 44 | [[06/22/2019 03:02:45 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.55800000_200.pth... 45 | [[06/22/2019 03:02:45 PM]] New low 46 | 47 | [[06/22/2019 03:02:46 PM]] ====================Epoch 2==================== 48 | [[06/22/2019 03:03:02 PM]] Step 220: train 1.586219 lr: 4.996e-03 49 | [[06/22/2019 03:03:19 PM]] Step 240: train 1.540666 lr: 4.977e-03 50 | [[06/22/2019 03:03:35 PM]] Step 260: train 1.584697 lr: 4.942e-03 51 | [[06/22/2019 03:03:52 PM]] Step 280: train 1.529005 lr: 4.893e-03 52 | [[06/22/2019 03:04:09 PM]] Step 300: train 1.468388 lr: 4.830e-03 53 | [[06/22/2019 03:04:11 PM]] Criterion loss: 1.253831 54 | [[06/22/2019 03:04:11 PM]] accuracy: 56.60% 55 | [[06/22/2019 03:04:11 PM]] top_3_accuracy: 86.80% 56 | [[06/22/2019 03:04:11 PM]] Snapshot metric -0.56600000 57 | [[06/22/2019 03:04:11 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.56600000_300.pth... 
58 | [[06/22/2019 03:04:12 PM]] New low 59 | 60 | [[06/22/2019 03:04:28 PM]] Step 320: train 1.420705 lr: 4.752e-03 61 | [[06/22/2019 03:04:45 PM]] Step 340: train 1.461756 lr: 4.660e-03 62 | [[06/22/2019 03:05:01 PM]] Step 360: train 1.431700 lr: 4.555e-03 63 | [[06/22/2019 03:05:18 PM]] Step 380: train 1.399826 lr: 4.438e-03 64 | [[06/22/2019 03:05:34 PM]] Step 400: train 1.338424 lr: 4.309e-03 65 | [[06/22/2019 03:05:37 PM]] Criterion loss: 1.110560 66 | [[06/22/2019 03:05:37 PM]] accuracy: 63.80% 67 | [[06/22/2019 03:05:37 PM]] top_3_accuracy: 88.00% 68 | [[06/22/2019 03:05:37 PM]] Snapshot metric -0.63800000 69 | [[06/22/2019 03:05:37 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.63800000_400.pth... 70 | [[06/22/2019 03:05:37 PM]] New low 71 | 72 | [[06/22/2019 03:05:39 PM]] ====================Epoch 3==================== 73 | [[06/22/2019 03:05:54 PM]] Step 420: train 1.360706 lr: 4.168e-03 74 | [[06/22/2019 03:06:11 PM]] Step 440: train 1.345563 lr: 4.018e-03 75 | [[06/22/2019 03:06:27 PM]] Step 460: train 1.260429 lr: 3.858e-03 76 | [[06/22/2019 03:06:44 PM]] Step 480: train 1.311192 lr: 3.690e-03 77 | [[06/22/2019 03:07:00 PM]] Step 500: train 1.265890 lr: 3.515e-03 78 | [[06/22/2019 03:07:03 PM]] Criterion loss: 0.843474 79 | [[06/22/2019 03:07:03 PM]] accuracy: 71.80% 80 | [[06/22/2019 03:07:03 PM]] top_3_accuracy: 92.00% 81 | [[06/22/2019 03:07:03 PM]] Snapshot metric -0.71800000 82 | [[06/22/2019 03:07:03 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.71800000_500.pth... 83 | [[06/22/2019 03:07:03 PM]] New low 84 | 85 | [[06/22/2019 03:07:20 PM]] Step 520: train 1.227812 lr: 3.334e-03 86 | [[06/22/2019 03:07:36 PM]] Step 540: train 1.289461 lr: 3.147e-03 87 | [[06/22/2019 03:07:53 PM]] Step 560: train 1.262719 lr: 2.957e-03 88 | [[06/22/2019 03:08:10 PM]] Step 580: train 1.117412 lr: 2.763e-03 89 | [[06/22/2019 03:08:27 PM]] Step 600: train 1.164743 lr: 2.568e-03 90 | [[06/22/2019 03:08:29 PM]] Criterion loss: 0.726492 91 | [[06/22/2019 03:08:29 PM]] accuracy: 76.80% 92 | [[06/22/2019 03:08:29 PM]] top_3_accuracy: 94.00% 93 | [[06/22/2019 03:08:29 PM]] Snapshot metric -0.76800000 94 | [[06/22/2019 03:08:29 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.76800000_600.pth... 95 | [[06/22/2019 03:08:30 PM]] New low 96 | 97 | [[06/22/2019 03:08:32 PM]] ====================Epoch 4==================== 98 | [[06/22/2019 03:08:47 PM]] Step 620: train 1.115039 lr: 2.373e-03 99 | [[06/22/2019 03:09:04 PM]] Step 640: train 1.121448 lr: 2.179e-03 100 | [[06/22/2019 03:09:21 PM]] Step 660: train 1.164681 lr: 1.986e-03 101 | [[06/22/2019 03:09:38 PM]] Step 680: train 1.079892 lr: 1.797e-03 102 | [[06/22/2019 03:09:55 PM]] Step 700: train 1.058773 lr: 1.611e-03 103 | [[06/22/2019 03:09:58 PM]] Criterion loss: 0.941993 104 | [[06/22/2019 03:09:58 PM]] accuracy: 68.60% 105 | [[06/22/2019 03:09:58 PM]] top_3_accuracy: 90.80% 106 | [[06/22/2019 03:09:58 PM]] Snapshot metric -0.68600000 107 | [[06/22/2019 03:09:58 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.68600000_700.pth... 
108 | [[06/22/2019 03:10:15 PM]] Step 720: train 1.053417 lr: 1.432e-03 109 | [[06/22/2019 03:10:33 PM]] Step 740: train 1.014899 lr: 1.258e-03 110 | [[06/22/2019 03:10:49 PM]] Step 760: train 1.034633 lr: 1.093e-03 111 | [[06/22/2019 03:11:06 PM]] Step 780: train 1.066577 lr: 9.358e-04 112 | [[06/22/2019 03:11:22 PM]] Step 800: train 0.943729 lr: 7.883e-04 113 | [[06/22/2019 03:11:25 PM]] Criterion loss: 0.548378 114 | [[06/22/2019 03:11:25 PM]] accuracy: 83.80% 115 | [[06/22/2019 03:11:25 PM]] top_3_accuracy: 94.80% 116 | [[06/22/2019 03:11:25 PM]] Snapshot metric -0.83800000 117 | [[06/22/2019 03:11:25 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83800000_800.pth... 118 | [[06/22/2019 03:11:25 PM]] New low 119 | 120 | [[06/22/2019 03:11:29 PM]] ====================Epoch 5==================== 121 | [[06/22/2019 03:11:42 PM]] Step 820: train 0.915435 lr: 6.513e-04 122 | [[06/22/2019 03:11:59 PM]] Step 840: train 0.926835 lr: 5.256e-04 123 | [[06/22/2019 03:12:15 PM]] Step 860: train 0.959245 lr: 4.119e-04 124 | [[06/22/2019 03:12:32 PM]] Step 880: train 0.954981 lr: 3.109e-04 125 | [[06/22/2019 03:12:49 PM]] Step 900: train 0.939287 lr: 2.233e-04 126 | [[06/22/2019 03:12:51 PM]] Criterion loss: 0.518281 127 | [[06/22/2019 03:12:51 PM]] accuracy: 84.80% 128 | [[06/22/2019 03:12:51 PM]] top_3_accuracy: 95.80% 129 | [[06/22/2019 03:12:51 PM]] Snapshot metric -0.84800000 130 | [[06/22/2019 03:12:51 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84800000_900.pth... 131 | [[06/22/2019 03:12:51 PM]] New low 132 | 133 | [[06/22/2019 03:13:08 PM]] Step 920: train 0.919005 lr: 1.497e-04 134 | [[06/22/2019 03:13:25 PM]] Step 940: train 0.895367 lr: 9.032e-05 135 | [[06/22/2019 03:13:41 PM]] Step 960: train 0.936508 lr: 4.568e-05 136 | [[06/22/2019 03:13:58 PM]] Step 980: train 0.939540 lr: 1.603e-05 137 | [[06/22/2019 03:14:14 PM]] Step 1000: train 0.933365 lr: 1.546e-06 138 | [[06/22/2019 03:14:17 PM]] Criterion loss: 0.484089 139 | [[06/22/2019 03:14:17 PM]] accuracy: 86.00% 140 | [[06/22/2019 03:14:17 PM]] top_3_accuracy: 95.40% 141 | [[06/22/2019 03:14:17 PM]] Snapshot metric -0.86000000 142 | [[06/22/2019 03:14:17 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.86000000_1000.pth... 
143 | [[06/22/2019 03:14:17 PM]] New low 144 | 145 | -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/bs64_8680.txt: -------------------------------------------------------------------------------- 1 | [[06/22/2019 02:41:28 PM]] SEED: 231 2 | [[06/22/2019 02:41:28 PM]] # of parameters: 27,770,943 3 | [[06/22/2019 02:41:28 PM]] # of trainable parameters: 27,770,943 4 | [[06/22/2019 02:41:28 PM]] Optimizer Adam ( 5 | Parameter Group 0 6 | amsgrad: False 7 | betas: (0.9, 0.999) 8 | eps: 1e-08 9 | initial_lr: 0.005 10 | lr: 5e-05 11 | weight_decay: 0 12 | 13 | Parameter Group 1 14 | amsgrad: False 15 | betas: (0.9, 0.999) 16 | eps: 1e-08 17 | initial_lr: 0.005 18 | lr: 5e-05 19 | weight_decay: 0 20 | ) 21 | [[06/22/2019 02:41:28 PM]] Batches per epoch: 201 22 | [[06/22/2019 02:41:28 PM]] ====================Epoch 1==================== 23 | [[06/22/2019 02:41:45 PM]] Step 20: train 3.597263 lr: 4.933e-04 24 | [[06/22/2019 02:42:01 PM]] Step 40: train 2.026338 lr: 9.858e-04 25 | [[06/22/2019 02:42:18 PM]] Step 60: train 1.883784 lr: 1.478e-03 26 | [[06/22/2019 02:42:34 PM]] Step 80: train 1.725523 lr: 1.971e-03 27 | [[06/22/2019 02:42:50 PM]] Step 100: train 1.609397 lr: 2.463e-03 28 | [[06/22/2019 02:42:53 PM]] Criterion loss: 2.403526 29 | [[06/22/2019 02:42:53 PM]] accuracy: 34.40% 30 | [[06/22/2019 02:42:53 PM]] top_3_accuracy: 63.00% 31 | [[06/22/2019 02:42:53 PM]] Snapshot metric -0.34400000 32 | [[06/22/2019 02:42:53 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.34400000_100.pth... 33 | [[06/22/2019 02:42:53 PM]] New low 34 | 35 | [[06/22/2019 02:43:10 PM]] Step 120: train 1.539507 lr: 2.956e-03 36 | [[06/22/2019 02:43:26 PM]] Step 140: train 1.656980 lr: 3.449e-03 37 | [[06/22/2019 02:43:43 PM]] Step 160: train 1.592132 lr: 3.941e-03 38 | [[06/22/2019 02:43:59 PM]] Step 180: train 1.371064 lr: 4.434e-03 39 | [[06/22/2019 02:44:16 PM]] Step 200: train 1.439013 lr: 4.926e-03 40 | [[06/22/2019 02:44:18 PM]] Criterion loss: 1.570898 41 | [[06/22/2019 02:44:18 PM]] accuracy: 54.20% 42 | [[06/22/2019 02:44:18 PM]] top_3_accuracy: 79.20% 43 | [[06/22/2019 02:44:18 PM]] Snapshot metric -0.54200000 44 | [[06/22/2019 02:44:18 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.54200000_200.pth... 45 | [[06/22/2019 02:44:19 PM]] New low 46 | 47 | [[06/22/2019 02:44:19 PM]] ====================Epoch 2==================== 48 | [[06/22/2019 02:44:36 PM]] Step 220: train 1.482403 lr: 4.996e-03 49 | [[06/22/2019 02:44:52 PM]] Step 240: train 1.352111 lr: 4.977e-03 50 | [[06/22/2019 02:45:08 PM]] Step 260: train 1.351472 lr: 4.942e-03 51 | [[06/22/2019 02:45:25 PM]] Step 280: train 1.337714 lr: 4.893e-03 52 | [[06/22/2019 02:45:41 PM]] Step 300: train 1.190638 lr: 4.830e-03 53 | [[06/22/2019 02:45:44 PM]] Criterion loss: 1.551471 54 | [[06/22/2019 02:45:44 PM]] accuracy: 57.20% 55 | [[06/22/2019 02:45:44 PM]] top_3_accuracy: 84.60% 56 | [[06/22/2019 02:45:44 PM]] Snapshot metric -0.57200000 57 | [[06/22/2019 02:45:44 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.57200000_300.pth... 
58 | [[06/22/2019 02:45:44 PM]] New low 59 | 60 | [[06/22/2019 02:46:01 PM]] Step 320: train 1.171512 lr: 4.752e-03 61 | [[06/22/2019 02:46:17 PM]] Step 340: train 1.148336 lr: 4.660e-03 62 | [[06/22/2019 02:46:34 PM]] Step 360: train 1.125046 lr: 4.555e-03 63 | [[06/22/2019 02:46:52 PM]] Step 380: train 1.155357 lr: 4.438e-03 64 | [[06/22/2019 02:47:10 PM]] Step 400: train 1.104876 lr: 4.309e-03 65 | [[06/22/2019 02:47:13 PM]] Criterion loss: 1.028381 66 | [[06/22/2019 02:47:13 PM]] accuracy: 68.40% 67 | [[06/22/2019 02:47:13 PM]] top_3_accuracy: 89.60% 68 | [[06/22/2019 02:47:13 PM]] Snapshot metric -0.68400000 69 | [[06/22/2019 02:47:13 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.68400000_400.pth... 70 | [[06/22/2019 02:47:13 PM]] New low 71 | 72 | [[06/22/2019 02:47:15 PM]] ====================Epoch 3==================== 73 | [[06/22/2019 02:47:31 PM]] Step 420: train 1.110575 lr: 4.168e-03 74 | [[06/22/2019 02:47:49 PM]] Step 440: train 1.051789 lr: 4.018e-03 75 | [[06/22/2019 02:48:06 PM]] Step 460: train 0.986835 lr: 3.858e-03 76 | [[06/22/2019 02:48:24 PM]] Step 480: train 1.029533 lr: 3.690e-03 77 | [[06/22/2019 02:48:41 PM]] Step 500: train 0.950887 lr: 3.515e-03 78 | [[06/22/2019 02:48:44 PM]] Criterion loss: 0.888465 79 | [[06/22/2019 02:48:44 PM]] accuracy: 69.60% 80 | [[06/22/2019 02:48:44 PM]] top_3_accuracy: 91.80% 81 | [[06/22/2019 02:48:44 PM]] Snapshot metric -0.69600000 82 | [[06/22/2019 02:48:44 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.69600000_500.pth... 83 | [[06/22/2019 02:48:45 PM]] New low 84 | 85 | [[06/22/2019 02:49:03 PM]] Step 520: train 0.961737 lr: 3.334e-03 86 | [[06/22/2019 02:49:20 PM]] Step 540: train 0.851117 lr: 3.147e-03 87 | [[06/22/2019 02:49:38 PM]] Step 560: train 0.842348 lr: 2.957e-03 88 | [[06/22/2019 02:49:57 PM]] Step 580: train 0.833667 lr: 2.763e-03 89 | [[06/22/2019 02:50:14 PM]] Step 600: train 0.892872 lr: 2.568e-03 90 | [[06/22/2019 02:50:17 PM]] Criterion loss: 0.726943 91 | [[06/22/2019 02:50:17 PM]] accuracy: 78.40% 92 | [[06/22/2019 02:50:17 PM]] top_3_accuracy: 93.20% 93 | [[06/22/2019 02:50:17 PM]] Snapshot metric -0.78400000 94 | [[06/22/2019 02:50:17 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.78400000_600.pth... 95 | [[06/22/2019 02:50:18 PM]] New low 96 | 97 | [[06/22/2019 02:50:21 PM]] ====================Epoch 4==================== 98 | [[06/22/2019 02:50:36 PM]] Step 620: train 0.784497 lr: 2.373e-03 99 | [[06/22/2019 02:50:54 PM]] Step 640: train 0.796126 lr: 2.179e-03 100 | [[06/22/2019 02:51:12 PM]] Step 660: train 0.752741 lr: 1.986e-03 101 | [[06/22/2019 02:51:30 PM]] Step 680: train 0.773949 lr: 1.797e-03 102 | [[06/22/2019 02:51:48 PM]] Step 700: train 0.780226 lr: 1.611e-03 103 | [[06/22/2019 02:51:51 PM]] Criterion loss: 0.588716 104 | [[06/22/2019 02:51:51 PM]] accuracy: 81.40% 105 | [[06/22/2019 02:51:51 PM]] top_3_accuracy: 96.00% 106 | [[06/22/2019 02:51:51 PM]] Snapshot metric -0.81400000 107 | [[06/22/2019 02:51:51 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.81400000_700.pth... 
108 | [[06/22/2019 02:51:51 PM]] New low 109 | 110 | [[06/22/2019 02:52:09 PM]] Step 720: train 0.763110 lr: 1.432e-03 111 | [[06/22/2019 02:52:27 PM]] Step 740: train 0.733605 lr: 1.258e-03 112 | [[06/22/2019 02:52:44 PM]] Step 760: train 0.665256 lr: 1.093e-03 113 | [[06/22/2019 02:53:02 PM]] Step 780: train 0.687970 lr: 9.358e-04 114 | [[06/22/2019 02:53:19 PM]] Step 800: train 0.635656 lr: 7.883e-04 115 | [[06/22/2019 02:53:21 PM]] Criterion loss: 0.513557 116 | [[06/22/2019 02:53:21 PM]] accuracy: 83.40% 117 | [[06/22/2019 02:53:22 PM]] top_3_accuracy: 95.80% 118 | [[06/22/2019 02:53:22 PM]] Snapshot metric -0.83400000 119 | [[06/22/2019 02:53:22 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83400000_800.pth... 120 | [[06/22/2019 02:53:22 PM]] New low 121 | 122 | [[06/22/2019 02:53:25 PM]] ====================Epoch 5==================== 123 | [[06/22/2019 02:53:40 PM]] Step 820: train 0.626556 lr: 6.513e-04 124 | [[06/22/2019 02:53:57 PM]] Step 840: train 0.600769 lr: 5.256e-04 125 | [[06/22/2019 02:54:15 PM]] Step 860: train 0.598774 lr: 4.119e-04 126 | [[06/22/2019 02:54:33 PM]] Step 880: train 0.527382 lr: 3.109e-04 127 | [[06/22/2019 02:54:50 PM]] Step 900: train 0.511858 lr: 2.233e-04 128 | [[06/22/2019 02:54:53 PM]] Criterion loss: 0.456041 129 | [[06/22/2019 02:54:53 PM]] accuracy: 85.80% 130 | [[06/22/2019 02:54:53 PM]] top_3_accuracy: 96.80% 131 | [[06/22/2019 02:54:53 PM]] Snapshot metric -0.85800000 132 | [[06/22/2019 02:54:53 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.85800000_900.pth... 133 | [[06/22/2019 02:54:53 PM]] New low 134 | 135 | [[06/22/2019 02:55:10 PM]] Step 920: train 0.606073 lr: 1.497e-04 136 | [[06/22/2019 02:55:28 PM]] Step 940: train 0.585275 lr: 9.032e-05 137 | [[06/22/2019 02:55:46 PM]] Step 960: train 0.676848 lr: 4.568e-05 138 | [[06/22/2019 02:56:04 PM]] Step 980: train 0.553108 lr: 1.603e-05 139 | [[06/22/2019 02:56:22 PM]] Step 1000: train 0.535630 lr: 1.546e-06 140 | [[06/22/2019 02:56:25 PM]] Criterion loss: 0.430974 141 | [[06/22/2019 02:56:25 PM]] accuracy: 86.80% 142 | [[06/22/2019 02:56:25 PM]] top_3_accuracy: 96.80% 143 | [[06/22/2019 02:56:25 PM]] Snapshot metric -0.86800000 144 | [[06/22/2019 02:56:25 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.86800000_1000.pth... 145 | [[06/22/2019 02:56:25 PM]] New low 146 | 147 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /pytorch_helper_bot/helperbot/bot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import logging 4 | from pathlib import Path 5 | from collections import deque 6 | from typing import List, Tuple, Iterable, Optional, Union, Sequence 7 | from dataclasses import dataclass, field 8 | 9 | import numpy as np 10 | import torch 11 | from torch.nn.utils.clip_grad import clip_grad_norm_ 12 | from tqdm import tqdm 13 | 14 | from .logger import Logger 15 | from .metrics import Metric 16 | 17 | try: 18 | from apex import amp 19 | APEX_AVAILABLE = True 20 | except ModuleNotFoundError: 21 | APEX_AVAILABLE = False 22 | 23 | SEED = int(os.environ.get("SEED", 9293)) 24 | 25 | random.seed(SEED) 26 | np.random.seed(SEED) 27 | torch.manual_seed(SEED) 28 | torch.cuda.manual_seed(SEED) 29 | 30 | 31 | @dataclass 32 | class BaseBot: 33 | """Base Interface to Model Training and Inference""" 34 | train_loader: Iterable 35 | val_loader: Iterable 36 | avg_window: int 37 | criterion: object 38 | model: torch.nn.Module 39 | optimizer: torch.optim.Optimizer 40 | name: str = "basebot" 41 | use_amp: bool = False 42 | clip_grad: float = 0 43 | batch_idx: int = 0 44 | checkpoint_dir: Path = Path("./data/cache/model_cache/") 45 | device: Union[str, torch.device] = "cuda:0" 46 | log_dir: Path = Path("./data/cache/logs/") 47 | log_level: int = logging.INFO 48 | loss_format: str = "%.8f" 49 | metric_format: Optional[str] = None 50 | use_tensorboard: bool = False 51 | gradient_accumulation_steps: int = 1 52 | echo: bool = True 53 | step: int = 0 54 | best_performers: List[Tuple] = field(init=False) 55 | train_losses: deque = field(init=False) 56 | train_weights: deque = field(init=False) 57 | metrics: Sequence = () 58 | callbacks: Sequence = () 59 | monitor_metric: str = "loss" 60 | pbar: bool = False 61 | 62 | def __post_init__(self): 63 | assert (self.use_amp and APEX_AVAILABLE) or (not self.use_amp) 64 | self.logger = Logger( 65 | self.name, str(self.log_dir), self.log_level, 66 | use_tensorboard=self.use_tensorboard, echo=self.echo) 67 | self.logger.info("SEED: %s", SEED) 68 | self.checkpoint_dir.mkdir(exist_ok=True, parents=True) 69 | self.best_performers: List[Tuple] = [] 70 | if self.metric_format is None: 71 | self.metric_format = self.loss_format 72 | self.train_losses = deque(maxlen=self.avg_window) 73 | self.train_weights = deque(maxlen=self.avg_window) 74 | self.count_model_parameters() 75 | 76 | def count_model_parameters(self): 77 | self.logger.info( 78 | "# of parameters: {:,d}".format( 79 | np.sum(list(p.numel() for p in self.model.parameters())))) 80 | self.logger.info( 81 | "# of trainable parameters: {:,d}".format( 82 | np.sum(list(p.numel() for p in self.model.parameters() if p.requires_grad)))) 83 | 84 | def train_one_step(self, input_tensors, target): 85 | self.model.train() 86 | assert self.model.training 87 | output = self.model(*input_tensors) 88 | batch_loss = self.criterion( 89 | self.extract_prediction(output), target 90 | ) / self.gradient_accumulation_steps 91 | if self.use_amp: 92 | with amp.scale_loss(batch_loss, self.optimizer) as scaled_loss: 93 | scaled_loss.backward() 94 | else: 95 | batch_loss.backward() 96 | self.train_losses.append( 97 | batch_loss.data.cpu().numpy() * self.gradient_accumulation_steps) 98 | self.train_weights.append(input_tensors[0].size(self.batch_idx)) 99 | if self.clip_grad > 0: 100 | if not self.use_amp: 101 | 
clip_grad_norm_(self.model.parameters(), self.clip_grad) 102 | else: 103 | clip_grad_norm_(amp.master_params( 104 | self.optimizer), self.clip_grad) 105 | if self.step % self.gradient_accumulation_steps == 0: 106 | self.optimizer.step() 107 | self.optimizer.zero_grad() 108 | 109 | def log_progress(self): 110 | train_loss_avg = np.average( 111 | self.train_losses, weights=self.train_weights) 112 | self.logger.info( 113 | "Step %s: train %.6f lr: %.3e", 114 | self.step, train_loss_avg, self.optimizer.param_groups[-1]['lr']) 115 | self.logger.tb_scalars( 116 | "lr", self.optimizer.param_groups[0]['lr'], self.step) 117 | self.logger.tb_scalars( 118 | "losses", {"train": train_loss_avg}, self.step) 119 | 120 | def snapshot(self): 121 | metrics = self.eval(self.val_loader) 122 | target_metric = metrics[self.monitor_metric] 123 | metric_str = self.metric_format % target_metric 124 | self.logger.info("Snapshot metric %s", metric_str) 125 | self.logger.tb_scalars( 126 | "losses", {"val": metrics["loss"]}, self.step) 127 | self.logger.tb_scalars( 128 | "monitor_metric", {"val": target_metric}, self.step) 129 | target_path = ( 130 | self.checkpoint_dir / 131 | "snapshot_{}_{}_{}.pth".format(self.name, metric_str, self.step)) 132 | self.best_performers.append((target_metric, target_path, self.step)) 133 | self.best_performers = sorted(self.best_performers, key=lambda x: x[0]) 134 | self.logger.info("Saving checkpoint %s...", target_path) 135 | torch.save(self.model.state_dict(), target_path) 136 | assert Path(target_path).exists() 137 | return target_metric 138 | 139 | @staticmethod 140 | def extract_prediction(output): 141 | """Assumes single output""" 142 | return output[:, 0] 143 | 144 | @staticmethod 145 | def transform_prediction(prediction): 146 | return prediction 147 | 148 | def run_batch_inputs_callbacks(self, input_tensors, targets): 149 | for callback in self.callbacks: 150 | input_tensors, targets = callback.on_batch_inputs( 151 | self, input_tensors, targets) 152 | return input_tensors, targets 153 | 154 | def run_step_ends_callbacks(self): 155 | for callback in self.callbacks: 156 | callback.on_step_ends(self) 157 | 158 | def run_epoch_ends_callbacks(self, epoch): 159 | for callback in self.callbacks: 160 | callback.on_epoch_ends(self, epoch) 161 | 162 | def train( 163 | self, n_steps, *, log_interval=50, 164 | early_stopping_cnt=0, min_improv=1e-4, 165 | snapshot_interval=2500, keep_n_snapshots=-1): 166 | self.optimizer.zero_grad() 167 | if self.val_loader is not None: 168 | best_val_loss = 100 169 | epoch = 0 170 | wo_improvement = 0 171 | self.best_performers = [] 172 | self.logger.info( 173 | "Optimizer {}".format(str(self.optimizer))) 174 | self.logger.info("Batches per epoch: {}".format( 175 | len(self.train_loader))) 176 | try: 177 | while self.step < n_steps: 178 | epoch += 1 179 | self.logger.info( 180 | "=" * 20 + "Epoch %d" + "=" * 20, epoch) 181 | for *input_tensors, targets in self.train_loader: 182 | input_tensors = [x.to(self.device) for x in input_tensors] 183 | targets = targets.to(self.device) 184 | input_tensors, targets = self.run_batch_inputs_callbacks( 185 | input_tensors, targets) 186 | self.train_one_step(input_tensors, targets) 187 | self.step += 1 188 | if self.step % log_interval == 0: 189 | self.log_progress() 190 | if ((callable(snapshot_interval) and snapshot_interval(self.step)) 191 | or (not callable(snapshot_interval) and self.step % snapshot_interval == 0)): 192 | loss = self.snapshot() 193 | if best_val_loss > loss + min_improv: 194 | 
self.logger.info("New low\n") 195 | best_val_loss = loss 196 | wo_improvement = 0 197 | else: 198 | wo_improvement += 1 199 | if keep_n_snapshots > 0: 200 | self.remove_checkpoints(keep=keep_n_snapshots) 201 | self.run_step_ends_callbacks() 202 | if early_stopping_cnt and wo_improvement > early_stopping_cnt: 203 | return 204 | if self.step >= n_steps: 205 | break 206 | self.run_epoch_ends_callbacks(epoch + 1) 207 | except KeyboardInterrupt: 208 | pass 209 | 210 | def eval(self, loader): 211 | """Warning: Only support datasets whose predictions and labels fit in memory together.""" 212 | self.model.eval() 213 | preds, ys = [], [] 214 | losses, weights = [], [] 215 | self.logger.debug("Evaluating...") 216 | with torch.set_grad_enabled(False): 217 | for *input_tensors, y_local in tqdm(loader, disable=not self.pbar): 218 | input_tensors = [x.to(self.device) for x in input_tensors] 219 | output = self.extract_prediction(self.model(*input_tensors)) 220 | batch_loss = self.criterion( 221 | output, y_local.to(self.device)) 222 | losses.append(batch_loss.data.cpu().item()) 223 | weights.append(y_local.size(self.batch_idx)) 224 | # Save batch labels and predictions 225 | preds.append(output.cpu()) 226 | ys.append(y_local.cpu()) 227 | loss = np.average(losses, weights=weights) 228 | self.logger.info("Criterion loss: {}".format(self.loss_format % loss)) 229 | metrics = {"loss": loss} 230 | global_ys, global_preds = torch.cat(ys), torch.cat(preds) 231 | for metric in self.metrics: 232 | metric_loss, metric_string = metric(global_ys, global_preds) 233 | metrics[metric.name] = metric_loss 234 | self.logger.info(f"{metric.name}: {metric_string}") 235 | return metrics 236 | 237 | def predict_batch(self, input_tensors): 238 | self.model.eval() 239 | tmp = self.model(*input_tensors) 240 | return self.extract_prediction(tmp) 241 | 242 | def predict_avg(self, loader, k=8): 243 | assert len(self.best_performers) >= k 244 | preds = [] 245 | # Iterating through checkpoints 246 | for i in range(k): 247 | target = self.best_performers[i][1] 248 | self.logger.info("Loading %s", format(target)) 249 | self.load_model(target) 250 | preds.append(self.predict(loader).unsqueeze(0)) 251 | return torch.cat(preds, dim=0).mean(dim=0) 252 | 253 | def predict(self, loader, *, return_y=False): 254 | self.model.eval() 255 | outputs, y_global = [], [] 256 | with torch.set_grad_enabled(False): 257 | for *input_tensors, y_local in tqdm(loader, disable=not self.pbar): 258 | input_tensors = [x.to(self.device) for x in input_tensors] 259 | outputs.append(self.predict_batch(input_tensors).cpu()) 260 | if return_y: 261 | y_global.append(y_local) 262 | outputs = torch.cat(outputs, dim=0) 263 | if return_y: 264 | y_global = torch.cat(y_global, dim=0) 265 | return outputs, y_global.cpu() 266 | return outputs 267 | 268 | def remove_checkpoints(self, keep=0): 269 | for checkpoint in np.unique([x[1] for x in self.best_performers[keep:]]): 270 | Path(checkpoint).unlink() 271 | self.best_performers = self.best_performers[:keep] 272 | 273 | def load_model(self, target_path): 274 | self.model.load_state_dict(torch.load(target_path)) 275 | -------------------------------------------------------------------------------- /imet/adabound.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.optim import Optimizer 4 | 5 | 6 | class AdaBound(Optimizer): 7 | """Implements AdaBound algorithm. 
8 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 9 | Arguments: 10 | params (iterable): iterable of parameters to optimize or dicts defining 11 | parameter groups 12 | lr (float, optional): Adam learning rate (default: 1e-3) 13 | betas (Tuple[float, float], optional): coefficients used for computing 14 | running averages of gradient and its square (default: (0.9, 0.999)) 15 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 16 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 17 | eps (float, optional): term added to the denominator to improve 18 | numerical stability (default: 1e-8) 19 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 20 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 21 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 22 | https://openreview.net/forum?id=Bkg3g2R9FX 23 | """ 24 | 25 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 26 | eps=1e-8, weight_decay=0, amsbound=False): 27 | if not 0.0 <= lr: 28 | raise ValueError("Invalid learning rate: {}".format(lr)) 29 | if not 0.0 <= eps: 30 | raise ValueError("Invalid epsilon value: {}".format(eps)) 31 | if not 0.0 <= betas[0] < 1.0: 32 | raise ValueError( 33 | "Invalid beta parameter at index 0: {}".format(betas[0])) 34 | if not 0.0 <= betas[1] < 1.0: 35 | raise ValueError( 36 | "Invalid beta parameter at index 1: {}".format(betas[1])) 37 | if not 0.0 <= final_lr: 38 | raise ValueError( 39 | "Invalid final learning rate: {}".format(final_lr)) 40 | if not 0.0 <= gamma < 1.0: 41 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 42 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 43 | weight_decay=weight_decay, amsbound=amsbound) 44 | super(AdaBound, self).__init__(params, defaults) 45 | 46 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 47 | 48 | def __setstate__(self, state): 49 | super(AdaBound, self).__setstate__(state) 50 | for group in self.param_groups: 51 | group.setdefault('amsbound', False) 52 | 53 | def step(self, closure=None): 54 | """Performs a single optimization step. 55 | Arguments: 56 | closure (callable, optional): A closure that reevaluates the model 57 | and returns the loss. 58 | """ 59 | loss = None 60 | if closure is not None: 61 | loss = closure() 62 | 63 | for group, base_lr in zip(self.param_groups, self.base_lrs): 64 | for p in group['params']: 65 | if p.grad is None: 66 | continue 67 | grad = p.grad.data 68 | if grad.is_sparse: 69 | raise RuntimeError( 70 | 'AdaBound does not support sparse gradients') 71 | amsbound = group['amsbound'] 72 | 73 | state = self.state[p] 74 | 75 | # State initialization 76 | if len(state) == 0: 77 | state['step'] = 0 78 | # Exponential moving average of gradient values 79 | state['exp_avg'] = torch.zeros_like(p.data) 80 | # Exponential moving average of squared gradient values 81 | state['exp_avg_sq'] = torch.zeros_like(p.data) 82 | if amsbound: 83 | # Maintains max of all exp. moving avg. of sq. grad.
values 84 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 85 | 86 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 87 | if amsbound: 88 | max_exp_avg_sq = state['max_exp_avg_sq'] 89 | beta1, beta2 = group['betas'] 90 | 91 | state['step'] += 1 92 | 93 | if group['weight_decay'] != 0: 94 | grad = grad.add(group['weight_decay'], p.data) 95 | 96 | # Decay the first and second moment running average coefficient 97 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 98 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 99 | if amsbound: 100 | # Maintains the maximum of all 2nd moment running avg. till now 101 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 102 | # Use the max. for normalizing running avg. of gradient 103 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 104 | else: 105 | denom = exp_avg_sq.sqrt().add_(group['eps']) 106 | 107 | bias_correction1 = 1 - beta1 ** state['step'] 108 | bias_correction2 = 1 - beta2 ** state['step'] 109 | step_size = group['lr'] * \ 110 | math.sqrt(bias_correction2) / bias_correction1 111 | 112 | # Applies bounds on actual learning rate 113 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 114 | final_lr = group['final_lr'] * group['lr'] / base_lr 115 | lower_bound = final_lr * \ 116 | (1 - 1 / (group['gamma'] * state['step'] + 1)) 117 | upper_bound = final_lr * \ 118 | (1 + 1 / (group['gamma'] * state['step'])) 119 | step_size = torch.full_like(denom, step_size) 120 | step_size.div_(denom).clamp_( 121 | lower_bound, upper_bound).mul_(exp_avg) 122 | 123 | p.data.add_(-step_size) 124 | 125 | return loss 126 | 127 | 128 | class AdaBoundW(Optimizer): 129 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101) 130 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 131 | Arguments: 132 | params (iterable): iterable of parameters to optimize or dicts defining 133 | parameter groups 134 | lr (float, optional): Adam learning rate (default: 1e-3) 135 | betas (Tuple[float, float], optional): coefficients used for computing 136 | running averages of gradient and its square (default: (0.9, 0.999)) 137 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 138 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 139 | eps (float, optional): term added to the denominator to improve 140 | numerical stability (default: 1e-8) 141 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 142 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 143 | .. 
Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 144 | https://openreview.net/forum?id=Bkg3g2R9FX 145 | """ 146 | 147 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 148 | eps=1e-8, weight_decay=0, amsbound=False): 149 | if not 0.0 <= lr: 150 | raise ValueError("Invalid learning rate: {}".format(lr)) 151 | if not 0.0 <= eps: 152 | raise ValueError("Invalid epsilon value: {}".format(eps)) 153 | if not 0.0 <= betas[0] < 1.0: 154 | raise ValueError( 155 | "Invalid beta parameter at index 0: {}".format(betas[0])) 156 | if not 0.0 <= betas[1] < 1.0: 157 | raise ValueError( 158 | "Invalid beta parameter at index 1: {}".format(betas[1])) 159 | if not 0.0 <= final_lr: 160 | raise ValueError( 161 | "Invalid final learning rate: {}".format(final_lr)) 162 | if not 0.0 <= gamma < 1.0: 163 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 164 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 165 | weight_decay=weight_decay, amsbound=amsbound) 166 | super(AdaBoundW, self).__init__(params, defaults) 167 | 168 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 169 | 170 | def __setstate__(self, state): 171 | super(AdaBoundW, self).__setstate__(state) 172 | for group in self.param_groups: 173 | group.setdefault('amsbound', False) 174 | 175 | def step(self, closure=None): 176 | """Performs a single optimization step. 177 | Arguments: 178 | closure (callable, optional): A closure that reevaluates the model 179 | and returns the loss. 180 | """ 181 | loss = None 182 | if closure is not None: 183 | loss = closure() 184 | 185 | for group, base_lr in zip(self.param_groups, self.base_lrs): 186 | for p in group['params']: 187 | if p.grad is None: 188 | continue 189 | grad = p.grad.data 190 | if grad.is_sparse: 191 | raise RuntimeError( 192 | 'AdaBoundW does not support sparse gradients') 193 | amsbound = group['amsbound'] 194 | 195 | state = self.state[p] 196 | 197 | # State initialization 198 | if len(state) == 0: 199 | state['step'] = 0 200 | # Exponential moving average of gradient values 201 | state['exp_avg'] = torch.zeros_like(p.data) 202 | # Exponential moving average of squared gradient values 203 | state['exp_avg_sq'] = torch.zeros_like(p.data) 204 | if amsbound: 205 | # Maintains max of all exp. moving avg. of sq. grad. values 206 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 207 | 208 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 209 | if amsbound: 210 | max_exp_avg_sq = state['max_exp_avg_sq'] 211 | beta1, beta2 = group['betas'] 212 | 213 | state['step'] += 1 214 | 215 | # Decay the first and second moment running average coefficient 216 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 217 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 218 | if amsbound: 219 | # Maintains the maximum of all 2nd moment running avg. till now 220 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 221 | # Use the max. for normalizing running avg.
of gradient 222 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 223 | else: 224 | denom = exp_avg_sq.sqrt().add_(group['eps']) 225 | 226 | bias_correction1 = 1 - beta1 ** state['step'] 227 | bias_correction2 = 1 - beta2 ** state['step'] 228 | step_size = group['lr'] * \ 229 | math.sqrt(bias_correction2) / bias_correction1 230 | 231 | # Applies bounds on actual learning rate 232 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 233 | final_lr = group['final_lr'] * group['lr'] / base_lr 234 | lower_bound = final_lr * \ 235 | (1 - 1 / (group['gamma'] * state['step'] + 1)) 236 | upper_bound = final_lr * \ 237 | (1 + 1 / (group['gamma'] * state['step'])) 238 | step_size = torch.full_like(denom, step_size) 239 | step_size.div_(denom).clamp_( 240 | lower_bound, upper_bound).mul_(exp_avg) 241 | 242 | if group['weight_decay'] != 0: 243 | decayed_weights = torch.mul(p.data, group['weight_decay']) 244 | p.data.add_(-step_size) 245 | p.data.sub_(decayed_weights) 246 | else: 247 | p.data.add_(-step_size) 248 | 249 | return loss 250 | -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/bs64_e10.txt: -------------------------------------------------------------------------------- 1 | [[06/22/2019 03:44:34 PM]] SEED: 231 2 | [[06/22/2019 03:44:34 PM]] # of parameters: 27,770,943 3 | [[06/22/2019 03:44:34 PM]] # of trainable parameters: 27,770,943 4 | [[06/22/2019 03:44:34 PM]] Optimizer Adam ( 5 | Parameter Group 0 6 | amsgrad: False 7 | betas: (0.9, 0.999) 8 | eps: 1e-08 9 | initial_lr: 0.005 10 | lr: 5e-05 11 | weight_decay: 0 12 | 13 | Parameter Group 1 14 | amsgrad: False 15 | betas: (0.9, 0.999) 16 | eps: 1e-08 17 | initial_lr: 0.005 18 | lr: 5e-05 19 | weight_decay: 0 20 | ) 21 | [[06/22/2019 03:44:34 PM]] Batches per epoch: 201 22 | [[06/22/2019 03:44:34 PM]] ====================Epoch 1==================== 23 | [[06/22/2019 03:44:52 PM]] Step 20: train 3.573733 lr: 4.933e-04 24 | [[06/22/2019 03:45:09 PM]] Step 40: train 2.030234 lr: 9.858e-04 25 | [[06/22/2019 03:45:26 PM]] Step 60: train 1.984939 lr: 1.478e-03 26 | [[06/22/2019 03:45:43 PM]] Step 80: train 1.709919 lr: 1.971e-03 27 | [[06/22/2019 03:46:00 PM]] Step 100: train 1.608930 lr: 2.463e-03 28 | [[06/22/2019 03:46:03 PM]] Criterion loss: 2.605317 29 | [[06/22/2019 03:46:03 PM]] accuracy: 36.40% 30 | [[06/22/2019 03:46:03 PM]] top_3_accuracy: 60.20% 31 | [[06/22/2019 03:46:03 PM]] Snapshot metric -0.36400000 32 | [[06/22/2019 03:46:03 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.36400000_100.pth... 33 | [[06/22/2019 03:46:03 PM]] New low 34 | 35 | [[06/22/2019 03:46:20 PM]] Step 120: train 1.557482 lr: 2.956e-03 36 | [[06/22/2019 03:46:36 PM]] Step 140: train 1.548209 lr: 3.449e-03 37 | [[06/22/2019 03:46:53 PM]] Step 160: train 1.653716 lr: 3.941e-03 38 | [[06/22/2019 03:47:09 PM]] Step 180: train 1.428034 lr: 4.434e-03 39 | [[06/22/2019 03:47:26 PM]] Step 200: train 1.459277 lr: 4.926e-03 40 | [[06/22/2019 03:47:28 PM]] Criterion loss: 1.658659 41 | [[06/22/2019 03:47:28 PM]] accuracy: 46.00% 42 | [[06/22/2019 03:47:28 PM]] top_3_accuracy: 77.00% 43 | [[06/22/2019 03:47:28 PM]] Snapshot metric -0.46000000 44 | [[06/22/2019 03:47:28 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.46000000_200.pth... 
45 | [[06/22/2019 03:47:29 PM]] New low 46 | 47 | [[06/22/2019 03:47:30 PM]] ====================Epoch 2==================== 48 | [[06/22/2019 03:47:46 PM]] Step 220: train 1.482041 lr: 4.999e-03 49 | [[06/22/2019 03:48:02 PM]] Step 240: train 1.341844 lr: 4.995e-03 50 | [[06/22/2019 03:48:19 PM]] Step 260: train 1.359507 lr: 4.989e-03 51 | [[06/22/2019 03:48:36 PM]] Step 280: train 1.409255 lr: 4.979e-03 52 | [[06/22/2019 03:48:53 PM]] Step 300: train 1.213519 lr: 4.966e-03 53 | [[06/22/2019 03:48:55 PM]] Criterion loss: 1.155639 54 | [[06/22/2019 03:48:55 PM]] accuracy: 63.00% 55 | [[06/22/2019 03:48:55 PM]] top_3_accuracy: 86.00% 56 | [[06/22/2019 03:48:55 PM]] Snapshot metric -0.63000000 57 | [[06/22/2019 03:48:55 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.63000000_300.pth... 58 | [[06/22/2019 03:48:56 PM]] New low 59 | 60 | [[06/22/2019 03:49:13 PM]] Step 320: train 1.209477 lr: 4.950e-03 61 | [[06/22/2019 03:49:29 PM]] Step 340: train 1.168014 lr: 4.932e-03 62 | [[06/22/2019 03:49:46 PM]] Step 360: train 1.130536 lr: 4.910e-03 63 | [[06/22/2019 03:50:03 PM]] Step 380: train 1.201653 lr: 4.885e-03 64 | [[06/22/2019 03:50:19 PM]] Step 400: train 1.146011 lr: 4.858e-03 65 | [[06/22/2019 03:50:22 PM]] Criterion loss: 1.090183 66 | [[06/22/2019 03:50:22 PM]] accuracy: 64.60% 67 | [[06/22/2019 03:50:22 PM]] top_3_accuracy: 88.40% 68 | [[06/22/2019 03:50:22 PM]] Snapshot metric -0.64600000 69 | [[06/22/2019 03:50:22 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.64600000_400.pth... 70 | [[06/22/2019 03:50:23 PM]] New low 71 | 72 | [[06/22/2019 03:50:24 PM]] ====================Epoch 3==================== 73 | [[06/22/2019 03:50:40 PM]] Step 420: train 1.210714 lr: 4.828e-03 74 | [[06/22/2019 03:50:56 PM]] Step 440: train 1.090848 lr: 4.795e-03 75 | [[06/22/2019 03:51:13 PM]] Step 460: train 1.061501 lr: 4.759e-03 76 | [[06/22/2019 03:51:30 PM]] Step 480: train 1.065800 lr: 4.720e-03 77 | [[06/22/2019 03:51:47 PM]] Step 500: train 0.984935 lr: 4.679e-03 78 | [[06/22/2019 03:51:49 PM]] Criterion loss: 1.451416 79 | [[06/22/2019 03:51:49 PM]] accuracy: 55.80% 80 | [[06/22/2019 03:51:49 PM]] top_3_accuracy: 87.00% 81 | [[06/22/2019 03:51:49 PM]] Snapshot metric -0.55800000 82 | [[06/22/2019 03:51:49 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.55800000_500.pth... 83 | [[06/22/2019 03:52:06 PM]] Step 520: train 1.045997 lr: 4.635e-03 84 | [[06/22/2019 03:52:23 PM]] Step 540: train 0.967297 lr: 4.589e-03 85 | [[06/22/2019 03:52:40 PM]] Step 560: train 0.921210 lr: 4.540e-03 86 | [[06/22/2019 03:52:56 PM]] Step 580: train 0.934175 lr: 4.488e-03 87 | [[06/22/2019 03:53:13 PM]] Step 600: train 0.997390 lr: 4.435e-03 88 | [[06/22/2019 03:53:15 PM]] Criterion loss: 0.846122 89 | [[06/22/2019 03:53:15 PM]] accuracy: 71.80% 90 | [[06/22/2019 03:53:15 PM]] top_3_accuracy: 91.20% 91 | [[06/22/2019 03:53:15 PM]] Snapshot metric -0.71800000 92 | [[06/22/2019 03:53:15 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.71800000_600.pth... 
93 | [[06/22/2019 03:53:16 PM]] New low 94 | 95 | [[06/22/2019 03:53:18 PM]] ====================Epoch 4==================== 96 | [[06/22/2019 03:53:33 PM]] Step 620: train 0.912232 lr: 4.378e-03 97 | [[06/22/2019 03:53:50 PM]] Step 640: train 0.897097 lr: 4.320e-03 98 | [[06/22/2019 03:54:06 PM]] Step 660: train 0.885259 lr: 4.259e-03 99 | [[06/22/2019 03:54:23 PM]] Step 680: train 0.898099 lr: 4.197e-03 100 | [[06/22/2019 03:54:40 PM]] Step 700: train 0.952782 lr: 4.132e-03 101 | [[06/22/2019 03:54:43 PM]] Criterion loss: 0.828609 102 | [[06/22/2019 03:54:43 PM]] accuracy: 72.20% 103 | [[06/22/2019 03:54:43 PM]] top_3_accuracy: 91.60% 104 | [[06/22/2019 03:54:43 PM]] Snapshot metric -0.72200000 105 | [[06/22/2019 03:54:43 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.72200000_700.pth... 106 | [[06/22/2019 03:54:43 PM]] New low 107 | 108 | [[06/22/2019 03:55:00 PM]] Step 720: train 0.877741 lr: 4.065e-03 109 | [[06/22/2019 03:55:16 PM]] Step 740: train 0.926822 lr: 3.996e-03 110 | [[06/22/2019 03:55:33 PM]] Step 760: train 0.826499 lr: 3.926e-03 111 | [[06/22/2019 03:55:50 PM]] Step 780: train 0.945527 lr: 3.854e-03 112 | [[06/22/2019 03:56:07 PM]] Step 800: train 0.872751 lr: 3.780e-03 113 | [[06/22/2019 03:56:09 PM]] Criterion loss: 0.853279 114 | [[06/22/2019 03:56:09 PM]] accuracy: 72.40% 115 | [[06/22/2019 03:56:09 PM]] top_3_accuracy: 91.40% 116 | [[06/22/2019 03:56:09 PM]] Snapshot metric -0.72400000 117 | [[06/22/2019 03:56:09 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.72400000_800.pth... 118 | [[06/22/2019 03:56:10 PM]] New low 119 | 120 | [[06/22/2019 03:56:13 PM]] ====================Epoch 5==================== 121 | [[06/22/2019 03:56:27 PM]] Step 820: train 0.826435 lr: 3.705e-03 122 | [[06/22/2019 03:56:44 PM]] Step 840: train 0.832256 lr: 3.628e-03 123 | [[06/22/2019 03:57:01 PM]] Step 860: train 0.848991 lr: 3.550e-03 124 | [[06/22/2019 03:57:17 PM]] Step 880: train 0.825508 lr: 3.470e-03 125 | [[06/22/2019 03:57:34 PM]] Step 900: train 0.703108 lr: 3.390e-03 126 | [[06/22/2019 03:57:37 PM]] Criterion loss: 0.754616 127 | [[06/22/2019 03:57:37 PM]] accuracy: 75.40% 128 | [[06/22/2019 03:57:37 PM]] top_3_accuracy: 94.60% 129 | [[06/22/2019 03:57:37 PM]] Snapshot metric -0.75400000 130 | [[06/22/2019 03:57:37 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.75400000_900.pth... 131 | [[06/22/2019 03:57:37 PM]] New low 132 | 133 | [[06/22/2019 03:57:55 PM]] Step 920: train 0.858812 lr: 3.308e-03 134 | [[06/22/2019 03:58:12 PM]] Step 940: train 0.799415 lr: 3.225e-03 135 | [[06/22/2019 03:58:30 PM]] Step 960: train 0.862341 lr: 3.142e-03 136 | [[06/22/2019 03:58:47 PM]] Step 980: train 0.751697 lr: 3.058e-03 137 | [[06/22/2019 03:59:03 PM]] Step 1000: train 0.731585 lr: 2.973e-03 138 | [[06/22/2019 03:59:06 PM]] Criterion loss: 0.707321 139 | [[06/22/2019 03:59:06 PM]] accuracy: 78.20% 140 | [[06/22/2019 03:59:06 PM]] top_3_accuracy: 92.20% 141 | [[06/22/2019 03:59:06 PM]] Snapshot metric -0.78200000 142 | [[06/22/2019 03:59:06 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.78200000_1000.pth... 
143 | [[06/22/2019 03:59:06 PM]] New low 144 | 145 | [[06/22/2019 03:59:10 PM]] ====================Epoch 6==================== 146 | [[06/22/2019 03:59:23 PM]] Step 1020: train 0.613325 lr: 2.887e-03 147 | [[06/22/2019 03:59:39 PM]] Step 1040: train 0.756710 lr: 2.801e-03 148 | [[06/22/2019 03:59:56 PM]] Step 1060: train 0.671936 lr: 2.715e-03 149 | [[06/22/2019 04:00:12 PM]] Step 1080: train 0.643940 lr: 2.628e-03 150 | [[06/22/2019 04:00:29 PM]] Step 1100: train 0.702415 lr: 2.541e-03 151 | [[06/22/2019 04:00:32 PM]] Criterion loss: 0.894972 152 | [[06/22/2019 04:00:32 PM]] accuracy: 71.40% 153 | [[06/22/2019 04:00:32 PM]] top_3_accuracy: 92.60% 154 | [[06/22/2019 04:00:32 PM]] Snapshot metric -0.71400000 155 | [[06/22/2019 04:00:32 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.71400000_1100.pth... 156 | [[06/22/2019 04:00:48 PM]] Step 1120: train 0.698035 lr: 2.454e-03 157 | [[06/22/2019 04:01:05 PM]] Step 1140: train 0.665236 lr: 2.368e-03 158 | [[06/22/2019 04:01:21 PM]] Step 1160: train 0.663330 lr: 2.281e-03 159 | [[06/22/2019 04:01:38 PM]] Step 1180: train 0.644659 lr: 2.195e-03 160 | [[06/22/2019 04:01:54 PM]] Step 1200: train 0.710065 lr: 2.109e-03 161 | [[06/22/2019 04:01:57 PM]] Criterion loss: 0.679426 162 | [[06/22/2019 04:01:57 PM]] accuracy: 77.80% 163 | [[06/22/2019 04:01:57 PM]] top_3_accuracy: 93.80% 164 | [[06/22/2019 04:01:57 PM]] Snapshot metric -0.77800000 165 | [[06/22/2019 04:01:57 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.77800000_1200.pth... 166 | [[06/22/2019 04:02:02 PM]] ====================Epoch 7==================== 167 | [[06/22/2019 04:02:14 PM]] Step 1220: train 0.657953 lr: 2.023e-03 168 | [[06/22/2019 04:02:31 PM]] Step 1240: train 0.561891 lr: 1.938e-03 169 | [[06/22/2019 04:02:47 PM]] Step 1260: train 0.674270 lr: 1.854e-03 170 | [[06/22/2019 04:03:05 PM]] Step 1280: train 0.597797 lr: 1.771e-03 171 | [[06/22/2019 04:03:22 PM]] Step 1300: train 0.531784 lr: 1.688e-03 172 | [[06/22/2019 04:03:25 PM]] Criterion loss: 0.606873 173 | [[06/22/2019 04:03:25 PM]] accuracy: 82.00% 174 | [[06/22/2019 04:03:25 PM]] top_3_accuracy: 96.60% 175 | [[06/22/2019 04:03:25 PM]] Snapshot metric -0.82000000 176 | [[06/22/2019 04:03:25 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.82000000_1300.pth... 177 | [[06/22/2019 04:03:25 PM]] New low 178 | 179 | [[06/22/2019 04:03:42 PM]] Step 1320: train 0.530845 lr: 1.606e-03 180 | [[06/22/2019 04:03:59 PM]] Step 1340: train 0.559952 lr: 1.526e-03 181 | [[06/22/2019 04:04:15 PM]] Step 1360: train 0.560953 lr: 1.446e-03 182 | [[06/22/2019 04:04:32 PM]] Step 1380: train 0.549461 lr: 1.368e-03 183 | [[06/22/2019 04:04:48 PM]] Step 1400: train 0.553163 lr: 1.292e-03 184 | [[06/22/2019 04:04:51 PM]] Criterion loss: 0.493012 185 | [[06/22/2019 04:04:51 PM]] accuracy: 84.40% 186 | [[06/22/2019 04:04:51 PM]] top_3_accuracy: 96.20% 187 | [[06/22/2019 04:04:51 PM]] Snapshot metric -0.84400000 188 | [[06/22/2019 04:04:51 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84400000_1400.pth... 
189 | [[06/22/2019 04:04:51 PM]] New low 190 | 191 | [[06/22/2019 04:04:57 PM]] ====================Epoch 8==================== 192 | [[06/22/2019 04:05:09 PM]] Step 1420: train 0.526511 lr: 1.216e-03 193 | [[06/22/2019 04:05:25 PM]] Step 1440: train 0.520478 lr: 1.143e-03 194 | [[06/22/2019 04:05:42 PM]] Step 1460: train 0.455833 lr: 1.071e-03 195 | [[06/22/2019 04:05:58 PM]] Step 1480: train 0.480429 lr: 1.000e-03 196 | [[06/22/2019 04:06:15 PM]] Step 1500: train 0.500275 lr: 9.316e-04 197 | [[06/22/2019 04:06:17 PM]] Criterion loss: 0.519724 198 | [[06/22/2019 04:06:17 PM]] accuracy: 83.60% 199 | [[06/22/2019 04:06:17 PM]] top_3_accuracy: 94.80% 200 | [[06/22/2019 04:06:17 PM]] Snapshot metric -0.83600000 201 | [[06/22/2019 04:06:17 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83600000_1500.pth... 202 | [[06/22/2019 04:06:35 PM]] Step 1520: train 0.468733 lr: 8.649e-04 203 | [[06/22/2019 04:06:53 PM]] Step 1540: train 0.469848 lr: 8.003e-04 204 | [[06/22/2019 04:07:11 PM]] Step 1560: train 0.507552 lr: 7.376e-04 205 | [[06/22/2019 04:07:28 PM]] Step 1580: train 0.484207 lr: 6.771e-04 206 | [[06/22/2019 04:07:45 PM]] Step 1600: train 0.435758 lr: 6.188e-04 207 | [[06/22/2019 04:07:48 PM]] Criterion loss: 0.423036 208 | [[06/22/2019 04:07:48 PM]] accuracy: 87.40% 209 | [[06/22/2019 04:07:48 PM]] top_3_accuracy: 96.40% 210 | [[06/22/2019 04:07:48 PM]] Snapshot metric -0.87400000 211 | [[06/22/2019 04:07:48 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.87400000_1600.pth... 212 | [[06/22/2019 04:07:48 PM]] New low 213 | 214 | [[06/22/2019 04:07:55 PM]] ====================Epoch 9==================== 215 | [[06/22/2019 04:08:05 PM]] Step 1620: train 0.390876 lr: 5.627e-04 216 | [[06/22/2019 04:08:22 PM]] Step 1640: train 0.457620 lr: 5.090e-04 217 | [[06/22/2019 04:08:39 PM]] Step 1660: train 0.399328 lr: 4.577e-04 218 | [[06/22/2019 04:08:56 PM]] Step 1680: train 0.383037 lr: 4.089e-04 219 | [[06/22/2019 04:09:13 PM]] Step 1700: train 0.409151 lr: 3.626e-04 220 | [[06/22/2019 04:09:16 PM]] Criterion loss: 0.404898 221 | [[06/22/2019 04:09:16 PM]] accuracy: 88.80% 222 | [[06/22/2019 04:09:16 PM]] top_3_accuracy: 96.40% 223 | [[06/22/2019 04:09:16 PM]] Snapshot metric -0.88800000 224 | [[06/22/2019 04:09:16 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.88800000_1700.pth... 225 | [[06/22/2019 04:09:16 PM]] New low 226 | 227 | [[06/22/2019 04:09:33 PM]] Step 1720: train 0.443100 lr: 3.188e-04 228 | [[06/22/2019 04:09:50 PM]] Step 1740: train 0.379389 lr: 2.777e-04 229 | [[06/22/2019 04:10:08 PM]] Step 1760: train 0.369363 lr: 2.393e-04 230 | [[06/22/2019 04:10:25 PM]] Step 1780: train 0.388807 lr: 2.036e-04 231 | [[06/22/2019 04:10:41 PM]] Step 1800: train 0.413061 lr: 1.707e-04 232 | [[06/22/2019 04:10:44 PM]] Criterion loss: 0.355131 233 | [[06/22/2019 04:10:44 PM]] accuracy: 89.80% 234 | [[06/22/2019 04:10:44 PM]] top_3_accuracy: 97.00% 235 | [[06/22/2019 04:10:44 PM]] Snapshot metric -0.89800000 236 | [[06/22/2019 04:10:44 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.89800000_1800.pth... 
237 | [[06/22/2019 04:10:44 PM]] New low 238 | 239 | [[06/22/2019 04:10:52 PM]] ====================Epoch 10==================== 240 | [[06/22/2019 04:11:02 PM]] Step 1820: train 0.383528 lr: 1.405e-04 241 | [[06/22/2019 04:11:19 PM]] Step 1840: train 0.317547 lr: 1.133e-04 242 | [[06/22/2019 04:11:35 PM]] Step 1860: train 0.336927 lr: 8.888e-05 243 | [[06/22/2019 04:11:52 PM]] Step 1880: train 0.365633 lr: 6.739e-05 244 | [[06/22/2019 04:12:09 PM]] Step 1900: train 0.366723 lr: 4.883e-05 245 | [[06/22/2019 04:12:12 PM]] Criterion loss: 0.354443 246 | [[06/22/2019 04:12:12 PM]] accuracy: 89.80% 247 | [[06/22/2019 04:12:12 PM]] top_3_accuracy: 97.00% 248 | [[06/22/2019 04:12:12 PM]] Snapshot metric -0.89800000 249 | [[06/22/2019 04:12:12 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.89800000_1900.pth... 250 | [[06/22/2019 04:12:29 PM]] Step 1920: train 0.347152 lr: 3.324e-05 251 | [[06/22/2019 04:12:45 PM]] Step 1940: train 0.342237 lr: 2.062e-05 252 | [[06/22/2019 04:13:02 PM]] Step 1960: train 0.356638 lr: 1.099e-05 253 | [[06/22/2019 04:13:19 PM]] Step 1980: train 0.317537 lr: 4.357e-06 254 | [[06/22/2019 04:13:36 PM]] Step 2000: train 0.320815 lr: 7.389e-07 255 | [[06/22/2019 04:13:39 PM]] Criterion loss: 0.350640 256 | [[06/22/2019 04:13:39 PM]] accuracy: 89.60% 257 | [[06/22/2019 04:13:39 PM]] top_3_accuracy: 97.40% 258 | [[06/22/2019 04:13:39 PM]] Snapshot metric -0.89600000 259 | [[06/22/2019 04:13:39 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.89600000_2000.pth... 260 | -------------------------------------------------------------------------------- /imet/main.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import argparse 3 | from itertools import islice 4 | import json 5 | from pathlib import Path 6 | import warnings 7 | from typing import Dict, Callable, List 8 | from functools import partial 9 | from dataclasses import dataclass 10 | 11 | import numpy as np 12 | import pandas as pd 13 | from sklearn.metrics import fbeta_score, log_loss 14 | from sklearn.exceptions import UndefinedMetricWarning 15 | import torch 16 | from torch import nn, cuda 17 | from torch.utils.data import DataLoader 18 | from torch.optim.lr_scheduler import CosineAnnealingLR 19 | from tqdm import tqdm 20 | from helperbot import ( 21 | freeze_layers, TriangularLR, BaseBot, WeightDecayOptimizerWrapper, 22 | GradualWarmupScheduler, FBeta, LearningRateSchedulerCallback, 23 | MixUpCallback 24 | ) 25 | 26 | from .adabound import AdaBound 27 | from .models import get_seresnet_model, get_densenet_model, get_seresnet_partial_model 28 | from .dataset import TrainDataset, TestDataset, get_ids, N_CLASSES, DATA_ROOT 29 | from .transforms import get_train_transform, get_test_transform, cv2 30 | from .utils import ON_KAGGLE 31 | from .loss import FocalLoss 32 | 33 | CACHE_DIR = Path('/tmp/imet' if ON_KAGGLE else './data/cache/') 34 | CACHE_DIR.mkdir(exist_ok=True, parents=True) 35 | MODEL_DIR = Path('.' 
if ON_KAGGLE else './data/cache/') 36 | MODEL_DIR.mkdir(exist_ok=True, parents=True) 37 | 38 | 39 | def make_loader(args, ds_class, root, df: pd.DataFrame, image_transform, drop_last=False, shuffle=False) -> DataLoader: 40 | return DataLoader( 41 | ds_class(root, df, image_transform, debug=args.debug), 42 | shuffle=shuffle, 43 | batch_size=args.batch_size, 44 | num_workers=args.workers, 45 | drop_last=drop_last 46 | ) 47 | 48 | 49 | def opt_params(layer, learning_rate, final_lr): 50 | return {'params': layer.parameters(), 'lr': learning_rate, 'final_lr': final_lr} 51 | 52 | 53 | def setup_differential_learning_rates( 54 | optimizer_constructor: Callable[[List[Dict]], torch.optim.Optimizer], 55 | layer_groups: List[nn.Parameter], 56 | lrs: List[float], final_lrs: List[float]) -> torch.optim.Optimizer: 57 | assert len(layer_groups) == len( 58 | lrs), f'size mismatch, expected {len(layer_groups)} lrs, but got {len(lrs)}' 59 | optimizer = optimizer_constructor( 60 | [opt_params(*p) for p in zip(layer_groups, lrs, final_lrs)]) 61 | return optimizer 62 | 63 | 64 | @dataclass 65 | class ImageClassificationBot(BaseBot): 66 | checkpoint_dir: Path = CACHE_DIR / "model_cache/" 67 | log_dir: Path = MODEL_DIR / "logs/" 68 | 69 | def __post_init__(self): 70 | super().__post_init__() 71 | self.loss_format = "%.6f" 72 | self.metrics = (FBeta(step=0.05, beta=2, average="samples"),) 73 | self.monitor_metric = "fbeta" 74 | 75 | def extract_prediction(self, x): 76 | return x 77 | 78 | 79 | def train_stage_one(args, model, train_loader, valid_loader, criterion): 80 | optimizer = WeightDecayOptimizerWrapper( 81 | torch.optim.Adam(model.parameters(), lr=2e-3), 82 | 0.1 83 | ) 84 | freeze_layers(model, [True, True, False]) 85 | 86 | # stage 1 87 | n_steps = len(train_loader) // 2 88 | bot = ImageClassificationBot( 89 | model=model, train_loader=train_loader, 90 | val_loader=valid_loader, clip_grad=10., 91 | optimizer=optimizer, echo=not ON_KAGGLE, 92 | criterion=criterion, 93 | avg_window=len(train_loader) // 10, 94 | callbacks=[ 95 | LearningRateSchedulerCallback(TriangularLR( 96 | optimizer, 100, ratio=3, steps_per_cycle=n_steps)) 97 | ], 98 | pbar=not ON_KAGGLE, use_tensorboard=False 99 | ) 100 | bot.logger.info(bot.criterion) 101 | bot.train( 102 | n_steps, 103 | log_interval=len(train_loader) // 10, 104 | snapshot_interval=len(train_loader) // 4 105 | ) 106 | bot.load_model(bot.best_performers[0][1]) 107 | torch.save(bot.model.state_dict(), str( 108 | CACHE_DIR / f"stage1_{args.fold}.pth")) 109 | bot.remove_checkpoints(keep=0) 110 | 111 | 112 | def train_stage_two(args, model, train_loader, valid_loader, criterion): 113 | n_steps = len(train_loader) * args.epochs 114 | optimizer = WeightDecayOptimizerWrapper( 115 | setup_differential_learning_rates( 116 | partial( 117 | torch.optim.Adam, weight_decay=0 118 | # AdaBound, weight_decay=0, gamma=1/5000, betas=(.8, .999) 119 | # torch.optim.SGD, momentum=0.9 120 | ), model, [1e-5, 8e-5, 5e-4], [1., 1., 1.] 
121 | ), weight_decay=5e-2, change_with_lr=True) 122 | freeze_layers(model, [False, False, False]) 123 | bot = ImageClassificationBot( 124 | model=model, train_loader=train_loader, 125 | val_loader=valid_loader, clip_grad=10., 126 | optimizer=optimizer, echo=not ON_KAGGLE, 127 | criterion=criterion, 128 | avg_window=len(train_loader) // 15, 129 | callbacks=[ 130 | LearningRateSchedulerCallback( 131 | TriangularLR( 132 | optimizer, 100, ratio=4, steps_per_cycle=n_steps 133 | ) 134 | # GradualWarmupScheduler( 135 | # optimizer, 100, len(train_loader), 136 | # after_scheduler=CosineAnnealingLR( 137 | # optimizer, n_steps - len(train_loader) 138 | # ) 139 | ), 140 | MixUpCallback(alpha=0.2) 141 | ], 142 | pbar=not ON_KAGGLE, use_tensorboard=not ON_KAGGLE 143 | ) 144 | bot.logger.info(bot.criterion) 145 | bot.model.load_state_dict(torch.load( 146 | CACHE_DIR / f"stage1_{args.fold}.pth")) 147 | 148 | # def snapshot_or_not(step): 149 | # if step < 4000: 150 | # if step % 2000 == 0: 151 | # return True 152 | # elif (step - 4000) % 1000 == 0: 153 | # return True 154 | # return False 155 | 156 | bot.train( 157 | n_steps, 158 | log_interval=len(train_loader) // 20, 159 | snapshot_interval=len(train_loader) // 2, 160 | # snapshot_interval=snapshot_or_not, 161 | early_stopping_cnt=args.early_stop, 162 | min_improv=1e-4, 163 | keep_n_snapshots=1 164 | ) 165 | bot.load_model(bot.best_performers[0][1]) 166 | bot.remove_checkpoints(keep=0) 167 | 168 | # Final model 169 | torch.save(bot.model, MODEL_DIR / f"final_{args.fold}.pth") 170 | # Failover (args + state dict) 171 | torch.save( 172 | [args.arch, bot.model.state_dict()], 173 | MODEL_DIR / f"failover_{args.arch}_{args.fold}.pth" 174 | ) 175 | 176 | 177 | def find_best_fbeta_threshold(truth, probs, beta=2, step=0.05): 178 | best, best_thres = 0, -1 179 | argsorted = probs.argsort(axis=1) 180 | with warnings.catch_warnings(): 181 | warnings.simplefilter('ignore', category=UndefinedMetricWarning) 182 | for thres in np.arange(step, .5, step): 183 | current = fbeta_score( 184 | truth, 185 | binarize_prediction( 186 | probs, thres, argsorted 187 | ).astype("int8"), 188 | beta=beta, average="samples") 189 | if current > best: 190 | best = current 191 | best_thres = thres 192 | return best, best_thres 193 | 194 | 195 | def print_eval(truth, preds): 196 | best_score, threshold = find_best_fbeta_threshold( 197 | truth, preds, beta=2, step=0.01 198 | ) 199 | print(f"f2: {best_score:.4f} @ threshold {threshold:.2f}") 200 | print(f"loss: {log_loss(truth, preds) / preds.shape[1]:.8f}") 201 | 202 | 203 | def eval_model(args, valid_loaders: List[DataLoader]): 204 | model_dir = MODEL_DIR / args.model 205 | model = torch.load(str(model_dir / f"final_{args.fold}.pth")) 206 | model = model.cuda() 207 | bot = ImageClassificationBot( 208 | model=model, train_loader=None, 209 | val_loader=None, optimizer=None, 210 | echo=not ON_KAGGLE, criterion=None, 211 | pbar=not ON_KAGGLE, avg_window=100 212 | ) 213 | tmp = [] 214 | for valid_loader in valid_loaders: 215 | preds, truth = bot.predict(valid_loader, return_y=True) 216 | preds = torch.sigmoid(preds) 217 | tmp.append(preds.numpy()) 218 | # print(np.mean(tmp, axis=0, keepdims=False).shape, preds.numpy().shape) 219 | final_preds = np.mean(tmp, axis=0, keepdims=False) 220 | print_eval( 221 | truth.numpy(), 222 | final_preds 223 | ) 224 | if args.min_samples > 0: 225 | final_preds = mask_predictions(args, final_preds) 226 | print_eval( 227 | truth.numpy(), 228 | final_preds 229 | ) 230 | 231 | 232 | def predict_model(args, df: 
pd.DataFrame, loaders: List[DataLoader], name: str):
233 |     model_dir = MODEL_DIR / args.model
234 |     model = torch.load(str(model_dir / f"final_{args.fold}.pth"))
235 |     model = model.cuda()
236 |     bot = ImageClassificationBot(
237 |         model=model, train_loader=None,
238 |         val_loader=None, optimizer=None,
239 |         echo=not ON_KAGGLE, criterion=None,
240 |         pbar=not ON_KAGGLE, avg_window=100
241 |     )
242 |     tmp = []
243 | 
244 |     for loader in loaders:
245 |         preds = bot.predict(loader, return_y=False)
246 |         preds = torch.sigmoid(preds)
247 |         tmp.append(preds.numpy())
248 |     final_preds = np.mean(tmp, axis=0, keepdims=False)
249 |     # print(np.isnan(final_preds).sum())
250 |     df_preds = pd.DataFrame(final_preds, index=df["id"].values)
251 |     df_preds.to_pickle(CACHE_DIR / f"preds_{name}_{args.fold}.pkl")
252 | 
253 | 
254 | def mask_predictions(args, preds):
255 |     folds = pd.read_pickle(CACHE_DIR / 'folds.pkl')
256 |     mask = folds.iloc[:, 1:-1].sum(axis=0).values < args.min_samples
257 |     print(mask.shape, preds.shape)
258 |     print(f"Masking {sum(mask)} labels...")
259 |     preds[:, mask] = 0
260 |     return preds
261 | 
262 | 
263 | def main():
264 |     parser = argparse.ArgumentParser()
265 |     arg = parser.add_argument
266 |     arg('mode', choices=['train', 'validate',
267 |                          'predict_valid', 'predict_test'])
268 |     arg('--batch-size', type=int, default=32)
269 |     arg('--step', type=int, default=1)
270 |     arg('--workers', type=int, default=2 if ON_KAGGLE else 4)
271 |     arg('--tta', type=int, default=4)
272 |     arg('--epochs', type=int, default=10)
273 |     arg('--arch', type=str, default='seresnext50')
274 |     arg('--min-samples', type=int, default=0)
275 |     arg('--debug', action='store_true')
276 |     arg('--limit', type=int)
277 |     arg('--alpha', type=float, default=.5)
278 |     arg('--gamma', type=float, default=.25)
279 |     arg('--fold', type=int, default=0)
280 |     arg('--model', type=str, default=".")
281 |     arg('--early-stop', type=int, default=5)
282 |     args = parser.parse_args()
283 | 
284 |     if args.mode in ("train", "validate", "predict_valid"):
285 |         folds = pd.read_pickle(CACHE_DIR / 'folds.pkl')
286 |         train_root = DATA_ROOT / 'train'
287 |         train_fold = folds[folds['fold'] != args.fold]
288 |         valid_fold = folds[folds['fold'] == args.fold]
289 |         if args.limit:
290 |             train_fold = train_fold[:args.limit]
291 |             valid_fold = valid_fold[:args.limit]
292 | 
293 |     use_cuda = cuda.is_available()
294 |     train_transform = get_train_transform(cv2.BORDER_REFLECT_101)
295 |     test_transform = get_test_transform()
296 |     if args.mode == 'train':
297 |         if args.arch == 'seresnext50':
298 |             model = get_seresnet_model(
299 |                 arch="se_resnext50_32x4d",
300 |                 n_classes=N_CLASSES, pretrained=True)
301 |         elif args.arch == 'seresnext101':
302 |             model = get_seresnet_model(
303 |                 arch="se_resnext101_32x4d",
304 |                 n_classes=N_CLASSES, pretrained=True)
305 |         elif args.arch == 'seresnext50-partial':
306 |             train_transform = get_train_transform(cv2.BORDER_CONSTANT)
307 |             model = get_seresnet_partial_model(
308 |                 arch="se_resnext50_32x4d",
309 |                 n_classes=N_CLASSES, pretrained=True)
310 |         elif args.arch.startswith("densenet"):
311 |             model = get_densenet_model(arch=args.arch)
312 |         # elif args.arch.startswith("efficientnet"):
313 |         #     model = get_efficientnet(arch=args.arch)
314 |         else:
315 |             raise ValueError("No such model")
316 |         if use_cuda:
317 |             model = model.cuda()
318 |         # criterion = nn.BCEWithLogitsLoss()
319 |         criterion =
FocalLoss(gamma=args.gamma, alpha=args.alpha) 320 | (CACHE_DIR / 'params.json').write_text( 321 | json.dumps(vars(args), indent=4, sort_keys=True)) 322 | 323 | train_loader = make_loader( 324 | args, TrainDataset, train_root, train_fold, train_transform, drop_last=True, shuffle=True) 325 | valid_loader = make_loader( 326 | args, TrainDataset, train_root, valid_fold, test_transform, shuffle=False) 327 | 328 | print(f'{len(train_loader.dataset):,} items in train, ' 329 | f'{len(valid_loader.dataset):,} in valid') 330 | 331 | # Stage 1 332 | train_stage_one(args, model, train_loader, valid_loader, criterion) 333 | 334 | # Stage 2 335 | train_stage_two(args, model, train_loader, valid_loader, criterion) 336 | 337 | elif args.mode == 'validate': 338 | valid_loaders = [ 339 | make_loader( 340 | args, TrainDataset, train_root, 341 | valid_fold, get_test_transform(), shuffle=False, drop_last=False), 342 | make_loader( 343 | args, TrainDataset, train_root, 344 | valid_fold, get_test_transform(flip=True), shuffle=False, drop_last=False) 345 | ] 346 | eval_model(args, valid_loaders) 347 | elif args.mode.startswith('predict'): 348 | if args.mode == 'predict_valid': 349 | loaders = [ 350 | make_loader( 351 | args, TestDataset, train_root, 352 | valid_fold, get_test_transform(), shuffle=False, drop_last=False), 353 | make_loader( 354 | args, TestDataset, train_root, 355 | valid_fold, get_test_transform(flip=True), shuffle=False, drop_last=False) 356 | ] 357 | predict_model(args, valid_fold, loaders, "valid") 358 | elif args.mode == 'predict_test': 359 | test_root = DATA_ROOT / 'test' 360 | df_test = pd.read_csv(DATA_ROOT / 'sample_submission.csv') 361 | if args.limit: 362 | df_test = df_test[:args.limit] 363 | print(df_test.shape) 364 | loaders = [ 365 | make_loader( 366 | args, TestDataset, test_root, df_test, 367 | get_test_transform(), shuffle=False, drop_last=False), 368 | make_loader( 369 | args, TestDataset, test_root, df_test, 370 | get_test_transform(flip=True), shuffle=False, drop_last=False) 371 | ] 372 | predict_model(args, df_test, loaders, "test") 373 | 374 | 375 | def binarize_prediction(probabilities, threshold: float, argsorted=None, 376 | min_labels=1, max_labels=10): 377 | """ Return matrix of 0/1 predictions, same shape as probabilities. 
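    Labels scoring above `threshold` are kept, capped at the `max_labels`
    highest-probability labels per sample; the `min_labels` top-scoring labels
    are always kept, even when nothing clears the threshold (editor's summary
    of the logic below).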
378 | """ 379 | assert probabilities.shape[1] == N_CLASSES 380 | if argsorted is None: 381 | argsorted = probabilities.argsort(axis=1) 382 | max_mask = _make_mask(argsorted, max_labels) 383 | min_mask = _make_mask(argsorted, min_labels) 384 | prob_mask = probabilities > threshold 385 | return (max_mask & prob_mask) | min_mask 386 | 387 | 388 | def _make_mask(argsorted, top_n: int): 389 | mask = np.zeros_like(argsorted, dtype=np.uint8) 390 | col_indices = argsorted[:, -top_n:].reshape(-1) 391 | row_indices = [i // top_n for i in range(len(col_indices))] 392 | mask[row_indices, col_indices] = 1 393 | return mask 394 | 395 | 396 | if __name__ == '__main__': 397 | main() 398 | -------------------------------------------------------------------------------- /imet/seresnet_partial.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code adapted from 3 | https://raw.githubusercontent.com/Cadene/pretrained-models.pytorch/master/pretrainedmodels/models/senet.py 4 | """ 5 | from collections import OrderedDict 6 | import math 7 | 8 | import numpy as np 9 | import torch.nn as nn 10 | from torch.utils import model_zoo 11 | 12 | from .partialconv2d import PartialConv2d 13 | 14 | __all__ = ['SENet', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 15 | 'se_resnext50_32x4d', 'se_resnext101_32x4d'] 16 | 17 | pretrained_settings = { 18 | 'senet154': { 19 | 'imagenet': { 20 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth', 21 | 'input_space': 'RGB', 22 | 'input_size': [3, 224, 224], 23 | 'input_range': [0, 1], 24 | 'mean': [0.485, 0.456, 0.406], 25 | 'std': [0.229, 0.224, 0.225], 26 | 'num_classes': 1000 27 | } 28 | }, 29 | 'se_resnet50': { 30 | 'imagenet': { 31 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth', 32 | 'input_space': 'RGB', 33 | 'input_size': [3, 224, 224], 34 | 'input_range': [0, 1], 35 | 'mean': [0.485, 0.456, 0.406], 36 | 'std': [0.229, 0.224, 0.225], 37 | 'num_classes': 1000 38 | } 39 | }, 40 | 'se_resnet101': { 41 | 'imagenet': { 42 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth', 43 | 'input_space': 'RGB', 44 | 'input_size': [3, 224, 224], 45 | 'input_range': [0, 1], 46 | 'mean': [0.485, 0.456, 0.406], 47 | 'std': [0.229, 0.224, 0.225], 48 | 'num_classes': 1000 49 | } 50 | }, 51 | 'se_resnet152': { 52 | 'imagenet': { 53 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth', 54 | 'input_space': 'RGB', 55 | 'input_size': [3, 224, 224], 56 | 'input_range': [0, 1], 57 | 'mean': [0.485, 0.456, 0.406], 58 | 'std': [0.229, 0.224, 0.225], 59 | 'num_classes': 1000 60 | } 61 | }, 62 | 'se_resnext50_32x4d': { 63 | 'imagenet': { 64 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth', 65 | 'input_space': 'RGB', 66 | 'input_size': [3, 224, 224], 67 | 'input_range': [0, 1], 68 | 'mean': [0.485, 0.456, 0.406], 69 | 'std': [0.229, 0.224, 0.225], 70 | 'num_classes': 1000 71 | } 72 | }, 73 | 'se_resnext101_32x4d': { 74 | 'imagenet': { 75 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth', 76 | 'input_space': 'RGB', 77 | 'input_size': [3, 224, 224], 78 | 'input_range': [0, 1], 79 | 'mean': [0.485, 0.456, 0.406], 80 | 'std': [0.229, 0.224, 0.225], 81 | 'num_classes': 1000 82 | } 83 | }, 84 | } 85 | 86 | ZERO_TRANSFORMED = np.sum( 87 | np.array([0.485, 0.456, 0.406]) / np.array([0.229, 0.224, 0.225])) * -1 88 | 89 | 90 | class FirstPartialConv2d(PartialConv2d): 91 | 
def forward(self, input, mask_in=None): 92 | eps = 1e-4 93 | mask = (input.sum(dim=1, keepdim=True) > 94 | ZERO_TRANSFORMED + eps).float() 95 | # print("%.4f" % (mask.sum() / input.size(3) / input.size(2) / input.size(0))) 96 | return super().forward(input, mask_in=mask) 97 | 98 | 99 | class SEModule(nn.Module): 100 | 101 | def __init__(self, channels, reduction): 102 | super(SEModule, self).__init__() 103 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 104 | self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, 105 | padding=0) 106 | self.relu = nn.ReLU(inplace=True) 107 | self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, 108 | padding=0) 109 | self.sigmoid = nn.Sigmoid() 110 | 111 | def forward(self, x): 112 | module_input = x 113 | x = self.avg_pool(x) 114 | x = self.fc1(x) 115 | x = self.relu(x) 116 | x = self.fc2(x) 117 | x = self.sigmoid(x) 118 | return module_input * x 119 | 120 | 121 | class Bottleneck(nn.Module): 122 | """ 123 | Base class for bottlenecks that implements `forward()` method. 124 | """ 125 | 126 | def forward(self, x): 127 | residual = x 128 | 129 | out = self.conv1(x) 130 | out = self.bn1(out) 131 | out = self.relu(out) 132 | 133 | out = self.conv2(out) 134 | out = self.bn2(out) 135 | out = self.relu(out) 136 | 137 | out = self.conv3(out) 138 | out = self.bn3(out) 139 | 140 | if self.downsample is not None: 141 | residual = self.downsample(x) 142 | 143 | out = self.se_module(out) + residual 144 | out = self.relu(out) 145 | 146 | return out 147 | 148 | 149 | class SEBottleneck(Bottleneck): 150 | """ 151 | Bottleneck for SENet154. 152 | """ 153 | expansion = 4 154 | 155 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 156 | downsample=None): 157 | super(SEBottleneck, self).__init__() 158 | self.conv1 = nn.Conv2d( 159 | inplanes, planes * 2, kernel_size=1, bias=False) 160 | self.bn1 = nn.BatchNorm2d(planes * 2) 161 | self.conv2 = PartialConv2d(planes * 2, planes * 4, kernel_size=3, 162 | stride=stride, padding=1, groups=groups, 163 | bias=False) 164 | self.bn2 = nn.BatchNorm2d(planes * 4) 165 | self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1, 166 | bias=False) 167 | self.bn3 = nn.BatchNorm2d(planes * 4) 168 | self.relu = nn.ReLU(inplace=True) 169 | self.se_module = SEModule(planes * 4, reduction=reduction) 170 | self.downsample = downsample 171 | self.stride = stride 172 | 173 | 174 | class SEResNetBottleneck(Bottleneck): 175 | """ 176 | ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe 177 | implementation and uses `stride=stride` in `conv1` and not in `conv2` 178 | (the latter is used in the torchvision implementation of ResNet). 
179 | """ 180 | expansion = 4 181 | 182 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 183 | downsample=None): 184 | super(SEResNetBottleneck, self).__init__() 185 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, 186 | stride=stride) 187 | self.bn1 = nn.BatchNorm2d(planes) 188 | self.conv2 = PartialConv2d(planes, planes, kernel_size=3, padding=1, 189 | groups=groups, bias=False) 190 | self.bn2 = nn.BatchNorm2d(planes) 191 | self.conv3 = nn.Conv2d( 192 | planes, planes * 4, kernel_size=1, bias=False) 193 | self.bn3 = nn.BatchNorm2d(planes * 4) 194 | self.relu = nn.ReLU(inplace=True) 195 | self.se_module = SEModule(planes * 4, reduction=reduction) 196 | self.downsample = downsample 197 | self.stride = stride 198 | 199 | 200 | class SEResNeXtBottleneck(Bottleneck): 201 | """ 202 | ResNeXt bottleneck type C with a Squeeze-and-Excitation module. 203 | """ 204 | expansion = 4 205 | 206 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 207 | downsample=None, base_width=4): 208 | super(SEResNeXtBottleneck, self).__init__() 209 | width = math.floor(planes * (base_width / 64)) * groups 210 | self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, 211 | stride=1) 212 | self.bn1 = nn.BatchNorm2d(width) 213 | self.conv2 = PartialConv2d(width, width, kernel_size=3, stride=stride, 214 | padding=1, groups=groups, bias=False) 215 | self.bn2 = nn.BatchNorm2d(width) 216 | self.conv3 = nn.Conv2d( 217 | width, planes * 4, kernel_size=1, bias=False) 218 | self.bn3 = nn.BatchNorm2d(planes * 4) 219 | self.relu = nn.ReLU(inplace=True) 220 | self.se_module = SEModule(planes * 4, reduction=reduction) 221 | self.downsample = downsample 222 | self.stride = stride 223 | 224 | 225 | class SENet(nn.Module): 226 | 227 | def __init__(self, block, layers, groups, reduction, dropout_p=0.2, 228 | inplanes=128, input_3x3=True, downsample_kernel_size=3, 229 | downsample_padding=1, num_classes=1000): 230 | """ 231 | Parameters 232 | ---------- 233 | block (nn.Module): Bottleneck class. 234 | - For SENet154: SEBottleneck 235 | - For SE-ResNet models: SEResNetBottleneck 236 | - For SE-ResNeXt models: SEResNeXtBottleneck 237 | layers (list of ints): Number of residual blocks for 4 layers of the 238 | network (layer1...layer4). 239 | groups (int): Number of groups for the 3x3 convolution in each 240 | bottleneck block. 241 | - For SENet154: 64 242 | - For SE-ResNet models: 1 243 | - For SE-ResNeXt models: 32 244 | reduction (int): Reduction ratio for Squeeze-and-Excitation modules. 245 | - For all models: 16 246 | dropout_p (float or None): Drop probability for the Dropout layer. 247 | If `None` the Dropout layer is not used. 248 | - For SENet154: 0.2 249 | - For SE-ResNet models: None 250 | - For SE-ResNeXt models: None 251 | inplanes (int): Number of input channels for layer1. 252 | - For SENet154: 128 253 | - For SE-ResNet models: 64 254 | - For SE-ResNeXt models: 64 255 | input_3x3 (bool): If `True`, use three 3x3 convolutions instead of 256 | a single 7x7 convolution in layer0. 257 | - For SENet154: True 258 | - For SE-ResNet models: False 259 | - For SE-ResNeXt models: False 260 | downsample_kernel_size (int): Kernel size for downsampling convolutions 261 | in layer2, layer3 and layer4. 262 | - For SENet154: 3 263 | - For SE-ResNet models: 1 264 | - For SE-ResNeXt models: 1 265 | downsample_padding (int): Padding for downsampling convolutions in 266 | layer2, layer3 and layer4. 
267 | - For SENet154: 1 268 | - For SE-ResNet models: 0 269 | - For SE-ResNeXt models: 0 270 | num_classes (int): Number of outputs in `last_linear` layer. 271 | - For all models: 1000 272 | """ 273 | super(SENet, self).__init__() 274 | self.inplanes = inplanes 275 | if input_3x3: 276 | layer0_modules = [ 277 | ('conv1', FirstPartialConv2d(3, 64, 3, stride=2, padding=1, 278 | bias=False)), 279 | ('bn1', nn.BatchNorm2d(64)), 280 | ('relu1', nn.ReLU(inplace=True)), 281 | ('conv2', PartialConv2d(64, 64, 3, stride=1, padding=1, 282 | bias=False)), 283 | ('bn2', nn.BatchNorm2d(64)), 284 | ('relu2', nn.ReLU(inplace=True)), 285 | ('conv3', PartialConv2d(64, inplanes, 3, stride=1, padding=1, 286 | bias=False)), 287 | ('bn3', nn.BatchNorm2d(inplanes)), 288 | ('relu3', nn.ReLU(inplace=True)), 289 | ] 290 | else: 291 | layer0_modules = [ 292 | ('conv1', FirstPartialConv2d(3, inplanes, kernel_size=7, stride=2, 293 | padding=3, bias=False)), 294 | ('bn1', nn.BatchNorm2d(inplanes)), 295 | ('relu1', nn.ReLU(inplace=True)), 296 | ] 297 | # To preserve compatibility with Caffe weights `ceil_mode=True` 298 | # is used instead of `padding=1`. 299 | layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2, 300 | ceil_mode=True))) 301 | self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) 302 | self.layer1 = self._make_layer( 303 | block, 304 | planes=64, 305 | blocks=layers[0], 306 | groups=groups, 307 | reduction=reduction, 308 | downsample_kernel_size=1, 309 | downsample_padding=0 310 | ) 311 | self.layer2 = self._make_layer( 312 | block, 313 | planes=128, 314 | blocks=layers[1], 315 | stride=2, 316 | groups=groups, 317 | reduction=reduction, 318 | downsample_kernel_size=downsample_kernel_size, 319 | downsample_padding=downsample_padding 320 | ) 321 | self.layer3 = self._make_layer( 322 | block, 323 | planes=256, 324 | blocks=layers[2], 325 | stride=2, 326 | groups=groups, 327 | reduction=reduction, 328 | downsample_kernel_size=downsample_kernel_size, 329 | downsample_padding=downsample_padding 330 | ) 331 | self.layer4 = self._make_layer( 332 | block, 333 | planes=512, 334 | blocks=layers[3], 335 | stride=2, 336 | groups=groups, 337 | reduction=reduction, 338 | downsample_kernel_size=downsample_kernel_size, 339 | downsample_padding=downsample_padding 340 | ) 341 | self.avg_pool = nn.AvgPool2d(7, stride=1) 342 | self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None 343 | self.last_linear = nn.Linear(512 * block.expansion, num_classes) 344 | 345 | def _make_layer(self, block, planes, blocks, groups, reduction, stride=1, 346 | downsample_kernel_size=1, downsample_padding=0): 347 | downsample = None 348 | if stride != 1 or self.inplanes != planes * block.expansion: 349 | downsample = nn.Sequential( 350 | PartialConv2d(self.inplanes, planes * block.expansion, 351 | kernel_size=downsample_kernel_size, stride=stride, 352 | padding=downsample_padding, bias=False), 353 | nn.BatchNorm2d(planes * block.expansion), 354 | ) 355 | 356 | layers = [] 357 | layers.append(block(self.inplanes, planes, groups, reduction, stride, 358 | downsample)) 359 | self.inplanes = planes * block.expansion 360 | for i in range(1, blocks): 361 | layers.append(block(self.inplanes, planes, groups, reduction)) 362 | 363 | return nn.Sequential(*layers) 364 | 365 | def features(self, x): 366 | x = self.layer0(x) 367 | x = self.layer1(x) 368 | x = self.layer2(x) 369 | x = self.layer3(x) 370 | x = self.layer4(x) 371 | return x 372 | 373 | def logits(self, x): 374 | x = self.avg_pool(x) 375 | if self.dropout is not None: 376 | 
x = self.dropout(x) 377 | x = x.view(x.size(0), -1) 378 | x = self.last_linear(x) 379 | return x 380 | 381 | def forward(self, x): 382 | x = self.features(x) 383 | x = self.logits(x) 384 | return x 385 | 386 | 387 | def initialize_pretrained_model(model, num_classes, settings): 388 | assert num_classes == settings['num_classes'], \ 389 | 'num_classes should be {}, but is {}'.format( 390 | settings['num_classes'], num_classes) 391 | model.load_state_dict(model_zoo.load_url(settings['url'])) 392 | model.input_space = settings['input_space'] 393 | model.input_size = settings['input_size'] 394 | model.input_range = settings['input_range'] 395 | model.mean = settings['mean'] 396 | model.std = settings['std'] 397 | 398 | 399 | def senet154(num_classes=1000, pretrained='imagenet'): 400 | model = SENet(SEBottleneck, [3, 8, 36, 3], groups=64, reduction=16, 401 | dropout_p=0.2, num_classes=num_classes) 402 | if pretrained is not None: 403 | settings = pretrained_settings['senet154'][pretrained] 404 | initialize_pretrained_model(model, num_classes, settings) 405 | return model 406 | 407 | 408 | def se_resnet50(num_classes=1000, pretrained='imagenet'): 409 | model = SENet(SEResNetBottleneck, [3, 4, 6, 3], groups=1, reduction=16, 410 | dropout_p=None, inplanes=64, input_3x3=False, 411 | downsample_kernel_size=1, downsample_padding=0, 412 | num_classes=num_classes) 413 | if pretrained is not None: 414 | settings = pretrained_settings['se_resnet50'][pretrained] 415 | initialize_pretrained_model(model, num_classes, settings) 416 | return model 417 | 418 | 419 | def se_resnet101(num_classes=1000, pretrained='imagenet'): 420 | model = SENet(SEResNetBottleneck, [3, 4, 23, 3], groups=1, reduction=16, 421 | dropout_p=None, inplanes=64, input_3x3=False, 422 | downsample_kernel_size=1, downsample_padding=0, 423 | num_classes=num_classes) 424 | if pretrained is not None: 425 | settings = pretrained_settings['se_resnet101'][pretrained] 426 | initialize_pretrained_model(model, num_classes, settings) 427 | return model 428 | 429 | 430 | def se_resnet152(num_classes=1000, pretrained='imagenet'): 431 | model = SENet(SEResNetBottleneck, [3, 8, 36, 3], groups=1, reduction=16, 432 | dropout_p=None, inplanes=64, input_3x3=False, 433 | downsample_kernel_size=1, downsample_padding=0, 434 | num_classes=num_classes) 435 | if pretrained is not None: 436 | settings = pretrained_settings['se_resnet152'][pretrained] 437 | initialize_pretrained_model(model, num_classes, settings) 438 | return model 439 | 440 | 441 | def se_resnext50_32x4d(num_classes=1000, pretrained='imagenet'): 442 | model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16, 443 | dropout_p=None, inplanes=64, input_3x3=False, 444 | downsample_kernel_size=1, downsample_padding=0, 445 | num_classes=num_classes) 446 | if pretrained is not None: 447 | settings = pretrained_settings['se_resnext50_32x4d'][pretrained] 448 | initialize_pretrained_model(model, num_classes, settings) 449 | return model 450 | 451 | 452 | def se_resnext101_32x4d(num_classes=1000, pretrained='imagenet'): 453 | model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3], groups=32, reduction=16, 454 | dropout_p=None, inplanes=64, input_3x3=False, 455 | downsample_kernel_size=1, downsample_padding=0, 456 | num_classes=num_classes) 457 | if pretrained is not None: 458 | settings = pretrained_settings['se_resnext101_32x4d'][pretrained] 459 | initialize_pretrained_model(model, num_classes, settings) 460 | return model 461 | 
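Editor's note: the sketch below is illustrative and not part of the original repository. It spells out the arithmetic behind ZERO_TRANSFORMED above (the channel-sum value of a black padding pixel after ImageNet normalization, which FirstPartialConv2d uses to derive its validity mask) and smoke-tests the randomly initialized partial-convolution SE-ResNeXt50. The 1103-class output size is an assumption taken from the iMet competition, and the forward pass presumes the PartialConv2d imported from imet/partialconv2d.py accepts a `mask_in` argument, as its use above suggests.

import torch

from imet.seresnet_partial import ZERO_TRANSFORMED, se_resnext50_32x4d

# A black pixel (0, 0, 0), normalized with the ImageNet mean/std, becomes
# (-0.485/0.229, -0.456/0.224, -0.406/0.225); summing the three channels gives
# -(2.118 + 2.036 + 1.804), i.e. roughly -5.958 == ZERO_TRANSFORMED.
print(round(float(ZERO_TRANSFORMED), 3))  # -5.958

# Build the model without pretrained weights (pretrained=None skips the
# ImageNet checkpoint download) and check the output shape on a dummy batch.
model = se_resnext50_32x4d(num_classes=1103, pretrained=None)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(2, 3, 224, 224))
print(logits.shape)  # expected: torch.Size([2, 1103])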
-------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # A comma-separated list of package or module names from where C extensions may 4 | # be loaded. Extensions are loading into the active Python interpreter and may 5 | # run arbitrary code. 6 | extension-pkg-whitelist=torch,numpy 7 | 8 | # Add files or directories to the blacklist. They should be base names, not 9 | # paths. 10 | ignore=CVS 11 | 12 | # Add files or directories matching the regex patterns to the blacklist. The 13 | # regex matches against base names, not paths. 14 | ignore-patterns= 15 | 16 | # Python code to execute, usually for sys.path manipulation such as 17 | # pygtk.require(). 18 | #init-hook= 19 | 20 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 21 | # number of processors available to use. 22 | jobs=1 23 | 24 | # Control the amount of potential inferred values when inferring a single 25 | # object. This can help the performance when dealing with large functions or 26 | # complex, nested conditions. 27 | limit-inference-results=100 28 | 29 | # List of plugins (as comma separated values of python modules names) to load, 30 | # usually to register additional checkers. 31 | load-plugins= 32 | 33 | # Pickle collected data for later comparisons. 34 | persistent=yes 35 | 36 | # Specify a configuration file. 37 | #rcfile= 38 | 39 | # When enabled, pylint would attempt to guess common misconfiguration and emit 40 | # user-friendly hints instead of false-positive error messages. 41 | suggestion-mode=yes 42 | 43 | # Allow loading of arbitrary C extensions. Extensions are imported into the 44 | # active Python interpreter and may run arbitrary code. 45 | unsafe-load-any-extension=no 46 | 47 | 48 | [MESSAGES CONTROL] 49 | 50 | # Only show warnings with the listed confidence levels. Leave empty to show 51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 52 | confidence= 53 | 54 | # Disable the message, report, category or checker with the given id(s). You 55 | # can either give multiple identifiers separated by comma (,) or put this 56 | # option multiple times (only on the command line, not in the configuration 57 | # file where it should appear only once). You can also use "--disable=all" to 58 | # disable everything first and then reenable specific checks. For example, if 59 | # you want to run only the similarities checker, you can use "--disable=all 60 | # --enable=similarities". If you want to run only the classes checker, but have 61 | # no Warning level messages displayed, use "--disable=all --enable=classes 62 | # --disable=W". 
63 | disable=print-statement, 64 | parameter-unpacking, 65 | unpacking-in-except, 66 | old-raise-syntax, 67 | backtick, 68 | long-suffix, 69 | old-ne-operator, 70 | old-octal-literal, 71 | import-star-module-level, 72 | non-ascii-bytes-literal, 73 | raw-checker-failed, 74 | bad-inline-option, 75 | locally-disabled, 76 | locally-enabled, 77 | file-ignored, 78 | suppressed-message, 79 | useless-suppression, 80 | deprecated-pragma, 81 | use-symbolic-message-instead, 82 | apply-builtin, 83 | basestring-builtin, 84 | buffer-builtin, 85 | cmp-builtin, 86 | coerce-builtin, 87 | execfile-builtin, 88 | file-builtin, 89 | long-builtin, 90 | raw_input-builtin, 91 | reduce-builtin, 92 | standarderror-builtin, 93 | unicode-builtin, 94 | xrange-builtin, 95 | coerce-method, 96 | delslice-method, 97 | getslice-method, 98 | setslice-method, 99 | no-absolute-import, 100 | old-division, 101 | dict-iter-method, 102 | dict-view-method, 103 | next-method-called, 104 | metaclass-assignment, 105 | indexing-exception, 106 | raising-string, 107 | reload-builtin, 108 | oct-method, 109 | hex-method, 110 | nonzero-method, 111 | cmp-method, 112 | input-builtin, 113 | round-builtin, 114 | intern-builtin, 115 | unichr-builtin, 116 | map-builtin-not-iterating, 117 | zip-builtin-not-iterating, 118 | range-builtin-not-iterating, 119 | filter-builtin-not-iterating, 120 | using-cmp-argument, 121 | eq-without-hash, 122 | div-method, 123 | idiv-method, 124 | rdiv-method, 125 | exception-message-attribute, 126 | invalid-str-codec, 127 | sys-max-int, 128 | bad-python3-import, 129 | deprecated-string-function, 130 | deprecated-str-translate-call, 131 | deprecated-itertools-function, 132 | deprecated-types-field, 133 | next-method-defined, 134 | dict-items-not-iterating, 135 | dict-keys-not-iterating, 136 | dict-values-not-iterating, 137 | deprecated-operator-function, 138 | deprecated-urllib-function, 139 | xreadlines-attribute, 140 | deprecated-sys-function, 141 | exception-escape, 142 | comprehension-escape, 143 | missing-docstring 144 | 145 | # Enable the message, report, category or checker with the given id(s). You can 146 | # either give multiple identifier separated by comma (,) or put this option 147 | # multiple time (only on the command line, not in the configuration file where 148 | # it should appear only once). See also the "--disable" option for examples. 149 | enable=c-extension-no-member 150 | 151 | 152 | [REPORTS] 153 | 154 | # Python expression which should return a note less than 10 (10 is the highest 155 | # note). You have access to the variables errors warning, statement which 156 | # respectively contain the number of errors / warnings messages and the total 157 | # number of statements analyzed. This is used by the global evaluation report 158 | # (RP0004). 159 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 160 | 161 | # Template used to display messages. This is a python new-style format string 162 | # used to format the message information. See doc for all details. 163 | #msg-template= 164 | 165 | # Set the output format. Available formats are text, parseable, colorized, json 166 | # and msvs (visual studio). You can also give a reporter class, e.g. 167 | # mypackage.mymodule.MyReporterClass. 168 | output-format=text 169 | 170 | # Tells whether to display a full report or only the messages. 171 | reports=no 172 | 173 | # Activate the evaluation score. 
174 | score=yes
175 | 
176 | 
177 | [REFACTORING]
178 | 
179 | # Maximum number of nested blocks for function / method body
180 | max-nested-blocks=5
181 | 
182 | # Complete name of functions that never return. When checking for
183 | # inconsistent-return-statements, if a never-returning function is called,
184 | # it will be considered an explicit return statement and no message will be
185 | # printed.
186 | never-returning-functions=sys.exit
187 | 
188 | 
189 | [FORMAT]
190 | 
191 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
192 | expected-line-ending-format=
193 | 
194 | # Regexp for a line that is allowed to be longer than the limit.
195 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$
196 | 
197 | # Number of spaces of indent required inside a hanging or continued line.
198 | indent-after-paren=4
199 | 
200 | # String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
201 | # tab).
202 | indent-string='    '
203 | 
204 | # Maximum number of characters on a single line.
205 | max-line-length=100
206 | 
207 | # Maximum number of lines in a module.
208 | max-module-lines=1000
209 | 
210 | # List of optional constructs for which whitespace checking is disabled. `dict-
211 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
212 | # `trailing-comma` allows a space between comma and closing bracket: (a, ).
213 | # `empty-line` allows space-only lines.
214 | no-space-check=trailing-comma,
215 |                dict-separator
216 | 
217 | # Allow the body of a class to be on the same line as the declaration if body
218 | # contains a single statement.
219 | single-line-class-stmt=no
220 | 
221 | # Allow the body of an if to be on the same line as the test if there is no
222 | # else.
223 | single-line-if-stmt=no
224 | 
225 | 
226 | [LOGGING]
227 | 
228 | # Logging modules to check that the string format arguments are in logging
229 | # function parameter format.
230 | logging-modules=logging
231 | 
232 | 
233 | [VARIABLES]
234 | 
235 | # List of additional names supposed to be defined in builtins. Remember that
236 | # you should avoid defining new builtins when possible.
237 | additional-builtins=
238 | 
239 | # Tells whether unused global variables should be treated as a violation.
240 | allow-global-unused-variables=yes
241 | 
242 | # List of strings which can identify a callback function by name. A callback
243 | # name must start or end with one of those strings.
244 | callbacks=cb_,
245 |           _cb
246 | 
247 | # A regular expression matching the name of dummy variables (i.e. expected to
248 | # not be used).
249 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
250 | 
251 | # Argument names that match this expression will be ignored. Defaults to names
252 | # with a leading underscore.
253 | ignored-argument-names=_.*|^ignored_|^unused_
254 | 
255 | # Tells whether we should check for unused imports in __init__ files.
256 | init-import=no
257 | 
258 | # List of qualified module names which can have objects that can redefine
259 | # builtins.
260 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
261 | 
262 | 
263 | [MISCELLANEOUS]
264 | 
265 | # List of note tags to take into consideration, separated by a comma.
266 | notes=FIXME,
267 |       XXX,
268 |       TODO
269 | 
270 | 
271 | [TYPECHECK]
272 | 
273 | # List of decorators that produce context managers, such as
274 | # contextlib.contextmanager. Add to this list to register other decorators that
275 | # produce valid context managers.
276 | contextmanager-decorators=contextlib.contextmanager 277 | 278 | # List of members which are set dynamically and missed by pylint inference 279 | # system, and so shouldn't trigger E1101 when accessed. Python regular 280 | # expressions are accepted. 281 | generated-members=numpy.*,torch.*,tensorflow.* 282 | 283 | # Tells whether missing members accessed in mixin class should be ignored. A 284 | # mixin class is detected if its name ends with "mixin" (case insensitive). 285 | ignore-mixin-members=yes 286 | 287 | # Tells whether to warn about missing members when the owner of the attribute 288 | # is inferred to be None. 289 | ignore-none=yes 290 | 291 | # This flag controls whether pylint should warn about no-member and similar 292 | # checks whenever an opaque object is returned when inferring. The inference 293 | # can return multiple potential results while evaluating a Python object, but 294 | # some branches might not be evaluated, which results in partial inference. In 295 | # that case, it might be useful to still emit no-member and other checks for 296 | # the rest of the inferred objects. 297 | ignore-on-opaque-inference=yes 298 | 299 | # List of class names for which member attributes should not be checked (useful 300 | # for classes with dynamically set attributes). This supports the use of 301 | # qualified names. 302 | ignored-classes=optparse.Values,thread._local,_thread._local 303 | 304 | # List of module names for which member attributes should not be checked 305 | # (useful for modules/projects where namespaces are manipulated during runtime 306 | # and thus existing member attributes cannot be deduced by static analysis. It 307 | # supports qualified module names, as well as Unix pattern matching. 308 | ignored-modules= 309 | 310 | # Show a hint with possible names when a member name was not found. The aspect 311 | # of finding the hint is based on edit distance. 312 | missing-member-hint=yes 313 | 314 | # The minimum edit distance a name should have in order to be considered a 315 | # similar match for a missing member name. 316 | missing-member-hint-distance=1 317 | 318 | # The total number of similar names that should be taken in consideration when 319 | # showing a hint for a missing member. 320 | missing-member-max-choices=1 321 | 322 | 323 | [SPELLING] 324 | 325 | # Limits count of emitted suggestions for spelling mistakes. 326 | max-spelling-suggestions=4 327 | 328 | # Spelling dictionary name. Available dictionaries: none. To make it working 329 | # install python-enchant package.. 330 | spelling-dict= 331 | 332 | # List of comma separated words that should not be checked. 333 | spelling-ignore-words= 334 | 335 | # A path to a file that contains private dictionary; one word per line. 336 | spelling-private-dict-file= 337 | 338 | # Tells whether to store unknown words to indicated private dictionary in 339 | # --spelling-private-dict-file option instead of raising a message. 340 | spelling-store-unknown-words=no 341 | 342 | 343 | [BASIC] 344 | 345 | # Naming style matching correct argument names. 346 | argument-naming-style=snake_case 347 | 348 | # Regular expression matching correct argument names. Overrides argument- 349 | # naming-style. 350 | #argument-rgx= 351 | 352 | # Naming style matching correct attribute names. 353 | attr-naming-style=snake_case 354 | 355 | # Regular expression matching correct attribute names. Overrides attr-naming- 356 | # style. 357 | #attr-rgx= 358 | 359 | # Bad variable names which should always be refused, separated by a comma. 
360 | bad-names=foo, 361 | bar, 362 | baz, 363 | toto, 364 | tutu, 365 | tata 366 | 367 | # Naming style matching correct class attribute names. 368 | class-attribute-naming-style=any 369 | 370 | # Regular expression matching correct class attribute names. Overrides class- 371 | # attribute-naming-style. 372 | #class-attribute-rgx= 373 | 374 | # Naming style matching correct class names. 375 | class-naming-style=PascalCase 376 | 377 | # Regular expression matching correct class names. Overrides class-naming- 378 | # style. 379 | #class-rgx= 380 | 381 | # Naming style matching correct constant names. 382 | const-naming-style=UPPER_CASE 383 | 384 | # Regular expression matching correct constant names. Overrides const-naming- 385 | # style. 386 | #const-rgx= 387 | 388 | # Minimum line length for functions/classes that require docstrings, shorter 389 | # ones are exempt. 390 | docstring-min-length=-1 391 | 392 | # Naming style matching correct function names. 393 | function-naming-style=snake_case 394 | 395 | # Regular expression matching correct function names. Overrides function- 396 | # naming-style. 397 | #function-rgx= 398 | 399 | # Good variable names which should always be accepted, separated by a comma. 400 | good-names=i, 401 | j, 402 | k, 403 | ex, 404 | Run, 405 | _ 406 | 407 | # Include a hint for the correct naming format with invalid-name. 408 | include-naming-hint=no 409 | 410 | # Naming style matching correct inline iteration names. 411 | inlinevar-naming-style=any 412 | 413 | # Regular expression matching correct inline iteration names. Overrides 414 | # inlinevar-naming-style. 415 | #inlinevar-rgx= 416 | 417 | # Naming style matching correct method names. 418 | method-naming-style=snake_case 419 | 420 | # Regular expression matching correct method names. Overrides method-naming- 421 | # style. 422 | #method-rgx= 423 | 424 | # Naming style matching correct module names. 425 | module-naming-style=snake_case 426 | 427 | # Regular expression matching correct module names. Overrides module-naming- 428 | # style. 429 | #module-rgx= 430 | 431 | # Colon-delimited sets of names that determine each other's naming style when 432 | # the name regexes allow several styles. 433 | name-group= 434 | 435 | # Regular expression which should only match function or class names that do 436 | # not require a docstring. 437 | no-docstring-rgx=^_ 438 | 439 | # List of decorators that produce properties, such as abc.abstractproperty. Add 440 | # to this list to register other decorators that produce valid properties. 441 | # These decorators are taken in consideration only for invalid-name. 442 | property-classes=abc.abstractproperty 443 | 444 | # Naming style matching correct variable names. 445 | variable-naming-style=snake_case 446 | 447 | # Regular expression matching correct variable names. Overrides variable- 448 | # naming-style. 449 | #variable-rgx= 450 | 451 | 452 | [SIMILARITIES] 453 | 454 | # Ignore comments when computing similarities. 455 | ignore-comments=yes 456 | 457 | # Ignore docstrings when computing similarities. 458 | ignore-docstrings=yes 459 | 460 | # Ignore imports when computing similarities. 461 | ignore-imports=no 462 | 463 | # Minimum lines number of a similarity. 464 | min-similarity-lines=4 465 | 466 | 467 | [CLASSES] 468 | 469 | # List of method names used to declare (i.e. assign) instance attributes. 470 | defining-attr-methods=__init__, 471 | __new__, 472 | setUp 473 | 474 | # List of member names, which should be excluded from the protected access 475 | # warning. 
476 | exclude-protected=_asdict, 477 | _fields, 478 | _replace, 479 | _source, 480 | _make 481 | 482 | # List of valid names for the first argument in a class method. 483 | valid-classmethod-first-arg=cls 484 | 485 | # List of valid names for the first argument in a metaclass class method. 486 | valid-metaclass-classmethod-first-arg=cls 487 | 488 | 489 | [DESIGN] 490 | 491 | # Maximum number of arguments for function / method. 492 | max-args=5 493 | 494 | # Maximum number of attributes for a class (see R0902). 495 | max-attributes=7 496 | 497 | # Maximum number of boolean expressions in an if statement. 498 | max-bool-expr=5 499 | 500 | # Maximum number of branch for function / method body. 501 | max-branches=12 502 | 503 | # Maximum number of locals for function / method body. 504 | max-locals=15 505 | 506 | # Maximum number of parents for a class (see R0901). 507 | max-parents=7 508 | 509 | # Maximum number of public methods for a class (see R0904). 510 | max-public-methods=20 511 | 512 | # Maximum number of return / yield for function / method body. 513 | max-returns=6 514 | 515 | # Maximum number of statements in function / method body. 516 | max-statements=50 517 | 518 | # Minimum number of public methods for a class (see R0903). 519 | min-public-methods=2 520 | 521 | 522 | [IMPORTS] 523 | 524 | # Allow wildcard imports from modules that define __all__. 525 | allow-wildcard-with-all=no 526 | 527 | # Analyse import fallback blocks. This can be used to support both Python 2 and 528 | # 3 compatible code, which means that the block might have code that exists 529 | # only in one or another interpreter, leading to false positives when analysed. 530 | analyse-fallback-blocks=no 531 | 532 | # Deprecated modules which should not be used, separated by a comma. 533 | deprecated-modules=optparse,tkinter.tix 534 | 535 | # Create a graph of external dependencies in the given file (report RP0402 must 536 | # not be disabled). 537 | ext-import-graph= 538 | 539 | # Create a graph of every (i.e. internal and external) dependencies in the 540 | # given file (report RP0402 must not be disabled). 541 | import-graph= 542 | 543 | # Create a graph of internal dependencies in the given file (report RP0402 must 544 | # not be disabled). 545 | int-import-graph= 546 | 547 | # Force import order to recognize a module as part of the standard 548 | # compatibility libraries. 549 | known-standard-library= 550 | 551 | # Force import order to recognize a module as part of a third party library. 552 | known-third-party=enchant 553 | 554 | 555 | [EXCEPTIONS] 556 | 557 | # Exceptions that will emit a warning when being caught. Defaults to 558 | # "Exception". 559 | overgeneral-exceptions=Exception 560 | --------------------------------------------------------------------------------
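Editor's note: the following self-contained sketch is illustrative and not part of the original repository. It replays the label-binarization logic of binarize_prediction()/_make_mask() from imet/main.py on a toy 2x5 probability matrix; the helper is copied in paraphrase, with the N_CLASSES assertion dropped so the example can run on five columns.

import numpy as np

def make_top_n_mask(argsorted, top_n):
    # Same construction as _make_mask() in imet/main.py: set a 1 on the
    # top_n highest-probability columns of each row.
    mask = np.zeros_like(argsorted, dtype=np.uint8)
    col_indices = argsorted[:, -top_n:].reshape(-1)
    row_indices = [i // top_n for i in range(len(col_indices))]
    mask[row_indices, col_indices] = 1
    return mask

probs = np.array([[0.90, 0.10, 0.40, 0.05, 0.60],
                  [0.20, 0.15, 0.10, 0.05, 0.01]])
argsorted = probs.argsort(axis=1)
max_mask = make_top_n_mask(argsorted, 3)  # cap each sample at 3 labels
min_mask = make_top_n_mask(argsorted, 1)  # always keep the top-1 label
binarized = (max_mask & (probs > 0.3)) | min_mask
print(binarized)
# [[1 0 1 0 1]   row 0: indices 0, 2 and 4 clear the 0.3 threshold and the cap
#  [1 0 0 0 0]]  row 1: nothing clears 0.3, so only the arg-max label survives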