├── imet
│   ├── __init__.py
│   ├── utils.py
│   ├── make_submission.py
│   ├── loss.py
│   ├── dataset.py
│   ├── make_folds.py
│   ├── transforms.py
│   ├── partialconv2d.py
│   ├── models.py
│   ├── adabound.py
│   ├── main.py
│   └── seresnet_partial.py
├── requirements.txt
├── zipfiles.bash
├── pytorch_helper_bot
│   ├── examples
│   │   └── imagenette
│   │       ├── requirements.txt
│   │       ├── dataset.py
│   │       ├── README.md
│   │       ├── transforms.py
│   │       ├── models.py
│   │       ├── main.py
│   │       └── logs
│   │           ├── colab_o0_bs64_e5.txt
│   │           ├── colab_o1_bs64_e5_2.txt
│   │           ├── colab_o2_bs64_e5.txt
│   │           ├── colab_o1_bs64_e5.txt
│   │           ├── bs32_8460.txt
│   │           ├── bs64_mixup02_8600.txt
│   │           ├── bs64_8680.txt
│   │           └── bs64_e10.txt
│   ├── .gitignore
│   ├── helperbot
│   │   ├── __init__.py
│   │   ├── loss.py
│   │   ├── differential_learning_rates.py
│   │   ├── weight_decay.py
│   │   ├── logger.py
│   │   ├── metrics.py
│   │   ├── callbacks.py
│   │   ├── lr_scheduler.py
│   │   └── bot.py
│   ├── setup.py
│   └── README.md
├── .gitignore
├── setup.py
├── README.md
├── LICENSE
└── pylintrc

/imet/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch>=1.0.0
2 | albumentations>=0.2.3
3 | pretrainedmodels>=0.7.4
4 | tqdm==4.29.1
5 | scikit-learn>=0.21.2
6 | pandas>=0.24.0
--------------------------------------------------------------------------------
/zipfiles.bash:
--------------------------------------------------------------------------------
1 | rm imet.7z
2 | 7za a -bd -mx=0 imet.7z imet/*.py setup.py
3 | rm helperbot.7z
4 | cd pytorch_helper_bot
5 | 7za a -bd -mx=0 ../helperbot.7z helperbot/*.py *.py
6 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/examples/imagenette/requirements.txt:
--------------------------------------------------------------------------------
1 | albumentations>=0.1.12
2 | pretrainedmodels>=0.7.4
3 | https://github.com/ceshine/pytorch_helper_bot/archive/0.1.6.zip
4 | tensorboardX
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | 
7 | # submission file
8 | *.csv
9 | 
10 | # pyCharm files
11 | .idea/
12 | 
13 | .mypy_cache/
14 | .vscode/
15 | input/
16 | references/
17 | 
18 | *.7z
19 | *.zip
20 | 
21 | bot
22 | input
--------------------------------------------------------------------------------
/pytorch_helper_bot/.gitignore:
--------------------------------------------------------------------------------
1 | *.csv
2 | *#
3 | *~
4 | cache
5 | __pycache__
6 | .dir-locals.el
7 | .idea/
8 | .vscode/
9 | .ipynb_checkpoints/
10 | *.7z
11 | *.html
12 | *.gz
13 | *.out
14 | runs/
15 | data/
16 | plots
17 | *.zip
18 | .mypy_cache
19 | pylintrc
20 | *.egg-info/
21 | .cache/
22 | core
23 | .nv/
24 | .bash_history
25 | data
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | setup(
4 |     name='imet',
5 |     packages=['imet'],
6 |     install_requires=[
7 |         'torch>=1.0.0',
8 |         'albumentations>=0.2.3',
9 |         'pretrainedmodels>=0.7.4',
10 |         'pandas>=0.24.0',
11 |         'scikit-learn>=0.21.2',
12 |         'tqdm==4.29.1',
13 |         'helperbot>=0.1.3'
14 |     ]
15 | )
16 | 
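17 | # Note: 'helperbot' is vendored in this repository under pytorch_helper_bot/
18 | # via git subtree; per the README, install it from there first:
19 | #   cd pytorch_helper_bot && pip install .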
--------------------------------------------------------------------------------
/pytorch_helper_bot/helperbot/__init__.py:
--------------------------------------------------------------------------------
1 | from .differential_learning_rates import setup_differential_learning_rates, freeze_layers
2 | from .bot import BaseBot
3 | from .lr_scheduler import TriangularLR, GradualWarmupScheduler
4 | from .weight_decay import WeightDecayOptimizerWrapper
5 | from .metrics import Metric, AUC, FBeta, Top1Accuracy, TopKAccuracy
6 | from .callbacks import LearningRateSchedulerCallback, MixUpCallback
7 | 
--------------------------------------------------------------------------------
/imet/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import pandas as pd
4 | from scipy.stats.mstats import gmean
5 | 
6 | 
7 | ON_KAGGLE: bool = 'KAGGLE_WORKING_DIR' in os.environ
8 | 
9 | 
10 | def gmean_df(df: pd.DataFrame) -> pd.DataFrame:
11 |     return df.groupby(level=0).agg(lambda x: gmean(list(x)))
12 | 
13 | 
14 | def mean_df(df: pd.DataFrame) -> pd.DataFrame:
15 |     return df.groupby(level=0).mean()
16 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | 
3 | setup(
4 |     name='PyTorchHelperBot',
5 |     version='0.1.6',
6 |     packages=['helperbot', ],
7 |     install_requires=[
8 |         'torch>=0.4.1',
9 |         'dataclasses'
10 |     ],
11 |     classifiers=[  # Optional
12 |         # How mature is this project? Common values are
13 |         #   3 - Alpha
14 |         #   4 - Beta
15 |         #   5 - Production/Stable
16 |         'Development Status :: 3 - Alpha',
17 | 
18 |         # Indicate who your project is intended for
19 |         'Intended Audience :: Developers',
20 | 
21 |         # Pick your license as you wish
22 |         'License :: OSI Approved :: MIT License',
23 | 
24 |         # Specify the Python versions you support here. In particular, ensure
25 |         # that you indicate whether you support Python 2, Python 3 or both.
26 |         'Programming Language :: Python :: 3.6',
27 |         'Programming Language :: Python :: 3.7',
28 |     ],
29 | )
--------------------------------------------------------------------------------
/pytorch_helper_bot/helperbot/loss.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | 
3 | 
4 | class MixUpSoftmaxLoss(nn.Module):
5 |     "Reference: https://github.com/fastai/fastai/blob/master/fastai/callbacks/mixup.py#L6"
6 | 
7 |     def __init__(self, crit, reduction='mean'):
8 |         super().__init__()
9 |         self.crit = crit
10 |         setattr(self.crit, 'reduction', 'none')
11 |         self.reduction = reduction
12 | 
13 |     def forward(self, output, target):
14 |         if len(target.size()) == 2:
15 |             loss1 = self.crit(output, target[:, 0].long())
16 |             loss2 = self.crit(output, target[:, 1].long())
17 |             lambda_ = target[:, 2]
18 |             d = (loss1 * lambda_ + loss2 * (1-lambda_)).mean()
19 |         else:
20 |             # This handles the cases without MixUp for backward compatibility
21 |             d = self.crit(output, target)
22 |         if self.reduction == 'mean':
23 |             return d.mean()
24 |         elif self.reduction == 'sum':
25 |             return d.sum()
26 |         return d
27 | 
--------------------------------------------------------------------------------
/imet/make_submission.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | 
3 | import pandas as pd
4 | 
5 | from .utils import mean_df
6 | from .dataset import DATA_ROOT
7 | from .main import binarize_prediction, CACHE_DIR
8 | 
9 | 
10 | def main():
11 |     parser = argparse.ArgumentParser()
12 |     arg = parser.add_argument
13 |     arg('predictions', nargs='+')
14 |     arg('--threshold', type=float, default=0.2)
15 |     args = parser.parse_args()
16 |     sample_submission = pd.read_csv(
17 |         DATA_ROOT / 'sample_submission.csv', index_col='id')
18 |     dfs = []
19 |     for prediction in args.predictions:
20 |         df = pd.read_pickle(
21 |             CACHE_DIR / f"preds_{prediction}.pkl")
22 |         print(df.shape)
23 |         # print(df.isnull().sum().sum())
24 |         df = df.reindex(sample_submission.index)
25 |         print(df.isnull().sum().sum())
26 |         dfs.append(df)
27 |     df = pd.concat(dfs)
28 |     df = mean_df(df)
29 |     df[:] = binarize_prediction(df.values, threshold=args.threshold)
30 |     df = df.apply(get_classes, axis=1)
31 |     df.name = 'attribute_ids'
32 |     df.to_csv("submission.csv", header=True)
33 | 
34 | 
35 | def get_classes(item):
36 |     return ' '.join(str(cls_idx) for cls_idx, is_present in item.items() if is_present)
37 | 
38 | 
39 | if __name__ == '__main__':
40 |     main()
41 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/README.md:
--------------------------------------------------------------------------------
1 | # PyTorch Helper Bot
2 | [WIP] a high-level PyTorch helper package
3 | 
4 | This project is intended for my personal use. Backward compatibility will not be guaranteed. Important releases will be tagged.
5 | 
6 | ## Motivation
7 | 
8 | [*fast.ai*](https://github.com/fastai/fastai) is great, and I recommend it to all deep learning beginners. But since it's beginner-friendly, much of the more sophisticated machinery is heavily abstracted and hidden from users. Reading the source code is often required before you can tweak the underlying algorithms. The `doc` function greatly speeds up that process by quickly directing the user to the source code and documentation.
9 | 
10 | However, *fast.ai* has grown ever bigger, and not everyone has time to keep up with its codebase. Hence the creation of this project. 
I built a relatively thin layer of abstraction on top of PyTorch from scratch, with many ideas and some code borrowed from various sources (mainly *fast.ai*). Only features that are relevant to my use cases are added.
11 | 
12 | Another similar project is [pytorch/ignite](https://github.com/pytorch/ignite).
13 | 
14 | ## Examples
15 | 
16 | There are almost no unit tests yet. The following example(s) serve as rough functional tests.
17 | 
18 | * [Imagenette Image Classification](examples/imagenette/)
--------------------------------------------------------------------------------
/pytorch_helper_bot/examples/imagenette/dataset.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Callable, Optional, Dict
3 | 
4 | import cv2
5 | import torch
6 | import pandas as pd
7 | from torch.utils.data import Dataset
8 | 
9 | from transforms import tensor_transform
10 | 
11 | 
12 | N_CLASSES = 1103  # carried over from the iMet code; imagenette itself has only 10 classes
13 | DATA_ROOT = Path('./data')
14 | 
15 | 
16 | def build_dataframe_from_folder(root: Path, class_map: Optional[Dict] = None):
17 |     if class_map is None:
18 |         new_class_map = {}
19 |     tmp = []
20 |     for subfolder in root.iterdir():
21 |         if class_map is None:
22 |             new_class_map[subfolder.name] = len(new_class_map)
23 |             class_id = new_class_map[subfolder.name]
24 |         else:
25 |             class_id = class_map[subfolder.name]
26 |         for image in subfolder.iterdir():
27 |             tmp.append((image, class_id))
28 |     df = pd.DataFrame(tmp, columns=["image_path", "label"])
29 |     if class_map is None:
30 |         return df, new_class_map
31 |     return df
32 | 
33 | 
34 | class TrainDataset(Dataset):
35 |     def __init__(self, df: pd.DataFrame, image_transform: Callable, debug: bool = True):
36 |         super().__init__()
37 |         self._df = df
38 |         self._image_transform = image_transform
39 |         self._debug = debug
40 | 
41 |     def __len__(self):
42 |         return len(self._df)
43 | 
44 |     def __getitem__(self, idx: int):
45 |         item = self._df.iloc[idx]
46 |         image = load_transform_image(
47 |             item.image_path, self._image_transform, debug=self._debug)
48 |         target = torch.tensor(item.label).long()
49 |         return image, target
50 | 
51 | 
52 | def load_transform_image(
53 |         image_path: Path, image_transform: Callable, debug: bool = False):
54 |     image = cv2.imread(str(image_path.absolute()))
55 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
56 |     image = image_transform(image=image)["image"]
57 |     # if debug:
58 |     #     image.save('_debug.jpg')
59 |     tensor = tensor_transform(image=image)["image"]
60 |     return tensor
61 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/helperbot/differential_learning_rates.py:
--------------------------------------------------------------------------------
1 | from typing import Callable, List, Dict
2 | 
3 | import torch
4 | import torch.nn as nn
5 | 
6 | 
7 | def opt_params(layer, learning_rate):
8 |     return {'params': layer.parameters(), 'lr': learning_rate}
9 | 
10 | 
11 | def setup_differential_learning_rates(
12 |         optimizer_constructor: Callable[[List[Dict]], torch.optim.Optimizer],
13 |         model: torch.nn.Module,
14 |         lrs: List[float]) -> torch.optim.Optimizer:
15 |     """Set up an optimizer with differential learning rates
16 | 
17 |     Reference: fast.ai v0.7
18 | 
19 |     Parameters
20 |     ----------
21 |     optimizer_constructor : Callable[[List[Dict]], torch.optim.Optimizer]
22 |         Optimizer constructor or a partial that returns an Optimizer object.
23 |     model : torch.nn.Module
24 |         The PyTorch model you want to optimize. Needs to have a .get_layer_groups() method. 
25 |     lrs : List[float]
26 |         A list of learning rates for each layer group.
27 |     """
28 |     layer_groups = model.get_layer_groups()
29 |     assert len(layer_groups) == len(
30 |         lrs), f'size mismatch, expected {len(layer_groups)} lrs, but got {len(lrs)}'
31 |     optimizer = optimizer_constructor(
32 |         [opt_params(*p) for p in zip(layer_groups, lrs)])
33 |     return optimizer
34 | 
35 | 
36 | def children(m):
37 |     return m if isinstance(m, (list, tuple)) else list(m.children())
38 | 
39 | 
40 | def set_trainable_attr(m, b):
41 |     m.trainable = b
42 |     for p in m.parameters():
43 |         p.requires_grad = b
44 | 
45 | 
46 | def apply_leaf(m, f):
47 |     c = children(m)
48 |     if isinstance(m, nn.Module):
49 |         f(m)
50 |     if len(c) > 0:
51 |         for l in c:
52 |             apply_leaf(l, f)
53 | 
54 | 
55 | def set_trainable(l, b):
56 |     apply_leaf(l, lambda m: set_trainable_attr(m, b))
57 | 
58 | 
59 | def freeze_layers(layer_groups: List, freeze_flags: List[bool]):
60 |     assert len(freeze_flags) == len(layer_groups)
61 |     for layer, flag in zip(layer_groups, freeze_flags):
62 |         set_trainable(layer, not flag)
63 | 
--------------------------------------------------------------------------------
/imet/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | # Source: https://www.kaggle.com/c/human-protein-atlas-image-classification/discussion/78109
6 | 
7 | 
8 | class FocalLoss(nn.Module):
9 |     """Adapted from: https://github.com/kuangliu/pytorch-retinanet/blob/master/loss.py
10 |     F.logsigmoid used as in https://gist.github.com/AdrienLE/bf31dfe94569319f6e47b2de8df13416#file-focal_dice_1-py
11 |     """
12 | 
13 |     def __init__(self, alpha, gamma):
14 |         super(FocalLoss, self).__init__()
15 |         assert 0 < alpha < 1
16 |         self.alpha = alpha
17 |         self.gamma = gamma
18 | 
19 |     def forward(self, x, y):
20 |         '''Focal loss.
21 |         Args:
22 |           x: (tensor) sized [N,].
23 |           y: (tensor) sized [N,].
24 |         Return:
25 |           (tensor) focal loss. 
26 |         '''
27 |         y = y.float()
28 |         pt_log = F.logsigmoid(-x * (y * 2 - 1))
29 |         # w = alpha if t > 0 else 1-alpha
30 |         at = (self.alpha * y + (1-self.alpha) * (1-y)) * 2
31 |         w = at * (pt_log * self.gamma).exp()
32 |         # Don't calculate gradients of the weights
33 |         w = w.detach()
34 |         return F.binary_cross_entropy_with_logits(x, y, w, reduction="mean")
35 | 
36 |     def __str__(self):
37 |         return f"<FocalLoss(alpha={self.alpha}, gamma={self.gamma})>"
38 | 
39 | 
40 | class FbetaLoss(nn.Module):
41 |     def __init__(self, beta=1):
42 |         super(FbetaLoss, self).__init__()
43 |         self.small_value = 1e-6
44 |         self.beta = beta
45 | 
46 |     def forward(self, logits, labels):
47 |         beta = self.beta
48 |         batch_size = logits.size()[0]
49 |         p = torch.sigmoid(logits)
50 |         l = labels
51 |         num_pos = torch.sum(p, 1) + self.small_value
52 |         num_pos_hat = torch.sum(l, 1) + self.small_value
53 |         tp = torch.sum(l * p, 1)
54 |         precise = tp / num_pos
55 |         recall = tp / num_pos_hat
56 |         fs = (1 + beta * beta) * precise * recall / \
57 |             (beta * beta * precise + recall + self.small_value)
58 |         loss = fs.sum() / batch_size
59 |         return 1 - loss
--------------------------------------------------------------------------------
/pytorch_helper_bot/helperbot/weight_decay.py:
--------------------------------------------------------------------------------
1 | from typing import Union, Sequence
2 | 
3 | from torch.optim import Optimizer
4 | 
5 | 
6 | class WeightDecayOptimizerWrapper(Optimizer):
7 |     def __init__(self, optimizer: Optimizer, weight_decay: Union[Sequence[float], float], change_with_lr: bool = True) -> None:
8 |         self.optimizer = optimizer
9 |         if isinstance(weight_decay, (list, tuple)):
10 |             assert len(weight_decay) == len(self.optimizer.param_groups)
11 |             assert all((x >= 0 for x in weight_decay))
12 |             self.weight_decays = weight_decay
13 |         else:
14 |             assert weight_decay >= 0
15 |             self.weight_decays = [weight_decay] * \
16 |                 len(self.optimizer.param_groups)
17 |         self.state = self.optimizer.state
18 |         self.change_with_lr = change_with_lr
19 | 
20 |     def step(self, closure=None) -> None:
21 |         for group, weight_decay in zip(self.optimizer.param_groups, self.weight_decays):
22 |             for param in group['params']:
23 |                 if param.grad is None or weight_decay == 0:
24 |                     continue
25 |                 if self.change_with_lr:
26 |                     param.data = param.data.add(
27 |                         -weight_decay * group['lr'], param.data)
28 |                 else:
29 |                     param.data.add_(-weight_decay, param.data)
30 |         self.optimizer.step()
31 | 
32 |     def zero_grad(self) -> None:
33 |         self.optimizer.zero_grad()
34 | 
35 |     def add_param_group(self, param_group):
36 |         self.optimizer.add_param_group(param_group)
37 | 
38 |     def load_state_dict(self, state_dict):
39 |         self.optimizer.load_state_dict(state_dict)
40 | 
41 |     def state_dict(self):
42 |         return self.optimizer.state_dict()
43 | 
44 |     def __repr__(self):
45 |         return self.optimizer.__repr__()
46 | 
47 |     def __getstate__(self):
48 |         return self.optimizer.__getstate__()
49 | 
50 |     def __setstate__(self, state):
51 |         self.optimizer.__setstate__(state)
52 |         self.state = self.optimizer.state
53 | 
54 |     @property
55 |     def param_groups(self):
56 |         return self.optimizer.param_groups
57 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/helperbot/logger.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import logging
3 | from pathlib import Path
4 | 
5 | 
6 | class Logger:
7 |     def __init__(self, model_name, log_dir, level=logging.INFO, use_tensorboard=False, echo=False):
8 |         self.model_name = model_name
9 | 
        (Path(log_dir) / "summaries").mkdir(parents=True, exist_ok=True)
10 |         date_str = datetime.now().strftime('%Y%m%d_%H%M')
11 |         log_file = 'log_{}.txt'.format(date_str)
12 |         formatter = logging.Formatter(
13 |             '[[%(asctime)s]] %(message)s',
14 |             datefmt='%m/%d/%Y %I:%M:%S %p'
15 |         )
16 |         self.logger = logging.getLogger("bot")
17 |         # Remove all existing handlers
18 |         self.logger.handlers = []
19 |         # Initialize handlers
20 |         fh = logging.FileHandler(
21 |             Path(log_dir) / Path(log_file))
22 |         fh.setFormatter(formatter)
23 |         self.logger.addHandler(fh)
24 |         if echo:
25 |             sh = logging.StreamHandler()
26 |             sh.setFormatter(formatter)
27 |             self.logger.addHandler(sh)
28 |         self.logger.setLevel(level)
29 |         self.logger.propagate = False
30 |         self.tbwriter = None
31 |         if use_tensorboard:
32 |             from tensorboardX import SummaryWriter
33 |             # Tensorboard
34 |             folder = str(
35 |                 Path(log_dir) / "summaries" /
36 |                 "{}_{}".format(self.model_name, date_str))
37 |             self.tbwriter = SummaryWriter(
38 |                 folder
39 |             )
40 | 
41 |     def info(self, msg, *args):
42 |         self.logger.info(msg, *args)
43 | 
44 |     def warning(self, msg, *args):
45 |         self.logger.warning(msg, *args)
46 | 
47 |     def debug(self, msg, *args):
48 |         self.logger.debug(msg, *args)
49 | 
50 |     def error(self, msg, *args):
51 |         self.logger.error(msg, *args)
52 | 
53 |     def tb_scalars(self, key, value, step):
54 |         if self.tbwriter is None:
55 |             self.debug("Tensorboard writer is not enabled.")
56 |         else:
57 |             if isinstance(value, dict):
58 |                 self.tbwriter.add_scalars(key, value, step)
59 |             else:
60 |                 self.tbwriter.add_scalar(key, value, step)
61 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/examples/imagenette/README.md:
--------------------------------------------------------------------------------
1 | # Imagenette Example
2 | 
3 | ## Preparation
4 | 
5 | Download the [imagenette dataset (full)](https://s3.amazonaws.com/fast-ai-imageclas/imagenette.tgz) and extract it into the `data` folder. It should then contain two folders: `train` and `val`.
6 | 
7 | ## Training instructions
8 | 
9 | Run `python main.py -h` to view all the available arguments. 
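For example, the first entry in the local results table below corresponds to flags along these lines (`--epochs 5` and `--arch seresnext50` are the defaults and are spelled out here only for clarity):

```
python main.py --batch-size 64 --lr 5e-3 --mixup-alpha 0 --epochs 5 --arch seresnext50
```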
10 | 
11 | ## Some Local Results
12 | 
13 | Hardware: i7-7700 + GTX 1070
14 | 
15 | | Size (px) | Epochs | Accuracy | Params | Arch | Log |
16 | |--|--|--|--|--|--|
17 | | 192 | 5 | 86.80% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0` | seresnext50 | [bs64_8680.txt](logs/bs64_8680.txt) |
18 | | 192 | 5 | 86.00% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0.2` | seresnext50 | [bs64_mixup02_8600.txt](logs/bs64_mixup02_8600.txt) |
19 | | 192 | 10 | 89.80% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0` | seresnext50 | [bs64_e10.txt](logs/bs64_e10.txt) |
20 | 
21 | ## Google Colab Results
22 | 
23 | [Notebook Link](https://colab.research.google.com/drive/1NppuVSUvNYIEfL7j3DEOKemhrdZFFPDg)
24 | 
25 | | Size (px) | Epochs | Accuracy | Params | Arch | Log | Amp | Time |
26 | |--|--|--|--|--|--|--|--|
27 | | 192 | 5 | 85.60% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0` | seresnext50 | [colab_o0_bs64_e5.txt](logs/colab_o0_bs64_e5.txt) | | 13min 18s |
28 | | 192 | 5 | 84.20% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0 --amp O1` | seresnext50 | [colab_o1_bs64_e5.txt](logs/colab_o1_bs64_e5.txt) | O1 | 9min 59s |
29 | | 192 | 5 | 85.80% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0 --amp O1` | seresnext50 | [colab_o1_bs64_e5_2.txt](logs/colab_o1_bs64_e5_2.txt)* | O1 | 9min 47s |
30 | | 192 | 5 | 85.40% | `--batch-size 64 --lr 5e-3 --mixup-alpha 0 --amp O2` | seresnext50 | [colab_o2_bs64_e5.txt](logs/colab_o2_bs64_e5.txt)* | O2 | 9min 35s |
31 | | 192 | 20 | 93.40% | `--batch-size 64 --lr 3e-3 --mixup-alpha 0` | seresnext50 | [colab_o0_bs64_e20.txt](logs/colab_o0_bs64_e20.txt) | | 52min 50s |
32 | | 192 | 20 | 92.40% | `--batch-size 64 --lr 3e-3 --mixup-alpha 0 --amp O1` | seresnext50 | [colab_o1_bs64_e20.txt](logs/colab_o1_bs64_e20.txt) | O1 | 39min 31s |
33 | | 192 | 20 | 93.00% | `--batch-size 64 --lr 3e-3 --mixup-alpha 0 --amp O2` | seresnext50 | [colab_o2_bs64_e20.txt](logs/colab_o2_bs64_e20.txt)* | O2 | 38min 1s |
34 | 
35 | *: Using the newer version where the weights of the entire network are explicitly initialized. 
36 | -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/transforms.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from albumentations import ( 3 | Compose, HorizontalFlip, Rotate, HueSaturationValue, 4 | RandomBrightness, RandomContrast, RandomGamma, JpegCompression, GaussNoise, 5 | Cutout, MedianBlur, Blur, OneOf, IAAAdditiveGaussianNoise, OpticalDistortion, 6 | GridDistortion, IAAPiecewiseAffine, ShiftScaleRotate, CenterCrop, 7 | RandomCrop, CenterCrop, Resize, PadIfNeeded, RandomScale, SmallestMaxSize 8 | ) 9 | from albumentations.pytorch.transforms import ToTensor 10 | 11 | cv2.setNumThreads(0) 12 | 13 | train_transform = Compose([ 14 | SmallestMaxSize(224), 15 | RandomScale(scale_limit=0.125), 16 | # PadIfNeeded(256, 256, border_mode=cv2.BORDER_CONSTANT., value=0, p=1.), 17 | # ShiftScaleRotate( 18 | # shift_limit=0.0625, scale_limit=0.1, rotate_limit=30, 19 | # border_mode=cv2.BORDER_REFLECT_101, p=1.), 20 | Rotate(limit=20, border_mode=cv2.BORDER_REFLECT_101, p=1.), 21 | OneOf([ 22 | RandomCrop(192, 192, p=0.9), 23 | CenterCrop(192, 192, p=0.1), 24 | ], p=1.), 25 | HorizontalFlip(p=0.5), 26 | RandomContrast(limit=0.2, p=0.5), 27 | RandomGamma(gamma_limit=(80, 120), p=0.5), 28 | RandomBrightness(limit=0.2, p=0.5), 29 | # HueSaturationValue(hue_shift_limit=5, sat_shift_limit=20, 30 | # val_shift_limit=10, p=1.), 31 | # OneOf([ 32 | # OpticalDistortion(p=0.3), 33 | # GridDistortion(p=0.1), 34 | # IAAPiecewiseAffine(p=0.3), 35 | # ], p=0.2), 36 | # OneOf([ 37 | # IAAAdditiveGaussianNoise( 38 | # loc=0, scale=(1., 6.75), per_channel=False, p=0.3), 39 | # GaussNoise(var_limit=(5.0, 20.0), p=0.6), 40 | # ], p=0.5), 41 | # Cutout(num_holes=4, max_h_size=30, max_w_size=50, p=0.75), 42 | # JpegCompression(quality_lower=50, quality_upper=100, p=0.5) 43 | ]) 44 | 45 | 46 | test_transform = Compose([ 47 | # RandomScale(scale_limit=0.125), 48 | SmallestMaxSize(224), 49 | # PadIfNeeded(256, 256, border_mode=cv2.BORDER_REFLECT_101, value=0, p=1.), 50 | # OneOf([ 51 | # RandomCrop(224, 224, p=0.9), 52 | # CenterCrop(224, 224, p=0.1), 53 | # ], p=1.), 54 | CenterCrop(192, 192, p=1.) 
55 |     # HorizontalFlip(p=0.5),
56 | ])
57 | 
58 | 
59 | tensor_transform = ToTensor(normalize=dict(
60 |     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
61 | )
62 | 
--------------------------------------------------------------------------------
/imet/dataset.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from typing import Callable, List
3 | 
4 | import cv2
5 | import torch
6 | import pandas as pd
7 | import numpy as np
8 | from torch.utils.data import Dataset
9 | 
10 | from .transforms import tensor_transform
11 | from .utils import ON_KAGGLE
12 | 
13 | 
14 | N_CLASSES = 1103
15 | DATA_ROOT = Path('../input/imet-2019-fgvc6' if ON_KAGGLE else './data')
16 | 
17 | 
18 | class TrainDataset(Dataset):
19 |     def __init__(self, root: Path, df: pd.DataFrame,
20 |                  image_transform: Callable, debug: bool = True):
21 |         super().__init__()
22 |         self._root = root
23 |         self._df = df
24 |         self._image_transform = image_transform
25 |         self._debug = debug
26 | 
27 |     def __len__(self):
28 |         return len(self._df)
29 | 
30 |     def __getitem__(self, idx: int):
31 |         item = self._df.iloc[idx]
32 |         image = load_transform_image(
33 |             item, self._root, self._image_transform, debug=self._debug)
34 |         target = torch.from_numpy(
35 |             item.iloc[1:-1].values.astype("float32")).float()
36 |         return image, target
37 | 
38 | 
39 | class TestDataset(Dataset):
40 |     def __init__(self, root: Path, df: pd.DataFrame,
41 |                  image_transform: Callable, debug: bool = True):
42 |         self._root = root
43 |         self._df = df
44 |         self._image_transform = image_transform
45 |         self._debug = debug
46 | 
47 |     def __len__(self):
48 |         return len(self._df)
49 | 
50 |     def __getitem__(self, idx):
51 |         item = self._df.iloc[idx]
52 |         image = load_transform_image(
53 |             item, self._root, self._image_transform, debug=self._debug)
54 |         return image, 0
55 | 
56 | 
57 | def load_transform_image(
58 |         item, root: Path, image_transform: Callable, debug: bool = False):
59 |     image = load_image(item, root)
60 |     image = image_transform(image=image)["image"]
61 |     if debug:
62 |         cv2.imwrite('_debug.png', cv2.cvtColor(image, cv2.COLOR_RGB2BGR))  # the transformed image is a numpy array (RGB), not a PIL image
63 |     tensor = tensor_transform(image=image)["image"]
64 |     return tensor
65 | 
66 | 
67 | def load_image(item, root: Path) -> np.ndarray:
68 |     image = cv2.imread(str(root / f'{item.id}.png'))
69 |     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
70 |     # base_size = min(image.shape[0], image.shape[1])
71 |     # ratio = 256 / base_size
72 |     # image = cv2.resize(image, None, fx=ratio, fy=ratio,
73 |     #                    interpolation=cv2.INTER_CUBIC)
74 |     return image
75 | 
76 | 
77 | def get_ids(root: Path) -> List[str]:
78 |     return sorted({p.name.split('_')[0] for p in root.glob('*.png')})
79 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/examples/imagenette/models.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | 
3 | import torch
4 | import numpy as np
5 | import pretrainedmodels
6 | from torch import nn
7 | from torch.nn import functional as F
8 | 
9 | 
10 | class Flatten(nn.Module):
11 |     def forward(self, x):
12 |         return x.view(x.size(0), -1)
13 | 
14 | 
15 | def create_net(net_cls, pretrained: bool):
16 |     net = net_cls(pretrained=pretrained)
17 |     return net
18 | 
19 | 
20 | def get_head(nf: int, n_classes):
21 |     model = nn.Sequential(
22 |         nn.AdaptiveAvgPool2d(1),
23 |         Flatten(),
24 |         # nn.Dropout(p=0.25),
25 |         nn.Linear(nf, n_classes)
26 |     )
27 |     return model
28 | 
29 | 
30 | def init_weights(model):
31 |     for i, module 
in enumerate(model): 32 | if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d)): 33 | if module.weight is not None: 34 | nn.init.uniform_(module.weight) 35 | if module.bias is not None: 36 | nn.init.constant_(module.bias, 0) 37 | if isinstance(module, (nn.Linear, nn.Conv2d, nn.Conv1d)): 38 | if getattr(module, "weight_v", None) is not None: 39 | print("Initing linear with weight normalization") 40 | assert model[i].weight_g is not None 41 | else: 42 | nn.init.kaiming_normal_(module.weight) 43 | print("Initing linear") 44 | if module.bias is not None: 45 | nn.init.constant_(module.bias, 0) 46 | return model 47 | 48 | 49 | def get_seresnet_model(arch: str = "se_resnext50_32x4d", n_classes: int = 10, pretrained: bool = False): 50 | full = pretrainedmodels.__dict__[arch]( 51 | pretrained='imagenet' if pretrained else None) 52 | model = nn.Sequential( 53 | nn.Sequential(full.layer0, full.layer1, full.layer2, full.layer3[:3]), 54 | nn.Sequential(full.layer3[3:], full.layer4), 55 | get_head(2048, n_classes)) 56 | print(" | ".join([ 57 | "{:,d}".format(np.sum([p.numel() for p in x.parameters()])) for x in model])) 58 | return init_weights(model) 59 | 60 | 61 | def get_densenet_model(arch: str = "densenet169", n_classes: int = 10, pretrained: bool = False): 62 | full = pretrainedmodels.__dict__[arch]( 63 | pretrained='imagenet' if pretrained else None) 64 | print(len(full.features)) 65 | model = nn.Sequential( 66 | nn.Sequential(*full.features[:8]), 67 | nn.Sequential(*full.features[8:]), 68 | get_head(full.features[-1].num_features, n_classes)) 69 | print(" | ".join([ 70 | "{:,d}".format(np.sum([p.numel() for p in x.parameters()])) for x in model])) 71 | return init_weights(model) 72 | 73 | 74 | class Swish(nn.Module): 75 | def forward(self, x): 76 | """ Swish activation function """ 77 | return x * torch.sigmoid(x) 78 | -------------------------------------------------------------------------------- /pytorch_helper_bot/helperbot/metrics.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Tuple 3 | 4 | import torch 5 | import numpy as np 6 | from sklearn.metrics import fbeta_score, roc_auc_score 7 | from sklearn.exceptions import UndefinedMetricWarning 8 | 9 | 10 | class Metric: 11 | name = "metric" 12 | 13 | def __call__(self, truth: torch.Tensor, pred: torch.Tensor) -> Tuple[float, str]: 14 | raise NotImplementedError() 15 | 16 | 17 | class FBeta(Metric): 18 | """FBeta for binary targets""" 19 | name = "fbeta" 20 | 21 | def __init__(self, step, beta=2, average="binary"): 22 | self.step = step 23 | self.beta = beta 24 | self.average = average 25 | 26 | def __call__(self, truth: torch.Tensor, pred: torch.Tensor) -> Tuple[float, str]: 27 | best_fbeta, best_thres = self.find_best_fbeta_threshold( 28 | truth.numpy(), torch.sigmoid(pred).numpy(), 29 | step=self.step, beta=self.beta) 30 | return best_fbeta * -1, f"{best_fbeta:.4f} @ {best_thres:.2f}" 31 | 32 | def find_best_fbeta_threshold(self, truth, probs, beta=2, step=0.05): 33 | best, best_thres = 0, -1 34 | with warnings.catch_warnings(): 35 | warnings.simplefilter('ignore', category=UndefinedMetricWarning) 36 | for thres in np.arange(step, 1, step): 37 | current = fbeta_score( 38 | truth, (probs >= thres).astype("int8"), 39 | beta=beta, average=self.average) 40 | if current > best: 41 | best = current 42 | best_thres = thres 43 | return best, best_thres 44 | 45 | 46 | class AUC(Metric): 47 | """AUC for binary targets""" 48 | name = "auc" 49 | 50 | def __call__(self, 
truth: torch.Tensor, pred: torch.Tensor) -> Tuple[float, str]:
51 |         auc_score = roc_auc_score(
52 |             truth.numpy(), torch.sigmoid(pred).numpy())
53 |         return auc_score * -1, f"{auc_score * 100:.2f}"
54 | 
55 | 
56 | class Top1Accuracy(Metric):
57 |     name = "accuracy"
58 | 
59 |     def __call__(self, truth: torch.Tensor, pred: torch.Tensor) -> Tuple[float, str]:
60 |         correct = torch.sum(
61 |             truth.view(-1) == torch.argmax(pred, dim=-1).view(-1)).item()
62 |         total = truth.view(-1).size(0)
63 |         accuracy = (correct / total)
64 |         return accuracy * -1, f"{accuracy * 100:.2f}%"
65 | 
66 | 
67 | class TopKAccuracy(Metric):
68 |     def __init__(self, k=1):
69 |         self.name = f"top_{k}_accuracy"
70 |         self.k = k
71 | 
72 |     def __call__(self, truth: torch.Tensor, pred: torch.Tensor) -> Tuple[float, str]:
73 |         with torch.no_grad():
74 |             _, pred = pred.topk(self.k, dim=1, largest=True, sorted=True)
75 |             pred = pred.t()
76 |             correct = pred.eq(
77 |                 truth.view(1, -1).expand_as(pred)
78 |             ).view(-1).float().sum(0, keepdim=True)
79 |             accuracy = correct.mul_(100.0 / truth.size(0)).item()
80 |             return accuracy * -1, f"{accuracy:.2f}%"
81 | 
--------------------------------------------------------------------------------
/imet/make_folds.py:
--------------------------------------------------------------------------------
1 | import random
2 | import argparse
3 | from collections import defaultdict, Counter
4 | from typing import Dict
5 | 
6 | import numpy as np
7 | import pandas as pd
8 | from tqdm import tqdm
9 | 
10 | from .dataset import DATA_ROOT
11 | from .main import CACHE_DIR
12 | from .utils import ON_KAGGLE
13 | 
14 | N_CLASSES = 1103
15 | 
16 | 
17 | def expand_labels():
18 |     print("Expanding labels...")
19 |     df = pd.read_csv(DATA_ROOT / 'train.csv')
20 |     df_label_names = pd.read_csv(DATA_ROOT / "labels.csv")
21 |     labels = np.zeros((len(df), N_CLASSES)).astype("uint8")
22 |     for i, row in tqdm(df.iterrows(), total=df.shape[0], disable=ON_KAGGLE):
23 |         for label in row['attribute_ids'].split(' '):
24 |             labels[i, int(label)] = 1
25 |     df_labels = pd.DataFrame(
26 |         labels,
27 |         index=df.index, columns=df_label_names.attribute_name.values
28 |     )
29 |     df = pd.concat([df[["id"]], df_labels], axis=1)
30 |     df.to_pickle(str(CACHE_DIR / "train_expanded_labels.pickle"))
31 |     return df
32 | 
33 | 
34 | def make_folds(n_folds: int, min_occurrence: int = 30) -> pd.DataFrame:
35 |     from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
36 |     try:
37 |         df = pd.read_pickle(DATA_ROOT / "train_expanded_labels.pickle")
38 |     except FileNotFoundError:  # fall back to building the expanded labels
39 |         df = expand_labels()
40 |     skf = MultilabelStratifiedKFold(
41 |         n_splits=n_folds, random_state=42, shuffle=True)
42 |     print("Creating folds...")
43 |     labels_to_use = (np.sum(df.iloc[:, 1:].values, axis=0) > min_occurrence)
44 |     empty_rows = np.sum(df.iloc[:, 1:].values[:, labels_to_use], axis=1) == 0
45 |     print("Empty rows after truncating:", sum(empty_rows))
46 |     print("Eligible labels:", sum(labels_to_use))
47 |     df = df[~empty_rows]
48 |     folds = np.array([-1] * len(df))
49 |     for fold, (_, valid_idx) in enumerate(skf.split(df[["id"]], df.iloc[:, 1:].values[:, labels_to_use])):
50 |         folds[valid_idx] = fold
51 |     df['fold'] = folds
52 |     return df
53 | 
54 | 
55 | def make_folds_reference(n_folds: int) -> pd.DataFrame:
56 |     df = pd.read_csv(DATA_ROOT / 'train.csv')
57 |     cls_counts = Counter(cls for classes in df['attribute_ids'].str.split()
58 |                          for cls in classes)
59 |     fold_cls_counts: Dict = defaultdict(int)
60 |     folds = [-1] * len(df)
61 |     for item in tqdm(df.sample(frac=1, random_state=42).itertuples(), 
62 |                      total=len(df), disable=ON_KAGGLE):
63 |         cls = min(item.attribute_ids.split(), key=lambda cls: cls_counts[cls])
64 |         fold_counts = [(f, fold_cls_counts[f, cls]) for f in range(n_folds)]
65 |         min_count = min([count for _, count in fold_counts])
66 |         random.seed(item.Index)
67 |         fold = random.choice([f for f, count in fold_counts
68 |                               if count == min_count])
69 |         folds[item.Index] = fold
70 |         for cls in item.attribute_ids.split():
71 |             fold_cls_counts[fold, cls] += 1
72 |     df['fold'] = folds
73 |     return df
74 | 
75 | 
76 | def main():
77 |     parser = argparse.ArgumentParser()
78 |     parser.add_argument('--n-folds', type=int, default=10)
79 |     args = parser.parse_args()
80 |     df = make_folds(n_folds=args.n_folds)
81 |     df.to_pickle(CACHE_DIR / 'folds.pkl')
82 | 
83 | 
84 | if __name__ == '__main__':
85 |     main()
86 | 
--------------------------------------------------------------------------------
/pytorch_helper_bot/helperbot/callbacks.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | 
4 | 
5 | class Callback:
6 |     def on_batch_inputs(self, bot, input_tensors, targets):
7 |         return input_tensors, targets
8 | 
9 |     def on_epoch_ends(self, bot, epoch):
10 |         return
11 | 
12 |     def on_step_ends(self, bot):
13 |         return
14 | 
15 | 
16 | class MixUpCallback(Callback):
17 |     """Assumes the first dimension is batch.
18 | 
19 |     Reference: https://github.com/fastai/fastai/blob/master/fastai/callbacks/mixup.py
20 |     """
21 | 
22 |     def __init__(self, alpha: float = 0.4, softmax_target: bool = False):
23 |         super().__init__()
24 |         self.alpha = alpha
25 |         self.softmax_target = softmax_target
26 | 
27 |     def on_batch_inputs(self, bot, input_tensors, targets):
28 |         batch = input_tensors[0]
29 |         permuted_idx = torch.randperm(batch.size(0)).to(batch.device)
30 |         lambd = np.random.beta(self.alpha, self.alpha, batch.size(0))
31 |         lambd = np.concatenate(
32 |             [lambd[:, np.newaxis], 1-lambd[:, np.newaxis]], axis=1
33 |         ).max(axis=1)
34 |         # Create the tensor and expand (for batch inputs)
35 |         lambd_tensor = batch.new(lambd).view(
36 |             -1, *[1 for _ in range(len(batch.size())-1)]
37 |         ).expand(-1, *batch.shape[1:])
38 |         # Combine input batch
39 |         new_batch = (batch * lambd_tensor +
40 |                      batch[permuted_idx] * (1-lambd_tensor))
41 |         # Create the tensor and expand (for target)
42 |         lambd_tensor = batch.new(lambd).view(
43 |             -1, *[1 for _ in range(len(targets.size())-1)]
44 |         ).expand(-1, *targets.shape[1:])
45 |         # Combine targets
46 |         if self.softmax_target:
47 |             new_targets = torch.stack([
48 |                 targets.float(), targets[permuted_idx].float(), lambd_tensor
49 |             ], dim=1)
50 |         else:
51 |             new_targets = (
52 |                 targets * lambd_tensor +
53 |                 targets[permuted_idx] * (1-lambd_tensor)
54 |             )
55 |         input_tensors[0] = new_batch
56 |         return input_tensors, new_targets
57 | 
58 | 
59 | class LearningRateSchedulerCallback(Callback):
60 |     def __init__(self, scheduler):
61 |         super().__init__()
62 |         self.scheduler = scheduler
63 | 
64 |     def on_step_ends(self, bot):
65 |         self.scheduler.step()
66 | 
67 | 
68 | class StepwiseLinearPropertyScheduler(Callback):
69 |     def __init__(self, target_obj, property_name, start_val, end_val, decay_start_step, decay):
70 |         self.target_obj = target_obj
71 |         self.property_name = property_name
72 |         self.start_val = start_val
73 |         self.end_val = end_val
74 |         self.decay_start_step = decay_start_step
75 |         self.decay = decay
76 | 
77 |     def on_step_ends(self, bot):
78 |         if bot.step % 200 == 0:
79 |             bot.logger.info(
80 |                 "%s %s %.4f",
81 | 
self.target_obj.__class__.__name__, 82 | self.property_name, 83 | getattr(self.target_obj, self.property_name)) 84 | new_val = self.get_value(bot) 85 | setattr(self.target_obj, self.property_name, new_val) 86 | 87 | def get_value(self, bot): 88 | if self.start_val == self.end_val or bot.step <= self.decay_start_step: 89 | return self.start_val 90 | change = (self.end_val - self.start_val) * min( 91 | ((bot.step - self.decay_start_step) * self.decay), 1 92 | ) 93 | return self.start_val + change 94 | -------------------------------------------------------------------------------- /imet/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | import math 3 | 4 | import cv2 5 | from PIL import Image 6 | from torchvision.transforms import ( 7 | Normalize, Compose, Resize) 8 | from albumentations import ( 9 | Compose, HorizontalFlip, Rotate, HueSaturationValue, 10 | RandomBrightness, RandomContrast, RandomGamma, JpegCompression, GaussNoise, 11 | Cutout, MedianBlur, Blur, OneOf, IAAAdditiveGaussianNoise, OpticalDistortion, 12 | GridDistortion, IAAPiecewiseAffine, ShiftScaleRotate, CenterCrop, 13 | RandomCrop, CenterCrop, Resize, PadIfNeeded, RandomScale, SmallestMaxSize 14 | ) 15 | import albumentations.augmentations.functional as F 16 | from albumentations.pytorch.transforms import ToTensor 17 | 18 | cv2.setNumThreads(0) 19 | 20 | 21 | class RandomCropIfNeeded(RandomCrop): 22 | """Take from: 23 | https://www.kaggle.com/c/imet-2019-fgvc6/discussion/94687 24 | """ 25 | 26 | def __init__(self, height, width, always_apply=False, p=1.0): 27 | super().__init__(always_apply, p) 28 | self.height = height 29 | self.width = width 30 | 31 | def apply(self, img, h_start=0, w_start=0, **params): 32 | h, w, _ = img.shape 33 | return F.random_crop(img, min(self.height, h), min(self.width, w), h_start, w_start) 34 | 35 | 36 | def get_train_transform(border_mode, size=320): 37 | return Compose([ 38 | # PadIfNeeded(256, 256, border_mode=cv2.BORDER_CONSTANT., value=0, p=1.), 39 | # ShiftScaleRotate( 40 | # shift_limit=0.0625, scale_limit=0.1, rotate_limit=30, 41 | # border_mode=cv2.BORDER_REFLECT_101, p=1.), 42 | # RandomScale(scale_limit=0.125), 43 | # HorizontalFlip(p=0.5), 44 | # RandomContrast(limit=0.2, p=0.5), 45 | # RandomGamma(gamma_limit=(80, 120), p=0.5), 46 | # RandomBrightness(limit=0.2, p=0.5), 47 | # Rotate(limit=20, border_mode=border_mode, p=1.), 48 | HorizontalFlip(p=0.5), 49 | OneOf([ 50 | RandomBrightness(0.1, p=1), 51 | RandomContrast(0.1, p=1), 52 | ], p=0.3), 53 | ShiftScaleRotate(shift_limit=0.1, scale_limit=0.0, 54 | rotate_limit=15, p=0.3), 55 | IAAAdditiveGaussianNoise(p=0.3), 56 | RandomCropIfNeeded(size * 2, size * 2), 57 | Resize(size, size), 58 | # HueSaturationValue(hue_shift_limit=5, sat_shift_limit=20, 59 | # val_shift_limit=10, p=1.), 60 | # OneOf([ 61 | # OpticalDistortion(p=0.3), 62 | # GridDistortion(p=0.1), 63 | # IAAPiecewiseAffine(p=0.3), 64 | # ], p=0.2), 65 | # OneOf([ 66 | # IAAAdditiveGaussianNoise( 67 | # loc=0, scale=(1., 6.75), per_channel=False, p=0.3), 68 | # GaussNoise(var_limit=(5.0, 20.0), p=0.6), 69 | # ], p=0.5), 70 | # Cutout(num_holes=4, max_h_size=30, max_w_size=50, p=0.75), 71 | # JpegCompression(quality_lower=50, quality_upper=100, p=0.5) 72 | ]) 73 | 74 | 75 | def get_test_transform(size=320, flip=True): 76 | transformations = [ 77 | # SmallestMaxSize(320), 78 | # RandomScale(scale_limit=0.125), 79 | # PadIfNeeded(256, 256, border_mode=cv2.BORDER_REFLECT_101, value=0, p=1.), 80 | # OneOf([ 81 | 
        #     RandomCrop(256, 256, p=0.9),
82 |         #     CenterCrop(256, 256, p=0.1),
83 |         # ], p=1.),
84 |         RandomCropIfNeeded(size * 2, size * 2),
85 |         Resize(size, size),
86 |     ]
87 |     if flip:
88 |         transformations.append(HorizontalFlip(p=1.))
89 |     return Compose(transformations)
90 | 
91 | 
92 | tensor_transform = ToTensor(normalize=dict(
93 |     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
94 | )
95 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # imet-collection-2019
2 | 
3 | A fairly generic solution to iMet Collection 2019 - FGVC6 on Kaggle
4 | 
5 | Credit: This solution is built upon [Konstantin Lopuhin's public baseline](https://github.com/lopuhin/kaggle-imet-2019).
6 | 
7 | ## Requirements
8 | 
9 | Directly taken from [requirements.txt](requirements.txt) (they're also in [setup.py](setup.py)):
10 | 
11 | * torch>=1.0.0
12 | * albumentations>=0.2.3
13 | * pretrainedmodels>=0.7.4
14 | * tqdm==4.29.1
15 | * scikit-learn>=0.21.2
16 | * pandas>=0.24.0
17 | * helperbot>=0.1.3
18 | 
19 | `helperbot` is included in this repo via `git subtree`. Install it after PyTorch and before everything else:
20 | 
21 | ```
22 | cd pytorch_helper_bot && pip install .
23 | ```
24 | 
25 | ## Environments
26 | 
27 | I trained all my models using Kaggle Kernels. Example public kernels can be found at:
28 | 
29 | * [Trainer](https://www.kaggle.com/ceshine/imet-trainer)
30 | * [Validation (with TTA)](https://www.kaggle.com/ceshine/imet-validation-kernel-public)
31 | * [Inference - 256x256](https://www.kaggle.com/ceshine/imet-inference-kernel-public?scriptVersionId=16663008) - Private score *0.614* with 3 models (already in bronze range).
32 | * [Inference - 320x320](https://www.kaggle.com/ceshine/imet-inference-kernel-public?scriptVersionId=17048642) - Private score *0.622* with 3 models.
33 | 
34 | One drawback of Kaggle Kernels is that it's hard to control the version of PyTorch. My models from the competition were trained with PyTorch 1.0, but they cannot be loaded in the post-competition kernels due to this [compatibility issue](https://github.com/pytorch/pytorch/issues/20756). (The issue was fixed in the PyTorch master branch, but had not been released yet at the time of writing.)
35 | 
36 | To avoid this kind of hassle in the future, I started to keep two versions of each trained model: one that contains the fully pickled model, saved with `torch.save(model, f'final_{fold}.pth')` to speed up experiment iteration, and one that contains only the model weights plus the name of the architecture as a failover, saved with `torch.save([args.arch, model.state_dict()], f'failover_{args.arch}_{fold}.pth')`.
37 | 
38 | ### Freezing the first three (Resnet) layers
39 | 
40 | The 10th place solution suggested that training only the last two (Resnet) layers is sufficient to get good accuracies. This technique allows us to train se-resnext101 models on 320x320 images in Kaggle Kernels faster. (Otherwise the models would be underfit and underperform relative to se-resnext50.)
41 | 
42 | The code that freezes the first three layers lives in the [*freezing*](https://github.com/ceshine/imet-collection-2019/tree/freezing) branch; a minimal sketch of the idea follows below. 
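The sketch below illustrates the idea, assuming the three-block `nn.Sequential` layout from `imet/models.py` (early layers, the last two layers, classifier head) and the `freeze_layers` helper from helperbot; it is not the exact code in the *freezing* branch.

```
from helperbot import freeze_layers

from imet.models import get_seresnet_model

# The model is an nn.Sequential with three children:
# [layer0 + layer1 + layer2 + layer3[:3], layer3[3:] + layer4, head].
model = get_seresnet_model(
    arch="se_resnext101_32x4d", n_classes=1103, pretrained=True)
# Freeze the first group; keep the last two (Resnet) layers
# and the head trainable.
freeze_layers(list(model), freeze_flags=[True, False, False])
```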
43 | 44 | * [3-model se-resnext101 Inference](https://www.kaggle.com/ceshine/imet-inference-kernel-public?scriptVersionId=17497470) - private 0.625 45 | * [8-model se-resnext101 Inference](https://www.kaggle.com/ceshine/imet-inference-kernel-public?scriptVersionId=17498665) - private 0.629 (near silver range) 46 | 47 | ## Instructions 48 | 49 | ### Making K-Fold validation sets 50 | 51 | Example: 52 | 53 | ``` 54 | python -m imet.make_folds --n-folds 10 55 | ``` 56 | 57 | This will create a `folds.pkl` that you can reuse later. 58 | 59 | ### Training model 60 | 61 | Example: 62 | 63 | ``` 64 | python -m imet.main train --batch-size 48 --epochs 11 --fold 0 --arch seresnext101 --early-stop 4 65 | ``` 66 | 67 | ### Evaluating model (with TTA) 68 | 69 | Example: 70 | 71 | ``` 72 | python -m imet.main validate --fold 0 --batch-size 256 --tta 4 --model . 73 | ``` 74 | 75 | ### Making Predictions (with TTA) 76 | 77 | Example: 78 | 79 | ``` 80 | python -m imet.main predict_test --batch-size 256 --fold 0 --tta 5 --model ./seresnext50/ 81 | ``` 82 | 83 | Then create a submission file (this one only uses predictions from three models): 84 | 85 | ``` 86 | python -m imet.make_submission test_0 test_1 test_2 --threshold 0.09 87 | ``` -------------------------------------------------------------------------------- /imet/partialconv2d.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # BSD 3-Clause License 3 | # 4 | # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Author & Contact: Guilin Liu (guilinl@nvidia.com) 7 | ############################################################################### 8 | 9 | import torch 10 | import torch.nn.functional as F 11 | from torch import nn, cuda 12 | from torch.autograd import Variable 13 | 14 | 15 | class PartialConv2d(nn.Conv2d): 16 | def __init__(self, *args, **kwargs): 17 | 18 | # whether the mask is multi-channel or not 19 | if 'multi_channel' in kwargs: 20 | self.multi_channel = kwargs['multi_channel'] 21 | kwargs.pop('multi_channel') 22 | else: 23 | self.multi_channel = False 24 | 25 | if 'return_mask' in kwargs: 26 | self.return_mask = kwargs['return_mask'] 27 | kwargs.pop('return_mask') 28 | else: 29 | self.return_mask = False 30 | 31 | super(PartialConv2d, self).__init__(*args, **kwargs) 32 | 33 | if self.multi_channel: 34 | self.weight_maskUpdater = torch.ones( 35 | self.out_channels, self.in_channels, self.kernel_size[0], self.kernel_size[1]) 36 | else: 37 | self.weight_maskUpdater = torch.ones( 38 | 1, 1, self.kernel_size[0], self.kernel_size[1]) 39 | 40 | self.slide_winsize = self.weight_maskUpdater.shape[1] * \ 41 | self.weight_maskUpdater.shape[2] * self.weight_maskUpdater.shape[3] 42 | 43 | self.last_size = (None, None, None, None) 44 | self.update_mask = None 45 | self.mask_ratio = None 46 | 47 | def forward(self, input, mask_in=None): 48 | assert len(input.shape) == 4 49 | if mask_in is not None or self.last_size != tuple(input.shape): 50 | self.last_size = tuple(input.shape) 51 | 52 | with torch.no_grad(): 53 | if self.weight_maskUpdater.type() != input.type(): 54 | self.weight_maskUpdater = self.weight_maskUpdater.to(input) 55 | 56 | if mask_in is None: 57 | # if mask is not provided, create a mask 58 | if self.multi_channel: 59 | mask = torch.ones( 60 | input.data.shape[0], input.data.shape[1], input.data.shape[2], input.data.shape[3]).to(input) 61 | else: 62 | mask = torch.ones( 63 | 1, 1, 
input.data.shape[2], input.data.shape[3]).to(input) 64 | else: 65 | mask = mask_in 66 | 67 | self.update_mask = F.conv2d(mask, self.weight_maskUpdater, bias=None, 68 | stride=self.stride, padding=self.padding, dilation=self.dilation, groups=1) 69 | 70 | self.mask_ratio = self.slide_winsize/(self.update_mask + 1e-8) 71 | # self.mask_ratio = torch.max(self.update_mask)/(self.update_mask + 1e-8) 72 | self.update_mask = torch.clamp(self.update_mask, 0, 1) 73 | self.mask_ratio = torch.mul(self.mask_ratio, self.update_mask) 74 | 75 | # if self.update_mask.type() != input.type() or self.mask_ratio.type() != input.type(): 76 | # self.update_mask.to(input) 77 | # self.mask_ratio.to(input) 78 | 79 | raw_out = super(PartialConv2d, self).forward( 80 | torch.mul(input, mask) if mask_in is not None else input) 81 | 82 | if self.bias is not None: 83 | bias_view = self.bias.view(1, self.out_channels, 1, 1) 84 | output = torch.mul(raw_out - bias_view, 85 | self.mask_ratio) + bias_view 86 | output = torch.mul(output, self.update_mask) 87 | else: 88 | output = torch.mul(raw_out, self.mask_ratio) 89 | 90 | if self.return_mask: 91 | return output, self.update_mask 92 | else: 93 | return output 94 | -------------------------------------------------------------------------------- /pytorch_helper_bot/helperbot/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from torch.optim.lr_scheduler import _LRScheduler 2 | from torch.optim import Optimizer 3 | 4 | 5 | class BaseLRScheduler(_LRScheduler): 6 | def __init__(self, optimizer, last_epoch=-1): 7 | """Intentionally not calling super().__init__()""" 8 | if not isinstance(optimizer, Optimizer): 9 | flag = False 10 | try: 11 | from apex.fp16_utils.fp16_optimizer import FP16_Optimizer 12 | if isinstance(optimizer, FP16_Optimizer): 13 | flag = True 14 | except ModuleNotFoundError: 15 | pass 16 | if not flag: 17 | raise TypeError('{} is not an Optimizer'.format( 18 | type(optimizer).__name__)) 19 | self.optimizer = optimizer 20 | if last_epoch == -1: 21 | for group in optimizer.param_groups: 22 | group.setdefault('initial_lr', group['lr']) 23 | else: 24 | for i, group in enumerate(optimizer.param_groups): 25 | if 'initial_lr' not in group: 26 | raise KeyError("param 'initial_lr' is not specified " 27 | "in param_groups[{}] when resuming an optimizer".format(i)) 28 | self.base_lrs = list( 29 | map(lambda group: group['initial_lr'], optimizer.param_groups)) 30 | self.step(last_epoch + 1) 31 | self.last_epoch = last_epoch 32 | 33 | 34 | class TriangularLR(BaseLRScheduler): 35 | def __init__(self, optimizer, max_mul, ratio, steps_per_cycle, decay=1, last_epoch=-1): 36 | self.max_mul = max_mul - 1 37 | self.turning_point = steps_per_cycle // (ratio + 1) 38 | self.steps_per_cycle = steps_per_cycle 39 | self.decay = decay 40 | self.history = [] 41 | super().__init__(optimizer, last_epoch) 42 | 43 | def get_lr(self): 44 | residual = self.last_epoch % self.steps_per_cycle 45 | multiplier = self.decay ** (self.last_epoch // self.steps_per_cycle) 46 | if residual <= self.turning_point: 47 | multiplier *= self.max_mul * (residual / self.turning_point) 48 | else: 49 | multiplier *= self.max_mul * ( 50 | (self.steps_per_cycle - residual) / 51 | (self.steps_per_cycle - self.turning_point)) 52 | new_lr = [ 53 | lr * (1 + multiplier) / (self.max_mul + 1) for lr in self.base_lrs] 54 | self.history.append(new_lr) 55 | return new_lr 56 | 57 | 58 | class GradualWarmupScheduler(BaseLRScheduler): 59 | """ Gradually warm-up(increasing) 
learning rate in optimizer. 60 | Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'. 61 | Source: https://github.com/ildoonet/pytorch-gradual-warmup-lr/blob/master/warmup_scheduler/scheduler.py 62 | Args: 63 | optimizer (Optimizer): Wrapped optimizer. 64 | multiplier: target learning rate = base lr * multiplier 65 | total_epoch: target learning rate is reached at total_epoch, gradually 66 | after_scheduler: after target_epoch, use this scheduler(eg. ReduceLROnPlateau) 67 | """ 68 | 69 | def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None): 70 | self.multiplier = multiplier 71 | if self.multiplier <= 1.: 72 | raise ValueError('multiplier should be greater than 1.') 73 | self.total_epoch = total_epoch 74 | self.after_scheduler = after_scheduler 75 | self.finished = False 76 | super().__init__(optimizer) 77 | 78 | def get_lr(self): 79 | if self.last_epoch > self.total_epoch: 80 | if self.after_scheduler: 81 | if not self.finished: 82 | self.after_scheduler.base_lrs = self.base_lrs 83 | self.finished = True 84 | return self.after_scheduler.get_lr() 85 | return self.base_lrs 86 | return [base_lr / self.multiplier * ((self.multiplier - 1.) * self.last_epoch / self.total_epoch + 1.) for base_lr in self.base_lrs] 87 | 88 | def step(self, epoch=None): 89 | if self.finished and self.after_scheduler: 90 | return self.after_scheduler.step(epoch) 91 | else: 92 | return super(GradualWarmupScheduler, self).step(epoch) 93 | -------------------------------------------------------------------------------- /imet/models.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import torch 4 | import numpy as np 5 | import pretrainedmodels 6 | from torch import nn 7 | from torch.nn import functional as F 8 | import torchvision.models as M 9 | # from efficientnet_pytorch import EfficientNet 10 | 11 | from . 
import seresnet_partial 12 | from .utils import ON_KAGGLE 13 | 14 | 15 | class Flatten(nn.Module): 16 | def forward(self, x): 17 | return x.view(x.size(0), -1) 18 | 19 | 20 | def create_net(net_cls, pretrained: bool): 21 | if ON_KAGGLE and pretrained: 22 | net = net_cls() 23 | model_name = net_cls.__name__ 24 | weights_path = f'../input/{model_name}/{model_name}.pth' 25 | net.load_state_dict(torch.load(weights_path)) 26 | else: 27 | net = net_cls(pretrained=pretrained) 28 | return net 29 | 30 | 31 | def get_head(nf: int, n_classes): 32 | model = nn.Sequential( 33 | nn.ReLU(), 34 | nn.AdaptiveAvgPool2d(1), 35 | Flatten(), 36 | # nn.BatchNorm1d(nf), 37 | nn.Dropout(p=0.25), 38 | nn.Linear(nf, n_classes) 39 | # nn.BatchNorm1d(nf), 40 | # nn.Dropout(p=0.25), 41 | # nn.Linear(nf, 1024), 42 | # nn.BatchNorm1d(1024), 43 | # nn.Dropout(p=0.25), 44 | # nn.Linear(1024, n_classes) 45 | ) 46 | for i, module in enumerate(model): 47 | if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d)): 48 | if module.weight is not None: 49 | nn.init.uniform_(module.weight) 50 | if module.bias is not None: 51 | nn.init.constant_(module.bias, 0) 52 | if isinstance(module, nn.Linear): 53 | if getattr(module, "weight_v", None) is not None: 54 | print("Initing linear with weight normalization") 55 | assert model[i].weight_g is not None 56 | else: 57 | nn.init.kaiming_normal_(module.weight) 58 | print("Initing linear") 59 | if module.bias is not None: 60 | nn.init.constant_(module.bias, 0) 61 | return model 62 | 63 | 64 | def get_seresnet_model(arch: str = "se_resnext101_32x4d", n_classes: int = 1103, pretrained=True): 65 | full = pretrainedmodels.__dict__[arch]( 66 | pretrained='imagenet' if pretrained else None) 67 | model = nn.Sequential( 68 | nn.Sequential(full.layer0, full.layer1, full.layer2, full.layer3[:3]), 69 | nn.Sequential(full.layer3[3:], full.layer4), 70 | get_head(2048, n_classes)) 71 | print(" | ".join([ 72 | "{:,d}".format(np.sum([p.numel() for p in x.parameters()])) for x in model])) 73 | return model 74 | 75 | 76 | def get_seresnet_partial_model(arch: str = "se_resnext101_32x4d", n_classes: int = 1103, pretrained=True): 77 | full = seresnet_partial.__dict__[arch]( 78 | pretrained='imagenet' if pretrained else None) 79 | model = nn.Sequential( 80 | nn.Sequential(full.layer0, full.layer1, full.layer2, full.layer3[:3]), 81 | nn.Sequential(full.layer3[3:], full.layer4), 82 | get_head(2048, n_classes)) 83 | print(" | ".join([ 84 | "{:,d}".format(np.sum([p.numel() for p in x.parameters()])) for x in model])) 85 | return model 86 | 87 | 88 | def get_densenet_model(arch: str = "densenet169", n_classes: int = 1103, pretrained=True): 89 | full = pretrainedmodels.__dict__[arch]( 90 | pretrained='imagenet' if pretrained else None) 91 | print(len(full.features)) 92 | model = nn.Sequential( 93 | nn.Sequential(*full.features[:8]), 94 | nn.Sequential(*full.features[8:]), 95 | get_head(full.features[-1].num_features, n_classes)) 96 | print(" | ".join([ 97 | "{:,d}".format(np.sum([p.numel() for p in x.parameters()])) for x in model])) 98 | return model 99 | 100 | 101 | class Swish(nn.Module): 102 | def forward(self, x): 103 | """ Swish activation function """ 104 | return x * torch.sigmoid(x) 105 | 106 | 107 | # def get_efficientnet(arch: str = "efficientnet-b3", n_classes: int = 1103, pretrained=True): 108 | # if pretrained == True: 109 | # base_model = EfficientNet.from_pretrained(arch) 110 | # else: 111 | # base_model = EfficientNet.from_name(arch) 112 | # # print(base_model) 113 | # print(len(base_model._blocks)) 
114 | # model = nn.Sequential( 115 | # nn.Sequential( 116 | # base_model._conv_stem, 117 | # base_model._bn0, 118 | # Swish(), 119 | # *base_model._blocks[:20] 120 | # ), 121 | # nn.Sequential(*base_model._blocks[20:]), 122 | # nn.Sequential( 123 | # base_model._conv_head, 124 | # base_model._bn1, 125 | # Swish(), 126 | # *get_head(base_model._fc.in_features, n_classes)[1:], 127 | # ) 128 | # ) 129 | # print(" | ".join([ 130 | # "{:,d}".format(np.sum([p.numel() for p in x.parameters()])) for x in model])) 131 | # return model 132 | -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from pathlib import Path 4 | from dataclasses import dataclass 5 | 6 | import pandas as pd 7 | import torch 8 | from torch import nn, cuda 9 | from torch.utils.data import DataLoader 10 | from torch.optim.lr_scheduler import CosineAnnealingLR 11 | from helperbot import ( 12 | BaseBot, WeightDecayOptimizerWrapper, TriangularLR, 13 | GradualWarmupScheduler, LearningRateSchedulerCallback, 14 | MixUpCallback, Top1Accuracy, TopKAccuracy 15 | ) 16 | from helperbot.loss import MixUpSoftmaxLoss 17 | 18 | from models import get_seresnet_model, get_densenet_model 19 | from dataset import TrainDataset, N_CLASSES, DATA_ROOT, build_dataframe_from_folder 20 | from transforms import train_transform, test_transform 21 | 22 | try: 23 | from apex import amp 24 | APEX_AVAILABLE = True 25 | except ModuleNotFoundError: 26 | APEX_AVAILABLE = False 27 | 28 | CACHE_DIR = Path('./data/cache/') 29 | CACHE_DIR.mkdir(exist_ok=True, parents=True) 30 | MODEL_DIR = Path('./data/cache/') 31 | MODEL_DIR.mkdir(exist_ok=True, parents=True) 32 | 33 | NO_DECAY = [ 34 | 'bias', 'bn1.weight', 'bn2.weight', 'bn3.weight' 35 | ] 36 | 37 | 38 | def make_loader(args, ds_class, df: pd.DataFrame, image_transform, drop_last=False, shuffle=False) -> DataLoader: 39 | return DataLoader( 40 | ds_class(df, image_transform, debug=args.debug), 41 | shuffle=shuffle, 42 | batch_size=args.batch_size, 43 | num_workers=args.workers, 44 | drop_last=drop_last 45 | ) 46 | 47 | 48 | @dataclass 49 | class ImageClassificationBot(BaseBot): 50 | checkpoint_dir: Path = CACHE_DIR / "model_cache/" 51 | log_dir: Path = MODEL_DIR / "logs/" 52 | 53 | def __post_init__(self): 54 | super().__post_init__() 55 | self.loss_format = "%.6f" 56 | self.metrics = (Top1Accuracy(), TopKAccuracy(k=3)) 57 | self.monitor_metric = "accuracy" 58 | 59 | def extract_prediction(self, x): 60 | return x 61 | 62 | 63 | def train_from_scratch(args, model, train_loader, valid_loader, criterion): 64 | n_steps = len(train_loader) * args.epochs 65 | optimizer = WeightDecayOptimizerWrapper( 66 | torch.optim.Adam( 67 | [ 68 | { 69 | 'params': [p for n, p in model.named_parameters() 70 | if not any(nd in n for nd in NO_DECAY)], 71 | }, 72 | { 73 | 'params': [p for n, p in model.named_parameters() 74 | if any(nd in n for nd in NO_DECAY)], 75 | } 76 | ], 77 | weight_decay=0, 78 | lr=args.lr 79 | ), 80 | weight_decay=[1e-1, 0], 81 | change_with_lr=True 82 | ) 83 | if args.debug: 84 | print( 85 | "No decay:", 86 | [n for n, p in model.named_parameters() 87 | if any(nd in n for nd in NO_DECAY)] 88 | ) 89 | if args.amp: 90 | if not APEX_AVAILABLE: 91 | raise ValueError("Apex is not installed!") 92 | model, optimizer = amp.initialize( 93 | model, optimizer, opt_level=args.amp 94 | ) 95 | 96 | callbacks = [ 97 | 
LearningRateSchedulerCallback( 98 | # TriangularLR( 99 | # optimizer, 100, ratio=4, steps_per_cycle=n_steps 100 | # ) 101 | GradualWarmupScheduler( 102 | optimizer, 100, len(train_loader), 103 | after_scheduler=CosineAnnealingLR( 104 | optimizer, n_steps - len(train_loader) 105 | ) 106 | ) 107 | ) 108 | ] 109 | if args.mixup_alpha: 110 | callbacks.append(MixUpCallback( 111 | alpha=args.mixup_alpha, softmax_target=True)) 112 | bot = ImageClassificationBot( 113 | model=model, train_loader=train_loader, 114 | val_loader=valid_loader, clip_grad=10., 115 | optimizer=optimizer, echo=True, 116 | criterion=criterion, 117 | avg_window=len(train_loader) // 5, 118 | callbacks=callbacks, 119 | pbar=True, use_tensorboard=True, 120 | use_amp=(args.amp != '') 121 | ) 122 | bot.train( 123 | n_steps, 124 | log_interval=len(train_loader) // 6, 125 | snapshot_interval=len(train_loader) // 2, 126 | # early_stopping_cnt=8, 127 | min_improv=1e-2, 128 | keep_n_snapshots=1 129 | ) 130 | bot.remove_checkpoints(keep=1) 131 | bot.load_model(bot.best_performers[0][1]) 132 | torch.save(bot.model.state_dict(), CACHE_DIR / 133 | f"final_weights.pth") 134 | bot.remove_checkpoints(keep=0) 135 | 136 | 137 | def main(): 138 | parser = argparse.ArgumentParser() 139 | arg = parser.add_argument 140 | arg('--batch-size', type=int, default=32) 141 | arg('--lr', type=float, default=2e-3) 142 | arg('--workers', type=int, default=4) 143 | arg('--epochs', type=int, default=5) 144 | arg('--mixup-alpha', type=float, default=0) 145 | arg('--arch', type=str, default='seresnext50') 146 | arg('--amp', type=str, default='') 147 | arg('--debug', action='store_true') 148 | args = parser.parse_args() 149 | 150 | train_dir = DATA_ROOT / 'train' 151 | valid_dir = DATA_ROOT / 'val' 152 | 153 | use_cuda = cuda.is_available() 154 | if args.arch == 'seresnext50': 155 | model = get_seresnet_model( 156 | arch="se_resnext50_32x4d", 157 | n_classes=N_CLASSES, pretrained=False) 158 | elif args.arch == 'seresnext101': 159 | model = get_seresnet_model( 160 | arch="se_resnext101_32x4d", 161 | n_classes=N_CLASSES, pretrained=False) 162 | elif args.arch.startswith("densenet"): 163 | model = get_densenet_model(arch=args.arch) 164 | else: 165 | raise ValueError("No such model") 166 | if use_cuda: 167 | model = model.cuda() 168 | criterion = MixUpSoftmaxLoss(nn.CrossEntropyLoss()) 169 | (CACHE_DIR / 'params.json').write_text( 170 | json.dumps(vars(args), indent=4, sort_keys=True)) 171 | 172 | df_train, class_map = build_dataframe_from_folder(train_dir) 173 | df_valid = build_dataframe_from_folder(valid_dir, class_map) 174 | 175 | train_loader = make_loader( 176 | args, TrainDataset, df_train, train_transform, drop_last=True, shuffle=True) 177 | valid_loader = make_loader( 178 | args, TrainDataset, df_valid, test_transform, shuffle=False) 179 | 180 | print(f'{len(train_loader.dataset):,} items in train, ' 181 | f'{len(valid_loader.dataset):,} in valid') 182 | 183 | train_from_scratch(args, model, train_loader, valid_loader, criterion) 184 | 185 | 186 | if __name__ == '__main__': 187 | main() 188 | -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/colab_o0_bs64_e5.txt: -------------------------------------------------------------------------------- 1 | Initing linear 2 | 5,610,928 | 19,899,968 | 2,260,047 3 | 12,894 items in train, 500 in valid 4 | [[06/22/2019 11:23:00 AM]] SEED: 9293 5 | [[06/22/2019 11:23:00 AM]] # of parameters: 27,770,943 6 | [[06/22/2019 11:23:00 AM]] # of trainable 
parameters: 27,770,943 7 | [[06/22/2019 11:23:00 AM]] Optimizer Adam ( 8 | Parameter Group 0 9 | amsgrad: False 10 | betas: (0.9, 0.999) 11 | eps: 1e-08 12 | initial_lr: 0.005 13 | lr: 5e-05 14 | weight_decay: 0 15 | 16 | Parameter Group 1 17 | amsgrad: False 18 | betas: (0.9, 0.999) 19 | eps: 1e-08 20 | initial_lr: 0.005 21 | lr: 5e-05 22 | weight_decay: 0 23 | ) 24 | [[06/22/2019 11:23:00 AM]] Batches per epoch: 201 25 | [[06/22/2019 11:23:00 AM]] ====================Epoch 1==================== 26 | [[06/22/2019 11:23:25 AM]] Step 33: train 3.692032 lr: 8.134e-04 27 | [[06/22/2019 11:23:48 AM]] Step 66: train 1.930618 lr: 1.626e-03 28 | [[06/22/2019 11:24:12 AM]] Step 99: train 1.695598 lr: 2.439e-03 29 | 100% 8/8 [00:03<00:00, 2.77it/s] 30 | [[06/22/2019 11:24:16 AM]] Criterion loss: 2.293814 31 | [[06/22/2019 11:24:16 AM]] accuracy: 38.40% 32 | [[06/22/2019 11:24:16 AM]] top_3_accuracy: 67.00% 33 | [[06/22/2019 11:24:16 AM]] Snapshot metric -0.38400000 34 | [[06/22/2019 11:24:16 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.38400000_100.pth... 35 | [[06/22/2019 11:24:16 AM]] New low 36 | 37 | [[06/22/2019 11:24:39 AM]] Step 132: train 1.584172 lr: 3.251e-03 38 | [[06/22/2019 11:25:03 AM]] Step 165: train 1.500142 lr: 4.064e-03 39 | [[06/22/2019 11:25:27 AM]] Step 198: train 1.571352 lr: 4.877e-03 40 | 100% 8/8 [00:03<00:00, 2.86it/s] 41 | [[06/22/2019 11:25:32 AM]] Criterion loss: 2.362044 42 | [[06/22/2019 11:25:32 AM]] accuracy: 36.40% 43 | [[06/22/2019 11:25:32 AM]] top_3_accuracy: 70.40% 44 | [[06/22/2019 11:25:32 AM]] Snapshot metric -0.36400000 45 | [[06/22/2019 11:25:32 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.36400000_200.pth... 46 | [[06/22/2019 11:25:33 AM]] ====================Epoch 2==================== 47 | [[06/22/2019 11:25:56 AM]] Step 231: train 1.433132 lr: 4.986e-03 48 | [[06/22/2019 11:26:20 AM]] Step 264: train 1.347544 lr: 4.932e-03 49 | [[06/22/2019 11:26:44 AM]] Step 297: train 1.336156 lr: 4.837e-03 50 | 100% 8/8 [00:03<00:00, 2.94it/s] 51 | [[06/22/2019 11:26:50 AM]] Criterion loss: 1.654578 52 | [[06/22/2019 11:26:50 AM]] accuracy: 46.40% 53 | [[06/22/2019 11:26:50 AM]] top_3_accuracy: 79.40% 54 | [[06/22/2019 11:26:50 AM]] Snapshot metric -0.46400000 55 | [[06/22/2019 11:26:50 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.46400000_300.pth... 56 | [[06/22/2019 11:26:50 AM]] New low 57 | 58 | [[06/22/2019 11:27:12 AM]] Step 330: train 1.203578 lr: 4.703e-03 59 | [[06/22/2019 11:27:37 AM]] Step 363: train 1.156871 lr: 4.533e-03 60 | [[06/22/2019 11:28:01 AM]] Step 396: train 1.103429 lr: 4.329e-03 61 | 100% 8/8 [00:03<00:00, 2.88it/s] 62 | [[06/22/2019 11:28:08 AM]] Criterion loss: 0.921457 63 | [[06/22/2019 11:28:08 AM]] accuracy: 68.60% 64 | [[06/22/2019 11:28:08 AM]] top_3_accuracy: 90.80% 65 | [[06/22/2019 11:28:08 AM]] Snapshot metric -0.68600000 66 | [[06/22/2019 11:28:08 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.68600000_400.pth... 
67 | [[06/22/2019 11:28:08 AM]] New low 68 | 69 | [[06/22/2019 11:28:09 AM]] ====================Epoch 3==================== 70 | [[06/22/2019 11:28:31 AM]] Step 429: train 1.057488 lr: 4.094e-03 71 | [[06/22/2019 11:28:55 AM]] Step 462: train 1.021254 lr: 3.834e-03 72 | [[06/22/2019 11:29:20 AM]] Step 495: train 1.042827 lr: 3.551e-03 73 | 100% 8/8 [00:03<00:00, 2.89it/s] 74 | [[06/22/2019 11:29:27 AM]] Criterion loss: 1.046612 75 | [[06/22/2019 11:29:27 AM]] accuracy: 66.20% 76 | [[06/22/2019 11:29:27 AM]] top_3_accuracy: 90.00% 77 | [[06/22/2019 11:29:27 AM]] Snapshot metric -0.66200000 78 | [[06/22/2019 11:29:27 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.66200000_500.pth... 79 | [[06/22/2019 11:29:48 AM]] Step 528: train 0.999482 lr: 3.250e-03 80 | [[06/22/2019 11:30:13 AM]] Step 561: train 0.969348 lr: 2.937e-03 81 | [[06/22/2019 11:30:38 AM]] Step 594: train 0.918918 lr: 2.617e-03 82 | 100% 8/8 [00:03<00:00, 2.81it/s] 83 | [[06/22/2019 11:30:45 AM]] Criterion loss: 0.816670 84 | [[06/22/2019 11:30:45 AM]] accuracy: 73.40% 85 | [[06/22/2019 11:30:45 AM]] top_3_accuracy: 92.00% 86 | [[06/22/2019 11:30:45 AM]] Snapshot metric -0.73400000 87 | [[06/22/2019 11:30:45 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.73400000_600.pth... 88 | [[06/22/2019 11:30:46 AM]] New low 89 | 90 | [[06/22/2019 11:30:48 AM]] ====================Epoch 4==================== 91 | [[06/22/2019 11:31:07 AM]] Step 627: train 0.849912 lr: 2.295e-03 92 | [[06/22/2019 11:31:32 AM]] Step 660: train 0.829518 lr: 1.976e-03 93 | [[06/22/2019 11:31:57 AM]] Step 693: train 0.766648 lr: 1.666e-03 94 | 100% 8/8 [00:03<00:00, 2.88it/s] 95 | [[06/22/2019 11:32:05 AM]] Criterion loss: 0.635795 96 | [[06/22/2019 11:32:05 AM]] accuracy: 79.80% 97 | [[06/22/2019 11:32:05 AM]] top_3_accuracy: 95.00% 98 | [[06/22/2019 11:32:05 AM]] Snapshot metric -0.79800000 99 | [[06/22/2019 11:32:05 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.79800000_700.pth... 100 | [[06/22/2019 11:32:05 AM]] New low 101 | 102 | [[06/22/2019 11:32:25 AM]] Step 726: train 0.761491 lr: 1.370e-03 103 | [[06/22/2019 11:32:50 AM]] Step 759: train 0.715271 lr: 1.093e-03 104 | [[06/22/2019 11:33:15 AM]] Step 792: train 0.683347 lr: 8.388e-04 105 | 100% 8/8 [00:03<00:00, 2.81it/s] 106 | [[06/22/2019 11:33:24 AM]] Criterion loss: 0.546431 107 | [[06/22/2019 11:33:24 AM]] accuracy: 84.00% 108 | [[06/22/2019 11:33:24 AM]] top_3_accuracy: 96.00% 109 | [[06/22/2019 11:33:24 AM]] Snapshot metric -0.84000000 110 | [[06/22/2019 11:33:24 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84000000_800.pth... 111 | [[06/22/2019 11:33:24 AM]] New low 112 | 113 | [[06/22/2019 11:33:27 AM]] ====================Epoch 5==================== 114 | [[06/22/2019 11:33:44 AM]] Step 825: train 0.663847 lr: 6.124e-04 115 | [[06/22/2019 11:34:09 AM]] Step 858: train 0.629520 lr: 4.173e-04 116 | [[06/22/2019 11:34:34 AM]] Step 891: train 0.604525 lr: 2.567e-04 117 | 100% 8/8 [00:03<00:00, 2.71it/s] 118 | [[06/22/2019 11:34:44 AM]] Criterion loss: 0.488755 119 | [[06/22/2019 11:34:44 AM]] accuracy: 85.60% 120 | [[06/22/2019 11:34:44 AM]] top_3_accuracy: 96.40% 121 | [[06/22/2019 11:34:44 AM]] Snapshot metric -0.85600000 122 | [[06/22/2019 11:34:44 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.85600000_900.pth... 
123 | [[06/22/2019 11:34:44 AM]] New low 124 | 125 | [[06/22/2019 11:35:02 AM]] Step 924: train 0.575820 lr: 1.335e-04 126 | [[06/22/2019 11:35:27 AM]] Step 957: train 0.563364 lr: 4.948e-05 127 | [[06/22/2019 11:35:52 AM]] Step 990: train 0.548493 lr: 6.181e-06 128 | 100% 8/8 [00:03<00:00, 2.80it/s] 129 | [[06/22/2019 11:36:03 AM]] Criterion loss: 0.468685 130 | [[06/22/2019 11:36:03 AM]] accuracy: 85.20% 131 | [[06/22/2019 11:36:03 AM]] top_3_accuracy: 96.60% 132 | [[06/22/2019 11:36:03 AM]] Snapshot metric -0.85200000 133 | [[06/22/2019 11:36:03 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.85200000_1000.pth... 134 | CPU times: user 3.45 s, sys: 413 ms, total: 3.86 s 135 | Wall time: 13min 18s -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/colab_o1_bs64_e5_2.txt: -------------------------------------------------------------------------------- 1 | 5,610,928 | 19,899,968 | 2,260,047 2 | 12,894 items in train, 500 in valid 3 | Selected optimization level O1: Insert automatic casts around Pytorch functions and Tensor methods. 4 | 5 | Defaults for this optimization level are: 6 | enabled : True 7 | opt_level : O1 8 | cast_model_type : None 9 | patch_torch_functions : True 10 | keep_batchnorm_fp32 : None 11 | master_weights : None 12 | loss_scale : dynamic 13 | Processing user overrides (additional kwargs that are not None)... 14 | After processing overrides, optimization options are: 15 | enabled : True 16 | opt_level : O1 17 | cast_model_type : None 18 | patch_torch_functions : True 19 | keep_batchnorm_fp32 : None 20 | master_weights : None 21 | loss_scale : dynamic 22 | [[06/23/2019 03:02:37 AM]] SEED: 9293 23 | [[06/23/2019 03:02:37 AM]] # of parameters: 27,770,943 24 | [[06/23/2019 03:02:37 AM]] # of trainable parameters: 27,770,943 25 | [[06/23/2019 03:02:37 AM]] Optimizer Adam ( 26 | Parameter Group 0 27 | amsgrad: False 28 | betas: (0.9, 0.999) 29 | eps: 1e-08 30 | initial_lr: 0.005 31 | lr: 5e-05 32 | weight_decay: 0 33 | 34 | Parameter Group 1 35 | amsgrad: False 36 | betas: (0.9, 0.999) 37 | eps: 1e-08 38 | initial_lr: 0.005 39 | lr: 5e-05 40 | weight_decay: 0 41 | ) 42 | [[06/23/2019 03:02:37 AM]] Batches per epoch: 201 43 | [[06/23/2019 03:02:37 AM]] ====================Epoch 1==================== 44 | [[06/23/2019 03:02:55 AM]] Step 33: train 3.570696 lr: 8.134e-04 45 | [[06/23/2019 03:03:13 AM]] Step 66: train 1.898845 lr: 1.626e-03 46 | Gradient overflow. Skipping step, loss scaler 0 reducing loss scale to 32768.0 47 | [[06/23/2019 03:03:30 AM]] Step 99: train 1.691594 lr: 2.439e-03 48 | 100% 8/8 [00:03<00:00, 2.31it/s] 49 | [[06/23/2019 03:03:34 AM]] Criterion loss: 1.909745 50 | [[06/23/2019 03:03:34 AM]] accuracy: 36.00% 51 | [[06/23/2019 03:03:34 AM]] top_3_accuracy: 70.80% 52 | [[06/23/2019 03:03:34 AM]] Snapshot metric -0.36000000 53 | [[06/23/2019 03:03:34 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.36000000_100.pth... 
54 | [[06/23/2019 03:03:35 AM]] New low 55 | 56 | [[06/23/2019 03:03:51 AM]] Step 132: train 1.606339 lr: 3.251e-03 57 | [[06/23/2019 03:04:09 AM]] Step 165: train 1.566232 lr: 4.064e-03 58 | [[06/23/2019 03:04:26 AM]] Step 198: train 1.537102 lr: 4.877e-03 59 | 100% 8/8 [00:03<00:00, 2.47it/s] 60 | [[06/23/2019 03:04:30 AM]] Criterion loss: 1.674237 61 | [[06/23/2019 03:04:30 AM]] accuracy: 47.60% 62 | [[06/23/2019 03:04:30 AM]] top_3_accuracy: 78.80% 63 | [[06/23/2019 03:04:30 AM]] Snapshot metric -0.47600000 64 | [[06/23/2019 03:04:30 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.47600000_200.pth... 65 | [[06/23/2019 03:04:30 AM]] New low 66 | 67 | [[06/23/2019 03:04:31 AM]] ====================Epoch 2==================== 68 | [[06/23/2019 03:04:48 AM]] Step 231: train 1.502071 lr: 4.986e-03 69 | [[06/23/2019 03:05:05 AM]] Step 264: train 1.334250 lr: 4.932e-03 70 | [[06/23/2019 03:05:23 AM]] Step 297: train 1.301363 lr: 4.837e-03 71 | 100% 8/8 [00:03<00:00, 2.57it/s] 72 | [[06/23/2019 03:05:28 AM]] Criterion loss: 1.491104 73 | [[06/23/2019 03:05:28 AM]] accuracy: 54.40% 74 | [[06/23/2019 03:05:28 AM]] top_3_accuracy: 85.40% 75 | [[06/23/2019 03:05:28 AM]] Snapshot metric -0.54400000 76 | [[06/23/2019 03:05:28 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.54400000_300.pth... 77 | [[06/23/2019 03:05:29 AM]] New low 78 | 79 | [[06/23/2019 03:05:44 AM]] Step 330: train 1.177583 lr: 4.703e-03 80 | [[06/23/2019 03:06:02 AM]] Step 363: train 1.276057 lr: 4.533e-03 81 | [[06/23/2019 03:06:19 AM]] Step 396: train 1.165876 lr: 4.329e-03 82 | 100% 8/8 [00:03<00:00, 2.61it/s] 83 | [[06/23/2019 03:06:24 AM]] Criterion loss: 1.189894 84 | [[06/23/2019 03:06:24 AM]] accuracy: 63.80% 85 | [[06/23/2019 03:06:24 AM]] top_3_accuracy: 87.80% 86 | [[06/23/2019 03:06:24 AM]] Snapshot metric -0.63800000 87 | [[06/23/2019 03:06:24 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.63800000_400.pth... 88 | [[06/23/2019 03:06:25 AM]] New low 89 | 90 | [[06/23/2019 03:06:26 AM]] ====================Epoch 3==================== 91 | [[06/23/2019 03:06:41 AM]] Step 429: train 1.088577 lr: 4.094e-03 92 | [[06/23/2019 03:06:59 AM]] Step 462: train 1.025925 lr: 3.834e-03 93 | [[06/23/2019 03:07:16 AM]] Step 495: train 1.032965 lr: 3.551e-03 94 | 100% 8/8 [00:03<00:00, 2.09it/s] 95 | [[06/23/2019 03:07:23 AM]] Criterion loss: 1.222470 96 | [[06/23/2019 03:07:23 AM]] accuracy: 61.40% 97 | [[06/23/2019 03:07:23 AM]] top_3_accuracy: 86.40% 98 | [[06/23/2019 03:07:23 AM]] Snapshot metric -0.61400000 99 | [[06/23/2019 03:07:23 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.61400000_500.pth... 100 | [[06/23/2019 03:07:38 AM]] Step 528: train 0.993605 lr: 3.250e-03 101 | [[06/23/2019 03:07:55 AM]] Step 561: train 0.962152 lr: 2.937e-03 102 | [[06/23/2019 03:08:13 AM]] Step 594: train 0.881560 lr: 2.617e-03 103 | 100% 8/8 [00:02<00:00, 3.14it/s] 104 | [[06/23/2019 03:08:19 AM]] Criterion loss: 0.784809 105 | [[06/23/2019 03:08:19 AM]] accuracy: 75.60% 106 | [[06/23/2019 03:08:19 AM]] top_3_accuracy: 92.20% 107 | [[06/23/2019 03:08:19 AM]] Snapshot metric -0.75600000 108 | [[06/23/2019 03:08:19 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.75600000_600.pth... 
109 | [[06/23/2019 03:08:20 AM]] New low 110 | 111 | [[06/23/2019 03:08:21 AM]] ====================Epoch 4==================== 112 | [[06/23/2019 03:08:35 AM]] Step 627: train 0.827620 lr: 2.295e-03 113 | [[06/23/2019 03:08:53 AM]] Step 660: train 0.838944 lr: 1.976e-03 114 | [[06/23/2019 03:09:10 AM]] Step 693: train 0.801387 lr: 1.666e-03 115 | 100% 8/8 [00:03<00:00, 2.29it/s] 116 | [[06/23/2019 03:09:17 AM]] Criterion loss: 0.810328 117 | [[06/23/2019 03:09:17 AM]] accuracy: 72.20% 118 | [[06/23/2019 03:09:17 AM]] top_3_accuracy: 92.60% 119 | [[06/23/2019 03:09:18 AM]] Snapshot metric -0.72200000 120 | [[06/23/2019 03:09:18 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.72200000_700.pth... 121 | [[06/23/2019 03:09:31 AM]] Step 726: train 0.734263 lr: 1.370e-03 122 | [[06/23/2019 03:09:49 AM]] Step 759: train 0.728053 lr: 1.093e-03 123 | [[06/23/2019 03:10:07 AM]] Step 792: train 0.709520 lr: 8.388e-04 124 | 100% 8/8 [00:03<00:00, 2.16it/s] 125 | [[06/23/2019 03:10:15 AM]] Criterion loss: 0.518678 126 | [[06/23/2019 03:10:15 AM]] accuracy: 84.00% 127 | [[06/23/2019 03:10:15 AM]] top_3_accuracy: 96.00% 128 | [[06/23/2019 03:10:15 AM]] Snapshot metric -0.84000000 129 | [[06/23/2019 03:10:15 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84000000_800.pth... 130 | [[06/23/2019 03:10:15 AM]] New low 131 | 132 | [[06/23/2019 03:10:17 AM]] ====================Epoch 5==================== 133 | [[06/23/2019 03:10:29 AM]] Step 825: train 0.672310 lr: 6.124e-04 134 | [[06/23/2019 03:10:47 AM]] Step 858: train 0.607981 lr: 4.173e-04 135 | [[06/23/2019 03:11:05 AM]] Step 891: train 0.629262 lr: 2.567e-04 136 | 100% 8/8 [00:03<00:00, 2.90it/s] 137 | [[06/23/2019 03:11:13 AM]] Criterion loss: 0.509262 138 | [[06/23/2019 03:11:13 AM]] accuracy: 83.60% 139 | [[06/23/2019 03:11:13 AM]] top_3_accuracy: 96.80% 140 | [[06/23/2019 03:11:13 AM]] Snapshot metric -0.83600000 141 | [[06/23/2019 03:11:13 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83600000_900.pth... 142 | [[06/23/2019 03:11:26 AM]] Step 924: train 0.587658 lr: 1.335e-04 143 | [[06/23/2019 03:11:43 AM]] Step 957: train 0.581012 lr: 4.948e-05 144 | [[06/23/2019 03:12:01 AM]] Step 990: train 0.579771 lr: 6.181e-06 145 | 100% 8/8 [00:03<00:00, 2.93it/s] 146 | [[06/23/2019 03:12:10 AM]] Criterion loss: 0.479807 147 | [[06/23/2019 03:12:10 AM]] accuracy: 85.80% 148 | [[06/23/2019 03:12:10 AM]] top_3_accuracy: 96.80% 149 | [[06/23/2019 03:12:10 AM]] Snapshot metric -0.85800000 150 | [[06/23/2019 03:12:10 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.85800000_1000.pth... 151 | [[06/23/2019 03:12:10 AM]] New low 152 | 153 | CPU times: user 2.93 s, sys: 345 ms, total: 3.28 s 154 | Wall time: 9min 47s -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/colab_o2_bs64_e5.txt: -------------------------------------------------------------------------------- 1 | 5,610,928 | 19,899,968 | 2,260,047 2 | 12,894 items in train, 500 in valid 3 | Selected optimization level O2: FP16 training with FP32 batchnorm and FP32 master weights. 4 | 5 | Defaults for this optimization level are: 6 | enabled : True 7 | opt_level : O2 8 | cast_model_type : torch.float16 9 | patch_torch_functions : False 10 | keep_batchnorm_fp32 : True 11 | master_weights : True 12 | loss_scale : dynamic 13 | Processing user overrides (additional kwargs that are not None)... 
14 | After processing overrides, optimization options are: 15 | enabled : True 16 | opt_level : O2 17 | cast_model_type : torch.float16 18 | patch_torch_functions : False 19 | keep_batchnorm_fp32 : True 20 | master_weights : True 21 | loss_scale : dynamic 22 | [[06/23/2019 03:24:49 AM]] SEED: 9293 23 | [[06/23/2019 03:24:49 AM]] # of parameters: 27,770,943 24 | [[06/23/2019 03:24:49 AM]] # of trainable parameters: 27,770,943 25 | [[06/23/2019 03:24:49 AM]] Optimizer Adam ( 26 | Parameter Group 0 27 | amsgrad: False 28 | betas: (0.9, 0.999) 29 | eps: 1e-08 30 | initial_lr: 0.005 31 | lr: 5e-05 32 | weight_decay: 0 33 | 34 | Parameter Group 1 35 | amsgrad: False 36 | betas: (0.9, 0.999) 37 | eps: 1e-08 38 | initial_lr: 0.005 39 | lr: 5e-05 40 | weight_decay: 0 41 | ) 42 | [[06/23/2019 03:24:49 AM]] Batches per epoch: 201 43 | [[06/23/2019 03:24:49 AM]] ====================Epoch 1==================== 44 | Gradient overflow. Skipping step, loss scaler 0 reducing loss scale to 32768.0 45 | [[06/23/2019 03:25:07 AM]] Step 33: train 3.563893 lr: 8.134e-04 46 | [[06/23/2019 03:25:24 AM]] Step 66: train 1.888989 lr: 1.626e-03 47 | [[06/23/2019 03:25:42 AM]] Step 99: train 1.706567 lr: 2.439e-03 48 | 100% 8/8 [00:03<00:00, 2.11it/s] 49 | [[06/23/2019 03:25:46 AM]] Criterion loss: 3.025426 50 | [[06/23/2019 03:25:46 AM]] accuracy: 32.20% 51 | [[06/23/2019 03:25:46 AM]] top_3_accuracy: 64.60% 52 | [[06/23/2019 03:25:46 AM]] Snapshot metric -0.32200000 53 | [[06/23/2019 03:25:46 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.32200000_100.pth... 54 | [[06/23/2019 03:25:46 AM]] New low 55 | 56 | [[06/23/2019 03:26:02 AM]] Step 132: train 1.597432 lr: 3.251e-03 57 | [[06/23/2019 03:26:19 AM]] Step 165: train 1.581918 lr: 4.064e-03 58 | [[06/23/2019 03:26:36 AM]] Step 198: train 1.577245 lr: 4.877e-03 59 | 100% 8/8 [00:02<00:00, 2.73it/s] 60 | [[06/23/2019 03:26:40 AM]] Criterion loss: 2.104764 61 | [[06/23/2019 03:26:40 AM]] accuracy: 39.40% 62 | [[06/23/2019 03:26:40 AM]] top_3_accuracy: 73.40% 63 | [[06/23/2019 03:26:40 AM]] Snapshot metric -0.39400000 64 | [[06/23/2019 03:26:40 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.39400000_200.pth... 65 | [[06/23/2019 03:26:40 AM]] New low 66 | 67 | [[06/23/2019 03:26:41 AM]] ====================Epoch 2==================== 68 | [[06/23/2019 03:26:57 AM]] Step 231: train 1.530420 lr: 4.986e-03 69 | [[06/23/2019 03:27:15 AM]] Step 264: train 1.378344 lr: 4.932e-03 70 | [[06/23/2019 03:27:32 AM]] Step 297: train 1.330413 lr: 4.837e-03 71 | 100% 8/8 [00:04<00:00, 1.97it/s] 72 | [[06/23/2019 03:27:37 AM]] Criterion loss: 1.163028 73 | [[06/23/2019 03:27:37 AM]] accuracy: 61.80% 74 | [[06/23/2019 03:27:37 AM]] top_3_accuracy: 85.00% 75 | [[06/23/2019 03:27:37 AM]] Snapshot metric -0.61800000 76 | [[06/23/2019 03:27:37 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.61800000_300.pth... 77 | [[06/23/2019 03:27:38 AM]] New low 78 | 79 | [[06/23/2019 03:27:53 AM]] Step 330: train 1.229342 lr: 4.703e-03 80 | [[06/23/2019 03:28:10 AM]] Step 363: train 1.240462 lr: 4.533e-03 81 | [[06/23/2019 03:28:27 AM]] Step 396: train 1.153345 lr: 4.329e-03 82 | 100% 8/8 [00:03<00:00, 2.66it/s] 83 | [[06/23/2019 03:28:32 AM]] Criterion loss: 0.839671 84 | [[06/23/2019 03:28:32 AM]] accuracy: 73.40% 85 | [[06/23/2019 03:28:32 AM]] top_3_accuracy: 92.80% 86 | [[06/23/2019 03:28:32 AM]] Snapshot metric -0.73400000 87 | [[06/23/2019 03:28:32 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.73400000_400.pth... 
88 | [[06/23/2019 03:28:32 AM]] New low 89 | 90 | [[06/23/2019 03:28:33 AM]] ====================Epoch 3==================== 91 | [[06/23/2019 03:28:48 AM]] Step 429: train 1.091728 lr: 4.094e-03 92 | [[06/23/2019 03:29:06 AM]] Step 462: train 1.046135 lr: 3.834e-03 93 | [[06/23/2019 03:29:23 AM]] Step 495: train 1.024378 lr: 3.551e-03 94 | 100% 8/8 [00:03<00:00, 2.75it/s] 95 | [[06/23/2019 03:29:29 AM]] Criterion loss: 0.782142 96 | [[06/23/2019 03:29:29 AM]] accuracy: 74.40% 97 | [[06/23/2019 03:29:29 AM]] top_3_accuracy: 92.00% 98 | [[06/23/2019 03:29:29 AM]] Snapshot metric -0.74400000 99 | [[06/23/2019 03:29:29 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.74400000_500.pth... 100 | [[06/23/2019 03:29:44 AM]] Step 528: train 1.027445 lr: 3.250e-03 101 | [[06/23/2019 03:30:01 AM]] Step 561: train 0.966508 lr: 2.937e-03 102 | [[06/23/2019 03:30:18 AM]] Step 594: train 0.886668 lr: 2.617e-03 103 | 100% 8/8 [00:03<00:00, 2.16it/s] 104 | [[06/23/2019 03:30:24 AM]] Criterion loss: 0.823883 105 | [[06/23/2019 03:30:24 AM]] accuracy: 73.20% 106 | [[06/23/2019 03:30:24 AM]] top_3_accuracy: 91.20% 107 | [[06/23/2019 03:30:24 AM]] Snapshot metric -0.73200000 108 | [[06/23/2019 03:30:24 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.73200000_600.pth... 109 | [[06/23/2019 03:30:26 AM]] ====================Epoch 4==================== 110 | [[06/23/2019 03:30:40 AM]] Step 627: train 0.816750 lr: 2.295e-03 111 | [[06/23/2019 03:30:57 AM]] Step 660: train 0.861780 lr: 1.976e-03 112 | [[06/23/2019 03:31:15 AM]] Step 693: train 0.810774 lr: 1.666e-03 113 | 100% 8/8 [00:03<00:00, 2.19it/s] 114 | [[06/23/2019 03:31:22 AM]] Criterion loss: 0.706307 115 | [[06/23/2019 03:31:22 AM]] accuracy: 77.20% 116 | [[06/23/2019 03:31:22 AM]] top_3_accuracy: 93.00% 117 | [[06/23/2019 03:31:22 AM]] Snapshot metric -0.77200000 118 | [[06/23/2019 03:31:22 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.77200000_700.pth... 119 | [[06/23/2019 03:31:22 AM]] New low 120 | 121 | [[06/23/2019 03:31:35 AM]] Step 726: train 0.774412 lr: 1.370e-03 122 | [[06/23/2019 03:31:53 AM]] Step 759: train 0.753918 lr: 1.093e-03 123 | [[06/23/2019 03:32:10 AM]] Step 792: train 0.728576 lr: 8.388e-04 124 | 100% 8/8 [00:03<00:00, 2.04it/s] 125 | [[06/23/2019 03:32:18 AM]] Criterion loss: 0.544742 126 | [[06/23/2019 03:32:18 AM]] accuracy: 83.40% 127 | [[06/23/2019 03:32:18 AM]] top_3_accuracy: 95.80% 128 | [[06/23/2019 03:32:18 AM]] Snapshot metric -0.83400000 129 | [[06/23/2019 03:32:18 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83400000_800.pth... 130 | [[06/23/2019 03:32:18 AM]] New low 131 | 132 | [[06/23/2019 03:32:20 AM]] ====================Epoch 5==================== 133 | [[06/23/2019 03:32:32 AM]] Step 825: train 0.683776 lr: 6.124e-04 134 | [[06/23/2019 03:32:49 AM]] Step 858: train 0.616099 lr: 4.173e-04 135 | [[06/23/2019 03:33:07 AM]] Step 891: train 0.637941 lr: 2.567e-04 136 | 100% 8/8 [00:03<00:00, 2.76it/s] 137 | [[06/23/2019 03:33:15 AM]] Criterion loss: 0.504005 138 | [[06/23/2019 03:33:15 AM]] accuracy: 82.80% 139 | [[06/23/2019 03:33:15 AM]] top_3_accuracy: 96.20% 140 | [[06/23/2019 03:33:15 AM]] Snapshot metric -0.82800000 141 | [[06/23/2019 03:33:15 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.82800000_900.pth... 
142 | [[06/23/2019 03:33:28 AM]] Step 924: train 0.592936 lr: 1.335e-04 143 | [[06/23/2019 03:33:45 AM]] Step 957: train 0.582012 lr: 4.948e-05 144 | [[06/23/2019 03:34:02 AM]] Step 990: train 0.588021 lr: 6.181e-06 145 | 100% 8/8 [00:03<00:00, 2.70it/s] 146 | [[06/23/2019 03:34:11 AM]] Criterion loss: 0.487183 147 | [[06/23/2019 03:34:11 AM]] accuracy: 85.40% 148 | [[06/23/2019 03:34:11 AM]] top_3_accuracy: 96.20% 149 | [[06/23/2019 03:34:11 AM]] Snapshot metric -0.85400000 150 | [[06/23/2019 03:34:11 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.85400000_1000.pth... 151 | [[06/23/2019 03:34:11 AM]] New low 152 | 153 | CPU times: user 2.89 s, sys: 358 ms, total: 3.25 s 154 | Wall time: 9min 35s -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/colab_o1_bs64_e5.txt: -------------------------------------------------------------------------------- 1 | Initing linear 2 | 5,610,928 | 19,899,968 | 2,260,047 3 | 12,894 items in train, 500 in valid 4 | Selected optimization level O1: Insert automatic casts around Pytorch functions and Tensor methods. 5 | 6 | Defaults for this optimization level are: 7 | enabled : True 8 | opt_level : O1 9 | cast_model_type : None 10 | patch_torch_functions : True 11 | keep_batchnorm_fp32 : None 12 | master_weights : None 13 | loss_scale : dynamic 14 | Processing user overrides (additional kwargs that are not None)... 15 | After processing overrides, optimization options are: 16 | enabled : True 17 | opt_level : O1 18 | cast_model_type : None 19 | patch_torch_functions : True 20 | keep_batchnorm_fp32 : None 21 | master_weights : None 22 | loss_scale : dynamic 23 | [[06/22/2019 11:07:14 AM]] SEED: 9293 24 | [[06/22/2019 11:07:14 AM]] # of parameters: 27,770,943 25 | [[06/22/2019 11:07:14 AM]] # of trainable parameters: 27,770,943 26 | [[06/22/2019 11:07:14 AM]] Optimizer Adam ( 27 | Parameter Group 0 28 | amsgrad: False 29 | betas: (0.9, 0.999) 30 | eps: 1e-08 31 | initial_lr: 0.005 32 | lr: 5e-05 33 | weight_decay: 0 34 | 35 | Parameter Group 1 36 | amsgrad: False 37 | betas: (0.9, 0.999) 38 | eps: 1e-08 39 | initial_lr: 0.005 40 | lr: 5e-05 41 | weight_decay: 0 42 | ) 43 | [[06/22/2019 11:07:14 AM]] Batches per epoch: 201 44 | [[06/22/2019 11:07:14 AM]] ====================Epoch 1==================== 45 | Gradient overflow. Skipping step, loss scaler 0 reducing loss scale to 32768.0 46 | [[06/22/2019 11:07:33 AM]] Step 33: train 3.761103 lr: 8.134e-04 47 | [[06/22/2019 11:07:50 AM]] Step 66: train 1.999741 lr: 1.626e-03 48 | [[06/22/2019 11:08:08 AM]] Step 99: train 1.745328 lr: 2.439e-03 49 | 100% 8/8 [00:03<00:00, 2.54it/s] 50 | [[06/22/2019 11:08:12 AM]] Criterion loss: 1.965406 51 | [[06/22/2019 11:08:12 AM]] accuracy: 40.80% 52 | [[06/22/2019 11:08:12 AM]] top_3_accuracy: 69.40% 53 | [[06/22/2019 11:08:12 AM]] Snapshot metric -0.40800000 54 | [[06/22/2019 11:08:12 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.40800000_100.pth... 
55 | [[06/22/2019 11:08:12 AM]] New low 56 | 57 | [[06/22/2019 11:08:28 AM]] Step 132: train 1.582526 lr: 3.251e-03 58 | [[06/22/2019 11:08:46 AM]] Step 165: train 1.515693 lr: 4.064e-03 59 | [[06/22/2019 11:09:04 AM]] Step 198: train 1.504430 lr: 4.877e-03 60 | 100% 8/8 [00:03<00:00, 2.60it/s] 61 | [[06/22/2019 11:09:08 AM]] Criterion loss: 1.618016 62 | [[06/22/2019 11:09:08 AM]] accuracy: 48.60% 63 | [[06/22/2019 11:09:08 AM]] top_3_accuracy: 79.00% 64 | [[06/22/2019 11:09:08 AM]] Snapshot metric -0.48600000 65 | [[06/22/2019 11:09:08 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.48600000_200.pth... 66 | [[06/22/2019 11:09:08 AM]] New low 67 | 68 | [[06/22/2019 11:09:08 AM]] ====================Epoch 2==================== 69 | [[06/22/2019 11:09:25 AM]] Step 231: train 1.415068 lr: 4.986e-03 70 | [[06/22/2019 11:09:43 AM]] Step 264: train 1.320727 lr: 4.932e-03 71 | [[06/22/2019 11:10:01 AM]] Step 297: train 1.312385 lr: 4.837e-03 72 | 100% 8/8 [00:03<00:00, 2.21it/s] 73 | [[06/22/2019 11:10:06 AM]] Criterion loss: 1.416207 74 | [[06/22/2019 11:10:06 AM]] accuracy: 54.80% 75 | [[06/22/2019 11:10:06 AM]] top_3_accuracy: 82.80% 76 | [[06/22/2019 11:10:06 AM]] Snapshot metric -0.54800000 77 | [[06/22/2019 11:10:06 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.54800000_300.pth... 78 | [[06/22/2019 11:10:07 AM]] New low 79 | 80 | [[06/22/2019 11:10:22 AM]] Step 330: train 1.171946 lr: 4.703e-03 81 | [[06/22/2019 11:10:40 AM]] Step 363: train 1.107085 lr: 4.533e-03 82 | [[06/22/2019 11:10:58 AM]] Step 396: train 1.065447 lr: 4.329e-03 83 | 100% 8/8 [00:03<00:00, 2.49it/s] 84 | [[06/22/2019 11:11:03 AM]] Criterion loss: 0.983896 85 | [[06/22/2019 11:11:03 AM]] accuracy: 68.20% 86 | [[06/22/2019 11:11:03 AM]] top_3_accuracy: 89.60% 87 | [[06/22/2019 11:11:03 AM]] Snapshot metric -0.68200000 88 | [[06/22/2019 11:11:03 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.68200000_400.pth... 89 | [[06/22/2019 11:11:03 AM]] New low 90 | 91 | [[06/22/2019 11:11:04 AM]] ====================Epoch 3==================== 92 | [[06/22/2019 11:11:20 AM]] Step 429: train 1.057125 lr: 4.094e-03 93 | [[06/22/2019 11:11:39 AM]] Step 462: train 1.027017 lr: 3.834e-03 94 | [[06/22/2019 11:11:57 AM]] Step 495: train 1.018028 lr: 3.551e-03 95 | 100% 8/8 [00:03<00:00, 2.77it/s] 96 | [[06/22/2019 11:12:03 AM]] Criterion loss: 0.961748 97 | [[06/22/2019 11:12:03 AM]] accuracy: 69.00% 98 | [[06/22/2019 11:12:03 AM]] top_3_accuracy: 91.00% 99 | [[06/22/2019 11:12:03 AM]] Snapshot metric -0.69000000 100 | [[06/22/2019 11:12:03 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.69000000_500.pth... 101 | [[06/22/2019 11:12:18 AM]] Step 528: train 0.942188 lr: 3.250e-03 102 | [[06/22/2019 11:12:36 AM]] Step 561: train 0.954713 lr: 2.937e-03 103 | [[06/22/2019 11:12:54 AM]] Step 594: train 0.911246 lr: 2.617e-03 104 | 100% 8/8 [00:03<00:00, 2.27it/s] 105 | [[06/22/2019 11:13:00 AM]] Criterion loss: 0.943144 106 | [[06/22/2019 11:13:00 AM]] accuracy: 70.00% 107 | [[06/22/2019 11:13:00 AM]] top_3_accuracy: 90.00% 108 | [[06/22/2019 11:13:00 AM]] Snapshot metric -0.70000000 109 | [[06/22/2019 11:13:00 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.70000000_600.pth... 
110 | [[06/22/2019 11:13:00 AM]] New low 111 | 112 | [[06/22/2019 11:13:02 AM]] ====================Epoch 4==================== 113 | [[06/22/2019 11:13:16 AM]] Step 627: train 0.844525 lr: 2.295e-03 114 | [[06/22/2019 11:13:34 AM]] Step 660: train 0.806685 lr: 1.976e-03 115 | [[06/22/2019 11:13:52 AM]] Step 693: train 0.746364 lr: 1.666e-03 116 | 100% 8/8 [00:04<00:00, 1.98it/s] 117 | [[06/22/2019 11:14:00 AM]] Criterion loss: 0.556791 118 | [[06/22/2019 11:14:00 AM]] accuracy: 83.00% 119 | [[06/22/2019 11:14:00 AM]] top_3_accuracy: 95.60% 120 | [[06/22/2019 11:14:00 AM]] Snapshot metric -0.83000000 121 | [[06/22/2019 11:14:00 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83000000_700.pth... 122 | [[06/22/2019 11:14:00 AM]] New low 123 | 124 | [[06/22/2019 11:14:14 AM]] Step 726: train 0.754921 lr: 1.370e-03 125 | [[06/22/2019 11:14:31 AM]] Step 759: train 0.711394 lr: 1.093e-03 126 | [[06/22/2019 11:14:49 AM]] Step 792: train 0.662325 lr: 8.388e-04 127 | 100% 8/8 [00:03<00:00, 2.09it/s] 128 | [[06/22/2019 11:14:57 AM]] Criterion loss: 0.540010 129 | [[06/22/2019 11:14:57 AM]] accuracy: 83.20% 130 | [[06/22/2019 11:14:57 AM]] top_3_accuracy: 95.20% 131 | [[06/22/2019 11:14:57 AM]] Snapshot metric -0.83200000 132 | [[06/22/2019 11:14:57 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83200000_800.pth... 133 | [[06/22/2019 11:14:59 AM]] ====================Epoch 5==================== 134 | [[06/22/2019 11:15:12 AM]] Step 825: train 0.657531 lr: 6.124e-04 135 | [[06/22/2019 11:15:31 AM]] Step 858: train 0.639147 lr: 4.173e-04 136 | [[06/22/2019 11:15:48 AM]] Step 891: train 0.591341 lr: 2.567e-04 137 | 100% 8/8 [00:03<00:00, 2.73it/s] 138 | [[06/22/2019 11:15:57 AM]] Criterion loss: 0.488692 139 | [[06/22/2019 11:15:57 AM]] accuracy: 84.20% 140 | [[06/22/2019 11:15:57 AM]] top_3_accuracy: 96.00% 141 | [[06/22/2019 11:15:57 AM]] Snapshot metric -0.84200000 142 | [[06/22/2019 11:15:57 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84200000_900.pth... 143 | [[06/22/2019 11:15:57 AM]] New low 144 | 145 | [[06/22/2019 11:16:09 AM]] Step 924: train 0.566965 lr: 1.335e-04 146 | [[06/22/2019 11:16:27 AM]] Step 957: train 0.551050 lr: 4.948e-05 147 | [[06/22/2019 11:16:45 AM]] Step 990: train 0.532422 lr: 6.181e-06 148 | 100% 8/8 [00:03<00:00, 2.18it/s] 149 | [[06/22/2019 11:16:54 AM]] Criterion loss: 0.471129 150 | [[06/22/2019 11:16:54 AM]] accuracy: 84.00% 151 | [[06/22/2019 11:16:54 AM]] top_3_accuracy: 96.60% 152 | [[06/22/2019 11:16:54 AM]] Snapshot metric -0.84000000 153 | [[06/22/2019 11:16:54 AM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84000000_1000.pth... 
154 | CPU times: user 2.62 s, sys: 334 ms, total: 2.95 s 155 | Wall time: 9min 59s -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/bs32_8460.txt: -------------------------------------------------------------------------------- 1 | [[06/22/2019 02:17:52 PM]] SEED: 9293 2 | [[06/22/2019 02:17:52 PM]] # of parameters: 27,770,943 3 | [[06/22/2019 02:17:52 PM]] # of trainable parameters: 27,770,943 4 | [[06/22/2019 02:17:52 PM]] Optimizer Adam ( 5 | Parameter Group 0 6 | amsgrad: False 7 | betas: (0.9, 0.999) 8 | eps: 1e-08 9 | initial_lr: 0.002 10 | lr: 2e-05 11 | weight_decay: 0 12 | 13 | Parameter Group 1 14 | amsgrad: False 15 | betas: (0.9, 0.999) 16 | eps: 1e-08 17 | initial_lr: 0.002 18 | lr: 2e-05 19 | weight_decay: 0 20 | ) 21 | [[06/22/2019 02:17:52 PM]] Batches per epoch: 402 22 | [[06/22/2019 02:17:52 PM]] ====================Epoch 1==================== 23 | [[06/22/2019 02:18:10 PM]] Step 40: train 3.956017 lr: 2.072e-04 24 | [[06/22/2019 02:18:27 PM]] Step 80: train 2.060594 lr: 4.042e-04 25 | [[06/22/2019 02:18:44 PM]] Step 120: train 1.976278 lr: 6.012e-04 26 | [[06/22/2019 02:19:02 PM]] Step 160: train 1.772698 lr: 7.982e-04 27 | [[06/22/2019 02:19:19 PM]] Step 200: train 1.656106 lr: 9.952e-04 28 | [[06/22/2019 02:19:22 PM]] Criterion loss: 1.560624 29 | [[06/22/2019 02:19:22 PM]] accuracy: 49.80% 30 | [[06/22/2019 02:19:22 PM]] top_3_accuracy: 75.60% 31 | [[06/22/2019 02:19:22 PM]] Snapshot metric -0.49800000 32 | [[06/22/2019 02:19:22 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.49800000_201.pth... 33 | [[06/22/2019 02:19:22 PM]] New low 34 | 35 | [[06/22/2019 02:19:39 PM]] Step 240: train 1.619188 lr: 1.192e-03 36 | [[06/22/2019 02:19:56 PM]] Step 280: train 1.540767 lr: 1.389e-03 37 | [[06/22/2019 02:20:13 PM]] Step 320: train 1.467717 lr: 1.586e-03 38 | [[06/22/2019 02:20:31 PM]] Step 360: train 1.525094 lr: 1.783e-03 39 | [[06/22/2019 02:20:48 PM]] Step 400: train 1.454986 lr: 1.980e-03 40 | [[06/22/2019 02:20:51 PM]] Criterion loss: 1.493544 41 | [[06/22/2019 02:20:51 PM]] accuracy: 53.80% 42 | [[06/22/2019 02:20:51 PM]] top_3_accuracy: 78.00% 43 | [[06/22/2019 02:20:51 PM]] Snapshot metric -0.53800000 44 | [[06/22/2019 02:20:51 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.53800000_402.pth... 45 | [[06/22/2019 02:20:52 PM]] New low 46 | 47 | [[06/22/2019 02:20:52 PM]] ====================Epoch 2==================== 48 | [[06/22/2019 02:21:08 PM]] Step 440: train 1.488533 lr: 1.998e-03 49 | [[06/22/2019 02:21:26 PM]] Step 480: train 1.380048 lr: 1.990e-03 50 | [[06/22/2019 02:21:43 PM]] Step 520: train 1.430035 lr: 1.975e-03 51 | [[06/22/2019 02:22:00 PM]] Step 560: train 1.249746 lr: 1.955e-03 52 | [[06/22/2019 02:22:17 PM]] Step 600: train 1.240363 lr: 1.929e-03 53 | [[06/22/2019 02:22:21 PM]] Criterion loss: 0.982757 54 | [[06/22/2019 02:22:21 PM]] accuracy: 66.00% 55 | [[06/22/2019 02:22:21 PM]] top_3_accuracy: 92.20% 56 | [[06/22/2019 02:22:21 PM]] Snapshot metric -0.66000000 57 | [[06/22/2019 02:22:21 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.66000000_603.pth... 
58 | [[06/22/2019 02:22:21 PM]] New low 59 | 60 | [[06/22/2019 02:22:37 PM]] Step 640: train 1.268782 lr: 1.897e-03 61 | [[06/22/2019 02:22:55 PM]] Step 680: train 1.264315 lr: 1.860e-03 62 | [[06/22/2019 02:23:12 PM]] Step 720: train 1.153008 lr: 1.818e-03 63 | [[06/22/2019 02:23:29 PM]] Step 760: train 1.135256 lr: 1.770e-03 64 | [[06/22/2019 02:23:46 PM]] Step 800: train 1.113838 lr: 1.718e-03 65 | [[06/22/2019 02:23:50 PM]] Criterion loss: 0.893659 66 | [[06/22/2019 02:23:50 PM]] accuracy: 70.20% 67 | [[06/22/2019 02:23:50 PM]] top_3_accuracy: 93.20% 68 | [[06/22/2019 02:23:50 PM]] Snapshot metric -0.70200000 69 | [[06/22/2019 02:23:50 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.70200000_804.pth... 70 | [[06/22/2019 02:23:51 PM]] New low 71 | 72 | [[06/22/2019 02:23:51 PM]] ====================Epoch 3==================== 73 | [[06/22/2019 02:24:07 PM]] Step 840: train 1.078236 lr: 1.662e-03 74 | [[06/22/2019 02:24:24 PM]] Step 880: train 1.151317 lr: 1.601e-03 75 | [[06/22/2019 02:24:43 PM]] Step 920: train 1.030114 lr: 1.537e-03 76 | [[06/22/2019 02:25:02 PM]] Step 960: train 1.032722 lr: 1.469e-03 77 | [[06/22/2019 02:25:20 PM]] Step 1000: train 1.093392 lr: 1.399e-03 78 | [[06/22/2019 02:25:26 PM]] Criterion loss: 0.766180 79 | [[06/22/2019 02:25:26 PM]] accuracy: 76.20% 80 | [[06/22/2019 02:25:26 PM]] top_3_accuracy: 94.60% 81 | [[06/22/2019 02:25:26 PM]] Snapshot metric -0.76200000 82 | [[06/22/2019 02:25:26 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.76200000_1005.pth... 83 | [[06/22/2019 02:25:26 PM]] New low 84 | 85 | [[06/22/2019 02:25:42 PM]] Step 1040: train 1.119177 lr: 1.326e-03 86 | [[06/22/2019 02:26:01 PM]] Step 1080: train 0.856363 lr: 1.251e-03 87 | [[06/22/2019 02:26:20 PM]] Step 1120: train 0.873459 lr: 1.175e-03 88 | [[06/22/2019 02:26:39 PM]] Step 1160: train 0.928308 lr: 1.098e-03 89 | [[06/22/2019 02:26:57 PM]] Step 1200: train 0.913717 lr: 1.020e-03 90 | [[06/22/2019 02:27:02 PM]] Criterion loss: 0.745631 91 | [[06/22/2019 02:27:02 PM]] accuracy: 76.80% 92 | [[06/22/2019 02:27:02 PM]] top_3_accuracy: 93.40% 93 | [[06/22/2019 02:27:02 PM]] Snapshot metric -0.76800000 94 | [[06/22/2019 02:27:02 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.76800000_1206.pth... 95 | [[06/22/2019 02:27:02 PM]] New low 96 | 97 | [[06/22/2019 02:27:02 PM]] ====================Epoch 4==================== 98 | [[06/22/2019 02:27:18 PM]] Step 1240: train 0.906149 lr: 9.414e-04 99 | [[06/22/2019 02:27:36 PM]] Step 1280: train 0.858262 lr: 8.637e-04 100 | [[06/22/2019 02:27:54 PM]] Step 1320: train 0.795978 lr: 7.867e-04 101 | [[06/22/2019 02:28:12 PM]] Step 1360: train 0.783649 lr: 7.111e-04 102 | [[06/22/2019 02:28:30 PM]] Step 1400: train 0.849409 lr: 6.373e-04 103 | [[06/22/2019 02:28:35 PM]] Criterion loss: 0.626640 104 | [[06/22/2019 02:28:35 PM]] accuracy: 80.60% 105 | [[06/22/2019 02:28:35 PM]] top_3_accuracy: 95.40% 106 | [[06/22/2019 02:28:35 PM]] Snapshot metric -0.80600000 107 | [[06/22/2019 02:28:35 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.80600000_1407.pth... 
108 | [[06/22/2019 02:28:36 PM]] New low 109 | 110 | [[06/22/2019 02:28:50 PM]] Step 1440: train 0.748868 lr: 5.656e-04 111 | [[06/22/2019 02:29:08 PM]] Step 1480: train 0.772972 lr: 4.966e-04 112 | [[06/22/2019 02:29:26 PM]] Step 1520: train 0.767017 lr: 4.307e-04 113 | [[06/22/2019 02:29:43 PM]] Step 1560: train 0.733338 lr: 3.683e-04 114 | [[06/22/2019 02:30:01 PM]] Step 1600: train 0.687413 lr: 3.097e-04 115 | [[06/22/2019 02:30:07 PM]] Criterion loss: 0.540119 116 | [[06/22/2019 02:30:07 PM]] accuracy: 84.60% 117 | [[06/22/2019 02:30:07 PM]] top_3_accuracy: 94.60% 118 | [[06/22/2019 02:30:07 PM]] Snapshot metric -0.84600000 119 | [[06/22/2019 02:30:07 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84600000_1608.pth... 120 | [[06/22/2019 02:30:07 PM]] New low 121 | 122 | [[06/22/2019 02:30:07 PM]] ====================Epoch 5==================== 123 | [[06/22/2019 02:30:22 PM]] Step 1640: train 0.686983 lr: 2.553e-04 124 | [[06/22/2019 02:30:39 PM]] Step 1680: train 0.700227 lr: 2.055e-04 125 | [[06/22/2019 02:30:56 PM]] Step 1720: train 0.620291 lr: 1.605e-04 126 | [[06/22/2019 02:31:14 PM]] Step 1760: train 0.587356 lr: 1.206e-04 127 | [[06/22/2019 02:31:31 PM]] Step 1800: train 0.602121 lr: 8.614e-05 128 | [[06/22/2019 02:31:38 PM]] Criterion loss: 0.498786 129 | [[06/22/2019 02:31:38 PM]] accuracy: 84.60% 130 | [[06/22/2019 02:31:38 PM]] top_3_accuracy: 95.80% 131 | [[06/22/2019 02:31:38 PM]] Snapshot metric -0.84600000 132 | [[06/22/2019 02:31:38 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84600000_1809.pth... 133 | [[06/22/2019 02:31:52 PM]] Step 1840: train 0.621550 lr: 5.723e-05 134 | [[06/22/2019 02:32:10 PM]] Step 1880: train 0.559325 lr: 3.407e-05 135 | [[06/22/2019 02:32:28 PM]] Step 1920: train 0.624009 lr: 1.682e-05 136 | [[06/22/2019 02:32:45 PM]] Step 1960: train 0.610800 lr: 5.560e-06 137 | [[06/22/2019 02:33:03 PM]] Step 2000: train 0.585742 lr: 3.740e-07 138 | [[06/22/2019 02:33:10 PM]] Criterion loss: 0.506756 139 | [[06/22/2019 02:33:10 PM]] accuracy: 83.40% 140 | [[06/22/2019 02:33:10 PM]] top_3_accuracy: 95.80% 141 | [[06/22/2019 02:33:10 PM]] Snapshot metric -0.83400000 142 | [[06/22/2019 02:33:10 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83400000_2010.pth... 
143 | -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/bs64_mixup02_8600.txt: -------------------------------------------------------------------------------- 1 | [[06/22/2019 02:59:53 PM]] SEED: 231 2 | [[06/22/2019 02:59:53 PM]] # of parameters: 27,770,943 3 | [[06/22/2019 02:59:53 PM]] # of trainable parameters: 27,770,943 4 | [[06/22/2019 02:59:53 PM]] Optimizer Adam ( 5 | Parameter Group 0 6 | amsgrad: False 7 | betas: (0.9, 0.999) 8 | eps: 1e-08 9 | initial_lr: 0.005 10 | lr: 5e-05 11 | weight_decay: 0 12 | 13 | Parameter Group 1 14 | amsgrad: False 15 | betas: (0.9, 0.999) 16 | eps: 1e-08 17 | initial_lr: 0.005 18 | lr: 5e-05 19 | weight_decay: 0 20 | ) 21 | [[06/22/2019 02:59:53 PM]] Batches per epoch: 201 22 | [[06/22/2019 02:59:53 PM]] ====================Epoch 1==================== 23 | [[06/22/2019 03:00:10 PM]] Step 20: train 3.708626 lr: 4.933e-04 24 | [[06/22/2019 03:00:27 PM]] Step 40: train 2.107206 lr: 9.858e-04 25 | [[06/22/2019 03:00:43 PM]] Step 60: train 2.039582 lr: 1.478e-03 26 | [[06/22/2019 03:01:00 PM]] Step 80: train 1.885562 lr: 1.971e-03 27 | [[06/22/2019 03:01:16 PM]] Step 100: train 1.781235 lr: 2.463e-03 28 | [[06/22/2019 03:01:19 PM]] Criterion loss: 3.208526 29 | [[06/22/2019 03:01:19 PM]] accuracy: 31.00% 30 | [[06/22/2019 03:01:19 PM]] top_3_accuracy: 51.40% 31 | [[06/22/2019 03:01:19 PM]] Snapshot metric -0.31000000 32 | [[06/22/2019 03:01:19 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.31000000_100.pth... 33 | [[06/22/2019 03:01:19 PM]] New low 34 | 35 | [[06/22/2019 03:01:36 PM]] Step 120: train 1.711841 lr: 2.956e-03 36 | [[06/22/2019 03:01:52 PM]] Step 140: train 1.779275 lr: 3.449e-03 37 | [[06/22/2019 03:02:09 PM]] Step 160: train 1.782424 lr: 3.941e-03 38 | [[06/22/2019 03:02:25 PM]] Step 180: train 1.643625 lr: 4.434e-03 39 | [[06/22/2019 03:02:42 PM]] Step 200: train 1.690528 lr: 4.926e-03 40 | [[06/22/2019 03:02:45 PM]] Criterion loss: 1.394506 41 | [[06/22/2019 03:02:45 PM]] accuracy: 55.80% 42 | [[06/22/2019 03:02:45 PM]] top_3_accuracy: 81.20% 43 | [[06/22/2019 03:02:45 PM]] Snapshot metric -0.55800000 44 | [[06/22/2019 03:02:45 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.55800000_200.pth... 45 | [[06/22/2019 03:02:45 PM]] New low 46 | 47 | [[06/22/2019 03:02:46 PM]] ====================Epoch 2==================== 48 | [[06/22/2019 03:03:02 PM]] Step 220: train 1.586219 lr: 4.996e-03 49 | [[06/22/2019 03:03:19 PM]] Step 240: train 1.540666 lr: 4.977e-03 50 | [[06/22/2019 03:03:35 PM]] Step 260: train 1.584697 lr: 4.942e-03 51 | [[06/22/2019 03:03:52 PM]] Step 280: train 1.529005 lr: 4.893e-03 52 | [[06/22/2019 03:04:09 PM]] Step 300: train 1.468388 lr: 4.830e-03 53 | [[06/22/2019 03:04:11 PM]] Criterion loss: 1.253831 54 | [[06/22/2019 03:04:11 PM]] accuracy: 56.60% 55 | [[06/22/2019 03:04:11 PM]] top_3_accuracy: 86.80% 56 | [[06/22/2019 03:04:11 PM]] Snapshot metric -0.56600000 57 | [[06/22/2019 03:04:11 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.56600000_300.pth... 
58 | [[06/22/2019 03:04:12 PM]] New low 59 | 60 | [[06/22/2019 03:04:28 PM]] Step 320: train 1.420705 lr: 4.752e-03 61 | [[06/22/2019 03:04:45 PM]] Step 340: train 1.461756 lr: 4.660e-03 62 | [[06/22/2019 03:05:01 PM]] Step 360: train 1.431700 lr: 4.555e-03 63 | [[06/22/2019 03:05:18 PM]] Step 380: train 1.399826 lr: 4.438e-03 64 | [[06/22/2019 03:05:34 PM]] Step 400: train 1.338424 lr: 4.309e-03 65 | [[06/22/2019 03:05:37 PM]] Criterion loss: 1.110560 66 | [[06/22/2019 03:05:37 PM]] accuracy: 63.80% 67 | [[06/22/2019 03:05:37 PM]] top_3_accuracy: 88.00% 68 | [[06/22/2019 03:05:37 PM]] Snapshot metric -0.63800000 69 | [[06/22/2019 03:05:37 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.63800000_400.pth... 70 | [[06/22/2019 03:05:37 PM]] New low 71 | 72 | [[06/22/2019 03:05:39 PM]] ====================Epoch 3==================== 73 | [[06/22/2019 03:05:54 PM]] Step 420: train 1.360706 lr: 4.168e-03 74 | [[06/22/2019 03:06:11 PM]] Step 440: train 1.345563 lr: 4.018e-03 75 | [[06/22/2019 03:06:27 PM]] Step 460: train 1.260429 lr: 3.858e-03 76 | [[06/22/2019 03:06:44 PM]] Step 480: train 1.311192 lr: 3.690e-03 77 | [[06/22/2019 03:07:00 PM]] Step 500: train 1.265890 lr: 3.515e-03 78 | [[06/22/2019 03:07:03 PM]] Criterion loss: 0.843474 79 | [[06/22/2019 03:07:03 PM]] accuracy: 71.80% 80 | [[06/22/2019 03:07:03 PM]] top_3_accuracy: 92.00% 81 | [[06/22/2019 03:07:03 PM]] Snapshot metric -0.71800000 82 | [[06/22/2019 03:07:03 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.71800000_500.pth... 83 | [[06/22/2019 03:07:03 PM]] New low 84 | 85 | [[06/22/2019 03:07:20 PM]] Step 520: train 1.227812 lr: 3.334e-03 86 | [[06/22/2019 03:07:36 PM]] Step 540: train 1.289461 lr: 3.147e-03 87 | [[06/22/2019 03:07:53 PM]] Step 560: train 1.262719 lr: 2.957e-03 88 | [[06/22/2019 03:08:10 PM]] Step 580: train 1.117412 lr: 2.763e-03 89 | [[06/22/2019 03:08:27 PM]] Step 600: train 1.164743 lr: 2.568e-03 90 | [[06/22/2019 03:08:29 PM]] Criterion loss: 0.726492 91 | [[06/22/2019 03:08:29 PM]] accuracy: 76.80% 92 | [[06/22/2019 03:08:29 PM]] top_3_accuracy: 94.00% 93 | [[06/22/2019 03:08:29 PM]] Snapshot metric -0.76800000 94 | [[06/22/2019 03:08:29 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.76800000_600.pth... 95 | [[06/22/2019 03:08:30 PM]] New low 96 | 97 | [[06/22/2019 03:08:32 PM]] ====================Epoch 4==================== 98 | [[06/22/2019 03:08:47 PM]] Step 620: train 1.115039 lr: 2.373e-03 99 | [[06/22/2019 03:09:04 PM]] Step 640: train 1.121448 lr: 2.179e-03 100 | [[06/22/2019 03:09:21 PM]] Step 660: train 1.164681 lr: 1.986e-03 101 | [[06/22/2019 03:09:38 PM]] Step 680: train 1.079892 lr: 1.797e-03 102 | [[06/22/2019 03:09:55 PM]] Step 700: train 1.058773 lr: 1.611e-03 103 | [[06/22/2019 03:09:58 PM]] Criterion loss: 0.941993 104 | [[06/22/2019 03:09:58 PM]] accuracy: 68.60% 105 | [[06/22/2019 03:09:58 PM]] top_3_accuracy: 90.80% 106 | [[06/22/2019 03:09:58 PM]] Snapshot metric -0.68600000 107 | [[06/22/2019 03:09:58 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.68600000_700.pth... 
108 | [[06/22/2019 03:10:15 PM]] Step 720: train 1.053417 lr: 1.432e-03 109 | [[06/22/2019 03:10:33 PM]] Step 740: train 1.014899 lr: 1.258e-03 110 | [[06/22/2019 03:10:49 PM]] Step 760: train 1.034633 lr: 1.093e-03 111 | [[06/22/2019 03:11:06 PM]] Step 780: train 1.066577 lr: 9.358e-04 112 | [[06/22/2019 03:11:22 PM]] Step 800: train 0.943729 lr: 7.883e-04 113 | [[06/22/2019 03:11:25 PM]] Criterion loss: 0.548378 114 | [[06/22/2019 03:11:25 PM]] accuracy: 83.80% 115 | [[06/22/2019 03:11:25 PM]] top_3_accuracy: 94.80% 116 | [[06/22/2019 03:11:25 PM]] Snapshot metric -0.83800000 117 | [[06/22/2019 03:11:25 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83800000_800.pth... 118 | [[06/22/2019 03:11:25 PM]] New low 119 | 120 | [[06/22/2019 03:11:29 PM]] ====================Epoch 5==================== 121 | [[06/22/2019 03:11:42 PM]] Step 820: train 0.915435 lr: 6.513e-04 122 | [[06/22/2019 03:11:59 PM]] Step 840: train 0.926835 lr: 5.256e-04 123 | [[06/22/2019 03:12:15 PM]] Step 860: train 0.959245 lr: 4.119e-04 124 | [[06/22/2019 03:12:32 PM]] Step 880: train 0.954981 lr: 3.109e-04 125 | [[06/22/2019 03:12:49 PM]] Step 900: train 0.939287 lr: 2.233e-04 126 | [[06/22/2019 03:12:51 PM]] Criterion loss: 0.518281 127 | [[06/22/2019 03:12:51 PM]] accuracy: 84.80% 128 | [[06/22/2019 03:12:51 PM]] top_3_accuracy: 95.80% 129 | [[06/22/2019 03:12:51 PM]] Snapshot metric -0.84800000 130 | [[06/22/2019 03:12:51 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84800000_900.pth... 131 | [[06/22/2019 03:12:51 PM]] New low 132 | 133 | [[06/22/2019 03:13:08 PM]] Step 920: train 0.919005 lr: 1.497e-04 134 | [[06/22/2019 03:13:25 PM]] Step 940: train 0.895367 lr: 9.032e-05 135 | [[06/22/2019 03:13:41 PM]] Step 960: train 0.936508 lr: 4.568e-05 136 | [[06/22/2019 03:13:58 PM]] Step 980: train 0.939540 lr: 1.603e-05 137 | [[06/22/2019 03:14:14 PM]] Step 1000: train 0.933365 lr: 1.546e-06 138 | [[06/22/2019 03:14:17 PM]] Criterion loss: 0.484089 139 | [[06/22/2019 03:14:17 PM]] accuracy: 86.00% 140 | [[06/22/2019 03:14:17 PM]] top_3_accuracy: 95.40% 141 | [[06/22/2019 03:14:17 PM]] Snapshot metric -0.86000000 142 | [[06/22/2019 03:14:17 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.86000000_1000.pth... 
143 | [[06/22/2019 03:14:17 PM]] New low 144 | 145 | -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/bs64_8680.txt: -------------------------------------------------------------------------------- 1 | [[06/22/2019 02:41:28 PM]] SEED: 231 2 | [[06/22/2019 02:41:28 PM]] # of parameters: 27,770,943 3 | [[06/22/2019 02:41:28 PM]] # of trainable parameters: 27,770,943 4 | [[06/22/2019 02:41:28 PM]] Optimizer Adam ( 5 | Parameter Group 0 6 | amsgrad: False 7 | betas: (0.9, 0.999) 8 | eps: 1e-08 9 | initial_lr: 0.005 10 | lr: 5e-05 11 | weight_decay: 0 12 | 13 | Parameter Group 1 14 | amsgrad: False 15 | betas: (0.9, 0.999) 16 | eps: 1e-08 17 | initial_lr: 0.005 18 | lr: 5e-05 19 | weight_decay: 0 20 | ) 21 | [[06/22/2019 02:41:28 PM]] Batches per epoch: 201 22 | [[06/22/2019 02:41:28 PM]] ====================Epoch 1==================== 23 | [[06/22/2019 02:41:45 PM]] Step 20: train 3.597263 lr: 4.933e-04 24 | [[06/22/2019 02:42:01 PM]] Step 40: train 2.026338 lr: 9.858e-04 25 | [[06/22/2019 02:42:18 PM]] Step 60: train 1.883784 lr: 1.478e-03 26 | [[06/22/2019 02:42:34 PM]] Step 80: train 1.725523 lr: 1.971e-03 27 | [[06/22/2019 02:42:50 PM]] Step 100: train 1.609397 lr: 2.463e-03 28 | [[06/22/2019 02:42:53 PM]] Criterion loss: 2.403526 29 | [[06/22/2019 02:42:53 PM]] accuracy: 34.40% 30 | [[06/22/2019 02:42:53 PM]] top_3_accuracy: 63.00% 31 | [[06/22/2019 02:42:53 PM]] Snapshot metric -0.34400000 32 | [[06/22/2019 02:42:53 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.34400000_100.pth... 33 | [[06/22/2019 02:42:53 PM]] New low 34 | 35 | [[06/22/2019 02:43:10 PM]] Step 120: train 1.539507 lr: 2.956e-03 36 | [[06/22/2019 02:43:26 PM]] Step 140: train 1.656980 lr: 3.449e-03 37 | [[06/22/2019 02:43:43 PM]] Step 160: train 1.592132 lr: 3.941e-03 38 | [[06/22/2019 02:43:59 PM]] Step 180: train 1.371064 lr: 4.434e-03 39 | [[06/22/2019 02:44:16 PM]] Step 200: train 1.439013 lr: 4.926e-03 40 | [[06/22/2019 02:44:18 PM]] Criterion loss: 1.570898 41 | [[06/22/2019 02:44:18 PM]] accuracy: 54.20% 42 | [[06/22/2019 02:44:18 PM]] top_3_accuracy: 79.20% 43 | [[06/22/2019 02:44:18 PM]] Snapshot metric -0.54200000 44 | [[06/22/2019 02:44:18 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.54200000_200.pth... 45 | [[06/22/2019 02:44:19 PM]] New low 46 | 47 | [[06/22/2019 02:44:19 PM]] ====================Epoch 2==================== 48 | [[06/22/2019 02:44:36 PM]] Step 220: train 1.482403 lr: 4.996e-03 49 | [[06/22/2019 02:44:52 PM]] Step 240: train 1.352111 lr: 4.977e-03 50 | [[06/22/2019 02:45:08 PM]] Step 260: train 1.351472 lr: 4.942e-03 51 | [[06/22/2019 02:45:25 PM]] Step 280: train 1.337714 lr: 4.893e-03 52 | [[06/22/2019 02:45:41 PM]] Step 300: train 1.190638 lr: 4.830e-03 53 | [[06/22/2019 02:45:44 PM]] Criterion loss: 1.551471 54 | [[06/22/2019 02:45:44 PM]] accuracy: 57.20% 55 | [[06/22/2019 02:45:44 PM]] top_3_accuracy: 84.60% 56 | [[06/22/2019 02:45:44 PM]] Snapshot metric -0.57200000 57 | [[06/22/2019 02:45:44 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.57200000_300.pth... 
58 | [[06/22/2019 02:45:44 PM]] New low 59 | 60 | [[06/22/2019 02:46:01 PM]] Step 320: train 1.171512 lr: 4.752e-03 61 | [[06/22/2019 02:46:17 PM]] Step 340: train 1.148336 lr: 4.660e-03 62 | [[06/22/2019 02:46:34 PM]] Step 360: train 1.125046 lr: 4.555e-03 63 | [[06/22/2019 02:46:52 PM]] Step 380: train 1.155357 lr: 4.438e-03 64 | [[06/22/2019 02:47:10 PM]] Step 400: train 1.104876 lr: 4.309e-03 65 | [[06/22/2019 02:47:13 PM]] Criterion loss: 1.028381 66 | [[06/22/2019 02:47:13 PM]] accuracy: 68.40% 67 | [[06/22/2019 02:47:13 PM]] top_3_accuracy: 89.60% 68 | [[06/22/2019 02:47:13 PM]] Snapshot metric -0.68400000 69 | [[06/22/2019 02:47:13 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.68400000_400.pth... 70 | [[06/22/2019 02:47:13 PM]] New low 71 | 72 | [[06/22/2019 02:47:15 PM]] ====================Epoch 3==================== 73 | [[06/22/2019 02:47:31 PM]] Step 420: train 1.110575 lr: 4.168e-03 74 | [[06/22/2019 02:47:49 PM]] Step 440: train 1.051789 lr: 4.018e-03 75 | [[06/22/2019 02:48:06 PM]] Step 460: train 0.986835 lr: 3.858e-03 76 | [[06/22/2019 02:48:24 PM]] Step 480: train 1.029533 lr: 3.690e-03 77 | [[06/22/2019 02:48:41 PM]] Step 500: train 0.950887 lr: 3.515e-03 78 | [[06/22/2019 02:48:44 PM]] Criterion loss: 0.888465 79 | [[06/22/2019 02:48:44 PM]] accuracy: 69.60% 80 | [[06/22/2019 02:48:44 PM]] top_3_accuracy: 91.80% 81 | [[06/22/2019 02:48:44 PM]] Snapshot metric -0.69600000 82 | [[06/22/2019 02:48:44 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.69600000_500.pth... 83 | [[06/22/2019 02:48:45 PM]] New low 84 | 85 | [[06/22/2019 02:49:03 PM]] Step 520: train 0.961737 lr: 3.334e-03 86 | [[06/22/2019 02:49:20 PM]] Step 540: train 0.851117 lr: 3.147e-03 87 | [[06/22/2019 02:49:38 PM]] Step 560: train 0.842348 lr: 2.957e-03 88 | [[06/22/2019 02:49:57 PM]] Step 580: train 0.833667 lr: 2.763e-03 89 | [[06/22/2019 02:50:14 PM]] Step 600: train 0.892872 lr: 2.568e-03 90 | [[06/22/2019 02:50:17 PM]] Criterion loss: 0.726943 91 | [[06/22/2019 02:50:17 PM]] accuracy: 78.40% 92 | [[06/22/2019 02:50:17 PM]] top_3_accuracy: 93.20% 93 | [[06/22/2019 02:50:17 PM]] Snapshot metric -0.78400000 94 | [[06/22/2019 02:50:17 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.78400000_600.pth... 95 | [[06/22/2019 02:50:18 PM]] New low 96 | 97 | [[06/22/2019 02:50:21 PM]] ====================Epoch 4==================== 98 | [[06/22/2019 02:50:36 PM]] Step 620: train 0.784497 lr: 2.373e-03 99 | [[06/22/2019 02:50:54 PM]] Step 640: train 0.796126 lr: 2.179e-03 100 | [[06/22/2019 02:51:12 PM]] Step 660: train 0.752741 lr: 1.986e-03 101 | [[06/22/2019 02:51:30 PM]] Step 680: train 0.773949 lr: 1.797e-03 102 | [[06/22/2019 02:51:48 PM]] Step 700: train 0.780226 lr: 1.611e-03 103 | [[06/22/2019 02:51:51 PM]] Criterion loss: 0.588716 104 | [[06/22/2019 02:51:51 PM]] accuracy: 81.40% 105 | [[06/22/2019 02:51:51 PM]] top_3_accuracy: 96.00% 106 | [[06/22/2019 02:51:51 PM]] Snapshot metric -0.81400000 107 | [[06/22/2019 02:51:51 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.81400000_700.pth... 
108 | [[06/22/2019 02:51:51 PM]] New low 109 | 110 | [[06/22/2019 02:52:09 PM]] Step 720: train 0.763110 lr: 1.432e-03 111 | [[06/22/2019 02:52:27 PM]] Step 740: train 0.733605 lr: 1.258e-03 112 | [[06/22/2019 02:52:44 PM]] Step 760: train 0.665256 lr: 1.093e-03 113 | [[06/22/2019 02:53:02 PM]] Step 780: train 0.687970 lr: 9.358e-04 114 | [[06/22/2019 02:53:19 PM]] Step 800: train 0.635656 lr: 7.883e-04 115 | [[06/22/2019 02:53:21 PM]] Criterion loss: 0.513557 116 | [[06/22/2019 02:53:21 PM]] accuracy: 83.40% 117 | [[06/22/2019 02:53:22 PM]] top_3_accuracy: 95.80% 118 | [[06/22/2019 02:53:22 PM]] Snapshot metric -0.83400000 119 | [[06/22/2019 02:53:22 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83400000_800.pth... 120 | [[06/22/2019 02:53:22 PM]] New low 121 | 122 | [[06/22/2019 02:53:25 PM]] ====================Epoch 5==================== 123 | [[06/22/2019 02:53:40 PM]] Step 820: train 0.626556 lr: 6.513e-04 124 | [[06/22/2019 02:53:57 PM]] Step 840: train 0.600769 lr: 5.256e-04 125 | [[06/22/2019 02:54:15 PM]] Step 860: train 0.598774 lr: 4.119e-04 126 | [[06/22/2019 02:54:33 PM]] Step 880: train 0.527382 lr: 3.109e-04 127 | [[06/22/2019 02:54:50 PM]] Step 900: train 0.511858 lr: 2.233e-04 128 | [[06/22/2019 02:54:53 PM]] Criterion loss: 0.456041 129 | [[06/22/2019 02:54:53 PM]] accuracy: 85.80% 130 | [[06/22/2019 02:54:53 PM]] top_3_accuracy: 96.80% 131 | [[06/22/2019 02:54:53 PM]] Snapshot metric -0.85800000 132 | [[06/22/2019 02:54:53 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.85800000_900.pth... 133 | [[06/22/2019 02:54:53 PM]] New low 134 | 135 | [[06/22/2019 02:55:10 PM]] Step 920: train 0.606073 lr: 1.497e-04 136 | [[06/22/2019 02:55:28 PM]] Step 940: train 0.585275 lr: 9.032e-05 137 | [[06/22/2019 02:55:46 PM]] Step 960: train 0.676848 lr: 4.568e-05 138 | [[06/22/2019 02:56:04 PM]] Step 980: train 0.553108 lr: 1.603e-05 139 | [[06/22/2019 02:56:22 PM]] Step 1000: train 0.535630 lr: 1.546e-06 140 | [[06/22/2019 02:56:25 PM]] Criterion loss: 0.430974 141 | [[06/22/2019 02:56:25 PM]] accuracy: 86.80% 142 | [[06/22/2019 02:56:25 PM]] top_3_accuracy: 96.80% 143 | [[06/22/2019 02:56:25 PM]] Snapshot metric -0.86800000 144 | [[06/22/2019 02:56:25 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.86800000_1000.pth... 145 | [[06/22/2019 02:56:25 PM]] New low 146 | 147 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /pytorch_helper_bot/helperbot/bot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import logging 4 | from pathlib import Path 5 | from collections import deque 6 | from typing import List, Tuple, Iterable, Optional, Union, Sequence 7 | from dataclasses import dataclass, field 8 | 9 | import numpy as np 10 | import torch 11 | from torch.nn.utils.clip_grad import clip_grad_norm_ 12 | from tqdm import tqdm 13 | 14 | from .logger import Logger 15 | from .metrics import Metric 16 | 17 | try: 18 | from apex import amp 19 | APEX_AVAILABLE = True 20 | except ModuleNotFoundError: 21 | APEX_AVAILABLE = False 22 | 23 | SEED = int(os.environ.get("SEED", 9293)) 24 | 25 | random.seed(SEED) 26 | np.random.seed(SEED) 27 | torch.manual_seed(SEED) 28 | torch.cuda.manual_seed(SEED) 29 | 30 | 31 | @dataclass 32 | class BaseBot: 33 | """Base Interface to Model Training and Inference""" 34 | train_loader: Iterable 35 | val_loader: Iterable 36 | avg_window: int 37 | criterion: object 38 | model: torch.nn.Module 39 | optimizer: torch.optim.Optimizer 40 | name: str = "basebot" 41 | use_amp: bool = False 42 | clip_grad: float = 0 43 | batch_idx: int = 0 44 | checkpoint_dir: Path = Path("./data/cache/model_cache/") 45 | device: Union[str, torch.device] = "cuda:0" 46 | log_dir: Path = Path("./data/cache/logs/") 47 | log_level: int = logging.INFO 48 | loss_format: str = "%.8f" 49 | metric_format: Optional[str] = None 50 | use_tensorboard: bool = False 51 | gradient_accumulation_steps: int = 1 52 | echo: bool = True 53 | step: int = 0 54 | best_performers: List[Tuple] = field(init=False) 55 | train_losses: deque = field(init=False) 56 | train_weights: deque = field(init=False) 57 | metrics: Sequence = () 58 | callbacks: Sequence = () 59 | monitor_metric: str = "loss" 60 | pbar: bool = False 61 | 62 | def __post_init__(self): 63 | assert (self.use_amp and APEX_AVAILABLE) or (not self.use_amp) 64 | self.logger = Logger( 65 | self.name, str(self.log_dir), self.log_level, 66 | use_tensorboard=self.use_tensorboard, echo=self.echo) 67 | self.logger.info("SEED: %s", SEED) 68 | self.checkpoint_dir.mkdir(exist_ok=True, parents=True) 69 | self.best_performers: List[Tuple] = [] 70 | if self.metric_format is None: 71 | self.metric_format = self.loss_format 72 | self.train_losses = deque(maxlen=self.avg_window) 73 | self.train_weights = deque(maxlen=self.avg_window) 74 | self.count_model_parameters() 75 | 76 | def count_model_parameters(self): 77 | self.logger.info( 78 | "# of parameters: {:,d}".format( 79 | np.sum(list(p.numel() for p in self.model.parameters())))) 80 | self.logger.info( 81 | "# of trainable parameters: {:,d}".format( 82 | np.sum(list(p.numel() for p in self.model.parameters() if p.requires_grad)))) 83 | 84 | def train_one_step(self, input_tensors, target): 85 | self.model.train() 86 | assert self.model.training 87 | output = self.model(*input_tensors) 88 | batch_loss = self.criterion( 89 | self.extract_prediction(output), target 90 | ) / self.gradient_accumulation_steps 91 | if self.use_amp: 92 | with amp.scale_loss(batch_loss, self.optimizer) as scaled_loss: 93 | scaled_loss.backward() 94 | else: 95 | batch_loss.backward() 96 | self.train_losses.append( 97 | batch_loss.data.cpu().numpy() * self.gradient_accumulation_steps) 98 | self.train_weights.append(input_tensors[0].size(self.batch_idx)) 99 | if self.clip_grad > 0: 100 | if not self.use_amp: 101 | 
clip_grad_norm_(self.model.parameters(), self.clip_grad) 102 | else: 103 | clip_grad_norm_(amp.master_params( 104 | self.optimizer), self.clip_grad) 105 | if self.step % self.gradient_accumulation_steps == 0: 106 | self.optimizer.step() 107 | self.optimizer.zero_grad() 108 | 109 | def log_progress(self): 110 | train_loss_avg = np.average( 111 | self.train_losses, weights=self.train_weights) 112 | self.logger.info( 113 | "Step %s: train %.6f lr: %.3e", 114 | self.step, train_loss_avg, self.optimizer.param_groups[-1]['lr']) 115 | self.logger.tb_scalars( 116 | "lr", self.optimizer.param_groups[0]['lr'], self.step) 117 | self.logger.tb_scalars( 118 | "losses", {"train": train_loss_avg}, self.step) 119 | 120 | def snapshot(self): 121 | metrics = self.eval(self.val_loader) 122 | target_metric = metrics[self.monitor_metric] 123 | metric_str = self.metric_format % target_metric 124 | self.logger.info("Snapshot metric %s", metric_str) 125 | self.logger.tb_scalars( 126 | "losses", {"val": metrics["loss"]}, self.step) 127 | self.logger.tb_scalars( 128 | "monitor_metric", {"val": target_metric}, self.step) 129 | target_path = ( 130 | self.checkpoint_dir / 131 | "snapshot_{}_{}_{}.pth".format(self.name, metric_str, self.step)) 132 | self.best_performers.append((target_metric, target_path, self.step)) 133 | self.best_performers = sorted(self.best_performers, key=lambda x: x[0]) 134 | self.logger.info("Saving checkpoint %s...", target_path) 135 | torch.save(self.model.state_dict(), target_path) 136 | assert Path(target_path).exists() 137 | return target_metric 138 | 139 | @staticmethod 140 | def extract_prediction(output): 141 | """Assumes single output""" 142 | return output[:, 0] 143 | 144 | @staticmethod 145 | def transform_prediction(prediction): 146 | return prediction 147 | 148 | def run_batch_inputs_callbacks(self, input_tensors, targets): 149 | for callback in self.callbacks: 150 | input_tensors, targets = callback.on_batch_inputs( 151 | self, input_tensors, targets) 152 | return input_tensors, targets 153 | 154 | def run_step_ends_callbacks(self): 155 | for callback in self.callbacks: 156 | callback.on_step_ends(self) 157 | 158 | def run_epoch_ends_callbacks(self, epoch): 159 | for callback in self.callbacks: 160 | callback.on_epoch_ends(self, epoch) 161 | 162 | def train( 163 | self, n_steps, *, log_interval=50, 164 | early_stopping_cnt=0, min_improv=1e-4, 165 | snapshot_interval=2500, keep_n_snapshots=-1): 166 | self.optimizer.zero_grad() 167 | if self.val_loader is not None: 168 | best_val_loss = 100 169 | epoch = 0 170 | wo_improvement = 0 171 | self.best_performers = [] 172 | self.logger.info( 173 | "Optimizer {}".format(str(self.optimizer))) 174 | self.logger.info("Batches per epoch: {}".format( 175 | len(self.train_loader))) 176 | try: 177 | while self.step < n_steps: 178 | epoch += 1 179 | self.logger.info( 180 | "=" * 20 + "Epoch %d" + "=" * 20, epoch) 181 | for *input_tensors, targets in self.train_loader: 182 | input_tensors = [x.to(self.device) for x in input_tensors] 183 | targets = targets.to(self.device) 184 | input_tensors, targets = self.run_batch_inputs_callbacks( 185 | input_tensors, targets) 186 | self.train_one_step(input_tensors, targets) 187 | self.step += 1 188 | if self.step % log_interval == 0: 189 | self.log_progress() 190 | if ((callable(snapshot_interval) and snapshot_interval(self.step)) 191 | or (not callable(snapshot_interval) and self.step % snapshot_interval == 0)): 192 | loss = self.snapshot() 193 | if best_val_loss > loss + min_improv: 194 | 
self.logger.info("New low\n") 195 | best_val_loss = loss 196 | wo_improvement = 0 197 | else: 198 | wo_improvement += 1 199 | if keep_n_snapshots > 0: 200 | self.remove_checkpoints(keep=keep_n_snapshots) 201 | self.run_step_ends_callbacks() 202 | if early_stopping_cnt and wo_improvement > early_stopping_cnt: 203 | return 204 | if self.step >= n_steps: 205 | break 206 | self.run_epoch_ends_callbacks(epoch + 1) 207 | except KeyboardInterrupt: 208 | pass 209 | 210 | def eval(self, loader): 211 | """Warning: Only support datasets whose predictions and labels fit in memory together.""" 212 | self.model.eval() 213 | preds, ys = [], [] 214 | losses, weights = [], [] 215 | self.logger.debug("Evaluating...") 216 | with torch.set_grad_enabled(False): 217 | for *input_tensors, y_local in tqdm(loader, disable=not self.pbar): 218 | input_tensors = [x.to(self.device) for x in input_tensors] 219 | output = self.extract_prediction(self.model(*input_tensors)) 220 | batch_loss = self.criterion( 221 | output, y_local.to(self.device)) 222 | losses.append(batch_loss.data.cpu().item()) 223 | weights.append(y_local.size(self.batch_idx)) 224 | # Save batch labels and predictions 225 | preds.append(output.cpu()) 226 | ys.append(y_local.cpu()) 227 | loss = np.average(losses, weights=weights) 228 | self.logger.info("Criterion loss: {}".format(self.loss_format % loss)) 229 | metrics = {"loss": loss} 230 | global_ys, global_preds = torch.cat(ys), torch.cat(preds) 231 | for metric in self.metrics: 232 | metric_loss, metric_string = metric(global_ys, global_preds) 233 | metrics[metric.name] = metric_loss 234 | self.logger.info(f"{metric.name}: {metric_string}") 235 | return metrics 236 | 237 | def predict_batch(self, input_tensors): 238 | self.model.eval() 239 | tmp = self.model(*input_tensors) 240 | return self.extract_prediction(tmp) 241 | 242 | def predict_avg(self, loader, k=8): 243 | assert len(self.best_performers) >= k 244 | preds = [] 245 | # Iterating through checkpoints 246 | for i in range(k): 247 | target = self.best_performers[i][1] 248 | self.logger.info("Loading %s", format(target)) 249 | self.load_model(target) 250 | preds.append(self.predict(loader).unsqueeze(0)) 251 | return torch.cat(preds, dim=0).mean(dim=0) 252 | 253 | def predict(self, loader, *, return_y=False): 254 | self.model.eval() 255 | outputs, y_global = [], [] 256 | with torch.set_grad_enabled(False): 257 | for *input_tensors, y_local in tqdm(loader, disable=not self.pbar): 258 | input_tensors = [x.to(self.device) for x in input_tensors] 259 | outputs.append(self.predict_batch(input_tensors).cpu()) 260 | if return_y: 261 | y_global.append(y_local) 262 | outputs = torch.cat(outputs, dim=0) 263 | if return_y: 264 | y_global = torch.cat(y_global, dim=0) 265 | return outputs, y_global.cpu() 266 | return outputs 267 | 268 | def remove_checkpoints(self, keep=0): 269 | for checkpoint in np.unique([x[1] for x in self.best_performers[keep:]]): 270 | Path(checkpoint).unlink() 271 | self.best_performers = self.best_performers[:keep] 272 | 273 | def load_model(self, target_path): 274 | self.model.load_state_dict(torch.load(target_path)) 275 | -------------------------------------------------------------------------------- /imet/adabound.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.optim import Optimizer 4 | 5 | 6 | class AdaBound(Optimizer): 7 | """Implements AdaBound algorithm. 
8 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 9 | Arguments: 10 | params (iterable): iterable of parameters to optimize or dicts defining 11 | parameter groups 12 | lr (float, optional): Adam learning rate (default: 1e-3) 13 | betas (Tuple[float, float], optional): coefficients used for computing 14 | running averages of gradient and its square (default: (0.9, 0.999)) 15 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 16 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 17 | eps (float, optional): term added to the denominator to improve 18 | numerical stability (default: 1e-8) 19 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 20 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 21 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 22 | https://openreview.net/forum?id=Bkg3g2R9FX 23 | """ 24 | 25 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 26 | eps=1e-8, weight_decay=0, amsbound=False): 27 | if not 0.0 <= lr: 28 | raise ValueError("Invalid learning rate: {}".format(lr)) 29 | if not 0.0 <= eps: 30 | raise ValueError("Invalid epsilon value: {}".format(eps)) 31 | if not 0.0 <= betas[0] < 1.0: 32 | raise ValueError( 33 | "Invalid beta parameter at index 0: {}".format(betas[0])) 34 | if not 0.0 <= betas[1] < 1.0: 35 | raise ValueError( 36 | "Invalid beta parameter at index 1: {}".format(betas[1])) 37 | if not 0.0 <= final_lr: 38 | raise ValueError( 39 | "Invalid final learning rate: {}".format(final_lr)) 40 | if not 0.0 <= gamma < 1.0: 41 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 42 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 43 | weight_decay=weight_decay, amsbound=amsbound) 44 | super(AdaBound, self).__init__(params, defaults) 45 | 46 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 47 | 48 | def __setstate__(self, state): 49 | super(AdaBound, self).__setstate__(state) 50 | for group in self.param_groups: 51 | group.setdefault('amsbound', False) 52 | 53 | def step(self, closure=None): 54 | """Performs a single optimization step. 55 | Arguments: 56 | closure (callable, optional): A closure that reevaluates the model 57 | and returns the loss. 58 | """ 59 | loss = None 60 | if closure is not None: 61 | loss = closure() 62 | 63 | for group, base_lr in zip(self.param_groups, self.base_lrs): 64 | for p in group['params']: 65 | if p.grad is None: 66 | continue 67 | grad = p.grad.data 68 | if grad.is_sparse: 69 | raise RuntimeError( 70 | 'AdaBound does not support sparse gradients') 71 | amsbound = group['amsbound'] 72 | 73 | state = self.state[p] 74 | 75 | # State initialization 76 | if len(state) == 0: 77 | state['step'] = 0 78 | # Exponential moving average of gradient values 79 | state['exp_avg'] = torch.zeros_like(p.data) 80 | # Exponential moving average of squared gradient values 81 | state['exp_avg_sq'] = torch.zeros_like(p.data) 82 | if amsbound: 83 | # Maintains max of all exp. moving avg. of sq. grad.
values 84 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 85 | 86 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 87 | if amsbound: 88 | max_exp_avg_sq = state['max_exp_avg_sq'] 89 | beta1, beta2 = group['betas'] 90 | 91 | state['step'] += 1 92 | 93 | if group['weight_decay'] != 0: 94 | grad = grad.add(group['weight_decay'], p.data) 95 | 96 | # Decay the first and second moment running average coefficient 97 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 98 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 99 | if amsbound: 100 | # Maintains the maximum of all 2nd moment running avg. till now 101 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 102 | # Use the max. for normalizing running avg. of gradient 103 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 104 | else: 105 | denom = exp_avg_sq.sqrt().add_(group['eps']) 106 | 107 | bias_correction1 = 1 - beta1 ** state['step'] 108 | bias_correction2 = 1 - beta2 ** state['step'] 109 | step_size = group['lr'] * \ 110 | math.sqrt(bias_correction2) / bias_correction1 111 | 112 | # Applies bounds on actual learning rate 113 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 114 | final_lr = group['final_lr'] * group['lr'] / base_lr 115 | lower_bound = final_lr * \ 116 | (1 - 1 / (group['gamma'] * state['step'] + 1)) 117 | upper_bound = final_lr * \ 118 | (1 + 1 / (group['gamma'] * state['step'])) 119 | step_size = torch.full_like(denom, step_size) 120 | step_size.div_(denom).clamp_( 121 | lower_bound, upper_bound).mul_(exp_avg) 122 | 123 | p.data.add_(-step_size) 124 | 125 | return loss 126 | 127 | 128 | class AdaBoundW(Optimizer): 129 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101) 130 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 131 | Arguments: 132 | params (iterable): iterable of parameters to optimize or dicts defining 133 | parameter groups 134 | lr (float, optional): Adam learning rate (default: 1e-3) 135 | betas (Tuple[float, float], optional): coefficients used for computing 136 | running averages of gradient and its square (default: (0.9, 0.999)) 137 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 138 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 139 | eps (float, optional): term added to the denominator to improve 140 | numerical stability (default: 1e-8) 141 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 142 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 143 | .. 
Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 144 | https://openreview.net/forum?id=Bkg3g2R9FX 145 | """ 146 | 147 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 148 | eps=1e-8, weight_decay=0, amsbound=False): 149 | if not 0.0 <= lr: 150 | raise ValueError("Invalid learning rate: {}".format(lr)) 151 | if not 0.0 <= eps: 152 | raise ValueError("Invalid epsilon value: {}".format(eps)) 153 | if not 0.0 <= betas[0] < 1.0: 154 | raise ValueError( 155 | "Invalid beta parameter at index 0: {}".format(betas[0])) 156 | if not 0.0 <= betas[1] < 1.0: 157 | raise ValueError( 158 | "Invalid beta parameter at index 1: {}".format(betas[1])) 159 | if not 0.0 <= final_lr: 160 | raise ValueError( 161 | "Invalid final learning rate: {}".format(final_lr)) 162 | if not 0.0 <= gamma < 1.0: 163 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 164 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 165 | weight_decay=weight_decay, amsbound=amsbound) 166 | super(AdaBoundW, self).__init__(params, defaults) 167 | 168 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 169 | 170 | def __setstate__(self, state): 171 | super(AdaBoundW, self).__setstate__(state) 172 | for group in self.param_groups: 173 | group.setdefault('amsbound', False) 174 | 175 | def step(self, closure=None): 176 | """Performs a single optimization step. 177 | Arguments: 178 | closure (callable, optional): A closure that reevaluates the model 179 | and returns the loss. 180 | """ 181 | loss = None 182 | if closure is not None: 183 | loss = closure() 184 | 185 | for group, base_lr in zip(self.param_groups, self.base_lrs): 186 | for p in group['params']: 187 | if p.grad is None: 188 | continue 189 | grad = p.grad.data 190 | if grad.is_sparse: 191 | raise RuntimeError( 192 | 'AdaBoundW does not support sparse gradients') 193 | amsbound = group['amsbound'] 194 | 195 | state = self.state[p] 196 | 197 | # State initialization 198 | if len(state) == 0: 199 | state['step'] = 0 200 | # Exponential moving average of gradient values 201 | state['exp_avg'] = torch.zeros_like(p.data) 202 | # Exponential moving average of squared gradient values 203 | state['exp_avg_sq'] = torch.zeros_like(p.data) 204 | if amsbound: 205 | # Maintains max of all exp. moving avg. of sq. grad. values 206 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 207 | 208 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 209 | if amsbound: 210 | max_exp_avg_sq = state['max_exp_avg_sq'] 211 | beta1, beta2 = group['betas'] 212 | 213 | state['step'] += 1 214 | 215 | # Decay the first and second moment running average coefficient 216 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 217 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 218 | if amsbound: 219 | # Maintains the maximum of all 2nd moment running avg. till now 220 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 221 | # Use the max. for normalizing running avg.
of gradient 222 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 223 | else: 224 | denom = exp_avg_sq.sqrt().add_(group['eps']) 225 | 226 | bias_correction1 = 1 - beta1 ** state['step'] 227 | bias_correction2 = 1 - beta2 ** state['step'] 228 | step_size = group['lr'] * \ 229 | math.sqrt(bias_correction2) / bias_correction1 230 | 231 | # Applies bounds on actual learning rate 232 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 233 | final_lr = group['final_lr'] * group['lr'] / base_lr 234 | lower_bound = final_lr * \ 235 | (1 - 1 / (group['gamma'] * state['step'] + 1)) 236 | upper_bound = final_lr * \ 237 | (1 + 1 / (group['gamma'] * state['step'])) 238 | step_size = torch.full_like(denom, step_size) 239 | step_size.div_(denom).clamp_( 240 | lower_bound, upper_bound).mul_(exp_avg) 241 | 242 | if group['weight_decay'] != 0: 243 | decayed_weights = torch.mul(p.data, group['weight_decay']) 244 | p.data.add_(-step_size) 245 | p.data.sub_(decayed_weights) 246 | else: 247 | p.data.add_(-step_size) 248 | 249 | return loss 250 | -------------------------------------------------------------------------------- /pytorch_helper_bot/examples/imagenette/logs/bs64_e10.txt: -------------------------------------------------------------------------------- 1 | [[06/22/2019 03:44:34 PM]] SEED: 231 2 | [[06/22/2019 03:44:34 PM]] # of parameters: 27,770,943 3 | [[06/22/2019 03:44:34 PM]] # of trainable parameters: 27,770,943 4 | [[06/22/2019 03:44:34 PM]] Optimizer Adam ( 5 | Parameter Group 0 6 | amsgrad: False 7 | betas: (0.9, 0.999) 8 | eps: 1e-08 9 | initial_lr: 0.005 10 | lr: 5e-05 11 | weight_decay: 0 12 | 13 | Parameter Group 1 14 | amsgrad: False 15 | betas: (0.9, 0.999) 16 | eps: 1e-08 17 | initial_lr: 0.005 18 | lr: 5e-05 19 | weight_decay: 0 20 | ) 21 | [[06/22/2019 03:44:34 PM]] Batches per epoch: 201 22 | [[06/22/2019 03:44:34 PM]] ====================Epoch 1==================== 23 | [[06/22/2019 03:44:52 PM]] Step 20: train 3.573733 lr: 4.933e-04 24 | [[06/22/2019 03:45:09 PM]] Step 40: train 2.030234 lr: 9.858e-04 25 | [[06/22/2019 03:45:26 PM]] Step 60: train 1.984939 lr: 1.478e-03 26 | [[06/22/2019 03:45:43 PM]] Step 80: train 1.709919 lr: 1.971e-03 27 | [[06/22/2019 03:46:00 PM]] Step 100: train 1.608930 lr: 2.463e-03 28 | [[06/22/2019 03:46:03 PM]] Criterion loss: 2.605317 29 | [[06/22/2019 03:46:03 PM]] accuracy: 36.40% 30 | [[06/22/2019 03:46:03 PM]] top_3_accuracy: 60.20% 31 | [[06/22/2019 03:46:03 PM]] Snapshot metric -0.36400000 32 | [[06/22/2019 03:46:03 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.36400000_100.pth... 33 | [[06/22/2019 03:46:03 PM]] New low 34 | 35 | [[06/22/2019 03:46:20 PM]] Step 120: train 1.557482 lr: 2.956e-03 36 | [[06/22/2019 03:46:36 PM]] Step 140: train 1.548209 lr: 3.449e-03 37 | [[06/22/2019 03:46:53 PM]] Step 160: train 1.653716 lr: 3.941e-03 38 | [[06/22/2019 03:47:09 PM]] Step 180: train 1.428034 lr: 4.434e-03 39 | [[06/22/2019 03:47:26 PM]] Step 200: train 1.459277 lr: 4.926e-03 40 | [[06/22/2019 03:47:28 PM]] Criterion loss: 1.658659 41 | [[06/22/2019 03:47:28 PM]] accuracy: 46.00% 42 | [[06/22/2019 03:47:28 PM]] top_3_accuracy: 77.00% 43 | [[06/22/2019 03:47:28 PM]] Snapshot metric -0.46000000 44 | [[06/22/2019 03:47:28 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.46000000_200.pth... 
45 | [[06/22/2019 03:47:29 PM]] New low 46 | 47 | [[06/22/2019 03:47:30 PM]] ====================Epoch 2==================== 48 | [[06/22/2019 03:47:46 PM]] Step 220: train 1.482041 lr: 4.999e-03 49 | [[06/22/2019 03:48:02 PM]] Step 240: train 1.341844 lr: 4.995e-03 50 | [[06/22/2019 03:48:19 PM]] Step 260: train 1.359507 lr: 4.989e-03 51 | [[06/22/2019 03:48:36 PM]] Step 280: train 1.409255 lr: 4.979e-03 52 | [[06/22/2019 03:48:53 PM]] Step 300: train 1.213519 lr: 4.966e-03 53 | [[06/22/2019 03:48:55 PM]] Criterion loss: 1.155639 54 | [[06/22/2019 03:48:55 PM]] accuracy: 63.00% 55 | [[06/22/2019 03:48:55 PM]] top_3_accuracy: 86.00% 56 | [[06/22/2019 03:48:55 PM]] Snapshot metric -0.63000000 57 | [[06/22/2019 03:48:55 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.63000000_300.pth... 58 | [[06/22/2019 03:48:56 PM]] New low 59 | 60 | [[06/22/2019 03:49:13 PM]] Step 320: train 1.209477 lr: 4.950e-03 61 | [[06/22/2019 03:49:29 PM]] Step 340: train 1.168014 lr: 4.932e-03 62 | [[06/22/2019 03:49:46 PM]] Step 360: train 1.130536 lr: 4.910e-03 63 | [[06/22/2019 03:50:03 PM]] Step 380: train 1.201653 lr: 4.885e-03 64 | [[06/22/2019 03:50:19 PM]] Step 400: train 1.146011 lr: 4.858e-03 65 | [[06/22/2019 03:50:22 PM]] Criterion loss: 1.090183 66 | [[06/22/2019 03:50:22 PM]] accuracy: 64.60% 67 | [[06/22/2019 03:50:22 PM]] top_3_accuracy: 88.40% 68 | [[06/22/2019 03:50:22 PM]] Snapshot metric -0.64600000 69 | [[06/22/2019 03:50:22 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.64600000_400.pth... 70 | [[06/22/2019 03:50:23 PM]] New low 71 | 72 | [[06/22/2019 03:50:24 PM]] ====================Epoch 3==================== 73 | [[06/22/2019 03:50:40 PM]] Step 420: train 1.210714 lr: 4.828e-03 74 | [[06/22/2019 03:50:56 PM]] Step 440: train 1.090848 lr: 4.795e-03 75 | [[06/22/2019 03:51:13 PM]] Step 460: train 1.061501 lr: 4.759e-03 76 | [[06/22/2019 03:51:30 PM]] Step 480: train 1.065800 lr: 4.720e-03 77 | [[06/22/2019 03:51:47 PM]] Step 500: train 0.984935 lr: 4.679e-03 78 | [[06/22/2019 03:51:49 PM]] Criterion loss: 1.451416 79 | [[06/22/2019 03:51:49 PM]] accuracy: 55.80% 80 | [[06/22/2019 03:51:49 PM]] top_3_accuracy: 87.00% 81 | [[06/22/2019 03:51:49 PM]] Snapshot metric -0.55800000 82 | [[06/22/2019 03:51:49 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.55800000_500.pth... 83 | [[06/22/2019 03:52:06 PM]] Step 520: train 1.045997 lr: 4.635e-03 84 | [[06/22/2019 03:52:23 PM]] Step 540: train 0.967297 lr: 4.589e-03 85 | [[06/22/2019 03:52:40 PM]] Step 560: train 0.921210 lr: 4.540e-03 86 | [[06/22/2019 03:52:56 PM]] Step 580: train 0.934175 lr: 4.488e-03 87 | [[06/22/2019 03:53:13 PM]] Step 600: train 0.997390 lr: 4.435e-03 88 | [[06/22/2019 03:53:15 PM]] Criterion loss: 0.846122 89 | [[06/22/2019 03:53:15 PM]] accuracy: 71.80% 90 | [[06/22/2019 03:53:15 PM]] top_3_accuracy: 91.20% 91 | [[06/22/2019 03:53:15 PM]] Snapshot metric -0.71800000 92 | [[06/22/2019 03:53:15 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.71800000_600.pth... 
93 | [[06/22/2019 03:53:16 PM]] New low 94 | 95 | [[06/22/2019 03:53:18 PM]] ====================Epoch 4==================== 96 | [[06/22/2019 03:53:33 PM]] Step 620: train 0.912232 lr: 4.378e-03 97 | [[06/22/2019 03:53:50 PM]] Step 640: train 0.897097 lr: 4.320e-03 98 | [[06/22/2019 03:54:06 PM]] Step 660: train 0.885259 lr: 4.259e-03 99 | [[06/22/2019 03:54:23 PM]] Step 680: train 0.898099 lr: 4.197e-03 100 | [[06/22/2019 03:54:40 PM]] Step 700: train 0.952782 lr: 4.132e-03 101 | [[06/22/2019 03:54:43 PM]] Criterion loss: 0.828609 102 | [[06/22/2019 03:54:43 PM]] accuracy: 72.20% 103 | [[06/22/2019 03:54:43 PM]] top_3_accuracy: 91.60% 104 | [[06/22/2019 03:54:43 PM]] Snapshot metric -0.72200000 105 | [[06/22/2019 03:54:43 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.72200000_700.pth... 106 | [[06/22/2019 03:54:43 PM]] New low 107 | 108 | [[06/22/2019 03:55:00 PM]] Step 720: train 0.877741 lr: 4.065e-03 109 | [[06/22/2019 03:55:16 PM]] Step 740: train 0.926822 lr: 3.996e-03 110 | [[06/22/2019 03:55:33 PM]] Step 760: train 0.826499 lr: 3.926e-03 111 | [[06/22/2019 03:55:50 PM]] Step 780: train 0.945527 lr: 3.854e-03 112 | [[06/22/2019 03:56:07 PM]] Step 800: train 0.872751 lr: 3.780e-03 113 | [[06/22/2019 03:56:09 PM]] Criterion loss: 0.853279 114 | [[06/22/2019 03:56:09 PM]] accuracy: 72.40% 115 | [[06/22/2019 03:56:09 PM]] top_3_accuracy: 91.40% 116 | [[06/22/2019 03:56:09 PM]] Snapshot metric -0.72400000 117 | [[06/22/2019 03:56:09 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.72400000_800.pth... 118 | [[06/22/2019 03:56:10 PM]] New low 119 | 120 | [[06/22/2019 03:56:13 PM]] ====================Epoch 5==================== 121 | [[06/22/2019 03:56:27 PM]] Step 820: train 0.826435 lr: 3.705e-03 122 | [[06/22/2019 03:56:44 PM]] Step 840: train 0.832256 lr: 3.628e-03 123 | [[06/22/2019 03:57:01 PM]] Step 860: train 0.848991 lr: 3.550e-03 124 | [[06/22/2019 03:57:17 PM]] Step 880: train 0.825508 lr: 3.470e-03 125 | [[06/22/2019 03:57:34 PM]] Step 900: train 0.703108 lr: 3.390e-03 126 | [[06/22/2019 03:57:37 PM]] Criterion loss: 0.754616 127 | [[06/22/2019 03:57:37 PM]] accuracy: 75.40% 128 | [[06/22/2019 03:57:37 PM]] top_3_accuracy: 94.60% 129 | [[06/22/2019 03:57:37 PM]] Snapshot metric -0.75400000 130 | [[06/22/2019 03:57:37 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.75400000_900.pth... 131 | [[06/22/2019 03:57:37 PM]] New low 132 | 133 | [[06/22/2019 03:57:55 PM]] Step 920: train 0.858812 lr: 3.308e-03 134 | [[06/22/2019 03:58:12 PM]] Step 940: train 0.799415 lr: 3.225e-03 135 | [[06/22/2019 03:58:30 PM]] Step 960: train 0.862341 lr: 3.142e-03 136 | [[06/22/2019 03:58:47 PM]] Step 980: train 0.751697 lr: 3.058e-03 137 | [[06/22/2019 03:59:03 PM]] Step 1000: train 0.731585 lr: 2.973e-03 138 | [[06/22/2019 03:59:06 PM]] Criterion loss: 0.707321 139 | [[06/22/2019 03:59:06 PM]] accuracy: 78.20% 140 | [[06/22/2019 03:59:06 PM]] top_3_accuracy: 92.20% 141 | [[06/22/2019 03:59:06 PM]] Snapshot metric -0.78200000 142 | [[06/22/2019 03:59:06 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.78200000_1000.pth... 
143 | [[06/22/2019 03:59:06 PM]] New low 144 | 145 | [[06/22/2019 03:59:10 PM]] ====================Epoch 6==================== 146 | [[06/22/2019 03:59:23 PM]] Step 1020: train 0.613325 lr: 2.887e-03 147 | [[06/22/2019 03:59:39 PM]] Step 1040: train 0.756710 lr: 2.801e-03 148 | [[06/22/2019 03:59:56 PM]] Step 1060: train 0.671936 lr: 2.715e-03 149 | [[06/22/2019 04:00:12 PM]] Step 1080: train 0.643940 lr: 2.628e-03 150 | [[06/22/2019 04:00:29 PM]] Step 1100: train 0.702415 lr: 2.541e-03 151 | [[06/22/2019 04:00:32 PM]] Criterion loss: 0.894972 152 | [[06/22/2019 04:00:32 PM]] accuracy: 71.40% 153 | [[06/22/2019 04:00:32 PM]] top_3_accuracy: 92.60% 154 | [[06/22/2019 04:00:32 PM]] Snapshot metric -0.71400000 155 | [[06/22/2019 04:00:32 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.71400000_1100.pth... 156 | [[06/22/2019 04:00:48 PM]] Step 1120: train 0.698035 lr: 2.454e-03 157 | [[06/22/2019 04:01:05 PM]] Step 1140: train 0.665236 lr: 2.368e-03 158 | [[06/22/2019 04:01:21 PM]] Step 1160: train 0.663330 lr: 2.281e-03 159 | [[06/22/2019 04:01:38 PM]] Step 1180: train 0.644659 lr: 2.195e-03 160 | [[06/22/2019 04:01:54 PM]] Step 1200: train 0.710065 lr: 2.109e-03 161 | [[06/22/2019 04:01:57 PM]] Criterion loss: 0.679426 162 | [[06/22/2019 04:01:57 PM]] accuracy: 77.80% 163 | [[06/22/2019 04:01:57 PM]] top_3_accuracy: 93.80% 164 | [[06/22/2019 04:01:57 PM]] Snapshot metric -0.77800000 165 | [[06/22/2019 04:01:57 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.77800000_1200.pth... 166 | [[06/22/2019 04:02:02 PM]] ====================Epoch 7==================== 167 | [[06/22/2019 04:02:14 PM]] Step 1220: train 0.657953 lr: 2.023e-03 168 | [[06/22/2019 04:02:31 PM]] Step 1240: train 0.561891 lr: 1.938e-03 169 | [[06/22/2019 04:02:47 PM]] Step 1260: train 0.674270 lr: 1.854e-03 170 | [[06/22/2019 04:03:05 PM]] Step 1280: train 0.597797 lr: 1.771e-03 171 | [[06/22/2019 04:03:22 PM]] Step 1300: train 0.531784 lr: 1.688e-03 172 | [[06/22/2019 04:03:25 PM]] Criterion loss: 0.606873 173 | [[06/22/2019 04:03:25 PM]] accuracy: 82.00% 174 | [[06/22/2019 04:03:25 PM]] top_3_accuracy: 96.60% 175 | [[06/22/2019 04:03:25 PM]] Snapshot metric -0.82000000 176 | [[06/22/2019 04:03:25 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.82000000_1300.pth... 177 | [[06/22/2019 04:03:25 PM]] New low 178 | 179 | [[06/22/2019 04:03:42 PM]] Step 1320: train 0.530845 lr: 1.606e-03 180 | [[06/22/2019 04:03:59 PM]] Step 1340: train 0.559952 lr: 1.526e-03 181 | [[06/22/2019 04:04:15 PM]] Step 1360: train 0.560953 lr: 1.446e-03 182 | [[06/22/2019 04:04:32 PM]] Step 1380: train 0.549461 lr: 1.368e-03 183 | [[06/22/2019 04:04:48 PM]] Step 1400: train 0.553163 lr: 1.292e-03 184 | [[06/22/2019 04:04:51 PM]] Criterion loss: 0.493012 185 | [[06/22/2019 04:04:51 PM]] accuracy: 84.40% 186 | [[06/22/2019 04:04:51 PM]] top_3_accuracy: 96.20% 187 | [[06/22/2019 04:04:51 PM]] Snapshot metric -0.84400000 188 | [[06/22/2019 04:04:51 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.84400000_1400.pth... 
189 | [[06/22/2019 04:04:51 PM]] New low 190 | 191 | [[06/22/2019 04:04:57 PM]] ====================Epoch 8==================== 192 | [[06/22/2019 04:05:09 PM]] Step 1420: train 0.526511 lr: 1.216e-03 193 | [[06/22/2019 04:05:25 PM]] Step 1440: train 0.520478 lr: 1.143e-03 194 | [[06/22/2019 04:05:42 PM]] Step 1460: train 0.455833 lr: 1.071e-03 195 | [[06/22/2019 04:05:58 PM]] Step 1480: train 0.480429 lr: 1.000e-03 196 | [[06/22/2019 04:06:15 PM]] Step 1500: train 0.500275 lr: 9.316e-04 197 | [[06/22/2019 04:06:17 PM]] Criterion loss: 0.519724 198 | [[06/22/2019 04:06:17 PM]] accuracy: 83.60% 199 | [[06/22/2019 04:06:17 PM]] top_3_accuracy: 94.80% 200 | [[06/22/2019 04:06:17 PM]] Snapshot metric -0.83600000 201 | [[06/22/2019 04:06:17 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.83600000_1500.pth... 202 | [[06/22/2019 04:06:35 PM]] Step 1520: train 0.468733 lr: 8.649e-04 203 | [[06/22/2019 04:06:53 PM]] Step 1540: train 0.469848 lr: 8.003e-04 204 | [[06/22/2019 04:07:11 PM]] Step 1560: train 0.507552 lr: 7.376e-04 205 | [[06/22/2019 04:07:28 PM]] Step 1580: train 0.484207 lr: 6.771e-04 206 | [[06/22/2019 04:07:45 PM]] Step 1600: train 0.435758 lr: 6.188e-04 207 | [[06/22/2019 04:07:48 PM]] Criterion loss: 0.423036 208 | [[06/22/2019 04:07:48 PM]] accuracy: 87.40% 209 | [[06/22/2019 04:07:48 PM]] top_3_accuracy: 96.40% 210 | [[06/22/2019 04:07:48 PM]] Snapshot metric -0.87400000 211 | [[06/22/2019 04:07:48 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.87400000_1600.pth... 212 | [[06/22/2019 04:07:48 PM]] New low 213 | 214 | [[06/22/2019 04:07:55 PM]] ====================Epoch 9==================== 215 | [[06/22/2019 04:08:05 PM]] Step 1620: train 0.390876 lr: 5.627e-04 216 | [[06/22/2019 04:08:22 PM]] Step 1640: train 0.457620 lr: 5.090e-04 217 | [[06/22/2019 04:08:39 PM]] Step 1660: train 0.399328 lr: 4.577e-04 218 | [[06/22/2019 04:08:56 PM]] Step 1680: train 0.383037 lr: 4.089e-04 219 | [[06/22/2019 04:09:13 PM]] Step 1700: train 0.409151 lr: 3.626e-04 220 | [[06/22/2019 04:09:16 PM]] Criterion loss: 0.404898 221 | [[06/22/2019 04:09:16 PM]] accuracy: 88.80% 222 | [[06/22/2019 04:09:16 PM]] top_3_accuracy: 96.40% 223 | [[06/22/2019 04:09:16 PM]] Snapshot metric -0.88800000 224 | [[06/22/2019 04:09:16 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.88800000_1700.pth... 225 | [[06/22/2019 04:09:16 PM]] New low 226 | 227 | [[06/22/2019 04:09:33 PM]] Step 1720: train 0.443100 lr: 3.188e-04 228 | [[06/22/2019 04:09:50 PM]] Step 1740: train 0.379389 lr: 2.777e-04 229 | [[06/22/2019 04:10:08 PM]] Step 1760: train 0.369363 lr: 2.393e-04 230 | [[06/22/2019 04:10:25 PM]] Step 1780: train 0.388807 lr: 2.036e-04 231 | [[06/22/2019 04:10:41 PM]] Step 1800: train 0.413061 lr: 1.707e-04 232 | [[06/22/2019 04:10:44 PM]] Criterion loss: 0.355131 233 | [[06/22/2019 04:10:44 PM]] accuracy: 89.80% 234 | [[06/22/2019 04:10:44 PM]] top_3_accuracy: 97.00% 235 | [[06/22/2019 04:10:44 PM]] Snapshot metric -0.89800000 236 | [[06/22/2019 04:10:44 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.89800000_1800.pth... 
237 | [[06/22/2019 04:10:44 PM]] New low 238 | 239 | [[06/22/2019 04:10:52 PM]] ====================Epoch 10==================== 240 | [[06/22/2019 04:11:02 PM]] Step 1820: train 0.383528 lr: 1.405e-04 241 | [[06/22/2019 04:11:19 PM]] Step 1840: train 0.317547 lr: 1.133e-04 242 | [[06/22/2019 04:11:35 PM]] Step 1860: train 0.336927 lr: 8.888e-05 243 | [[06/22/2019 04:11:52 PM]] Step 1880: train 0.365633 lr: 6.739e-05 244 | [[06/22/2019 04:12:09 PM]] Step 1900: train 0.366723 lr: 4.883e-05 245 | [[06/22/2019 04:12:12 PM]] Criterion loss: 0.354443 246 | [[06/22/2019 04:12:12 PM]] accuracy: 89.80% 247 | [[06/22/2019 04:12:12 PM]] top_3_accuracy: 97.00% 248 | [[06/22/2019 04:12:12 PM]] Snapshot metric -0.89800000 249 | [[06/22/2019 04:12:12 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.89800000_1900.pth... 250 | [[06/22/2019 04:12:29 PM]] Step 1920: train 0.347152 lr: 3.324e-05 251 | [[06/22/2019 04:12:45 PM]] Step 1940: train 0.342237 lr: 2.062e-05 252 | [[06/22/2019 04:13:02 PM]] Step 1960: train 0.356638 lr: 1.099e-05 253 | [[06/22/2019 04:13:19 PM]] Step 1980: train 0.317537 lr: 4.357e-06 254 | [[06/22/2019 04:13:36 PM]] Step 2000: train 0.320815 lr: 7.389e-07 255 | [[06/22/2019 04:13:39 PM]] Criterion loss: 0.350640 256 | [[06/22/2019 04:13:39 PM]] accuracy: 89.60% 257 | [[06/22/2019 04:13:39 PM]] top_3_accuracy: 97.40% 258 | [[06/22/2019 04:13:39 PM]] Snapshot metric -0.89600000 259 | [[06/22/2019 04:13:39 PM]] Saving checkpoint data/cache/model_cache/snapshot_basebot_-0.89600000_2000.pth... 260 | -------------------------------------------------------------------------------- /imet/main.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import argparse 3 | from itertools import islice 4 | import json 5 | from pathlib import Path 6 | import warnings 7 | from typing import Dict, Callable, List 8 | from functools import partial 9 | from dataclasses import dataclass 10 | 11 | import numpy as np 12 | import pandas as pd 13 | from sklearn.metrics import fbeta_score, log_loss 14 | from sklearn.exceptions import UndefinedMetricWarning 15 | import torch 16 | from torch import nn, cuda 17 | from torch.utils.data import DataLoader 18 | from torch.optim.lr_scheduler import CosineAnnealingLR 19 | from tqdm import tqdm 20 | from helperbot import ( 21 | freeze_layers, TriangularLR, BaseBot, WeightDecayOptimizerWrapper, 22 | GradualWarmupScheduler, FBeta, LearningRateSchedulerCallback, 23 | MixUpCallback 24 | ) 25 | 26 | from .adabound import AdaBound 27 | from .models import get_seresnet_model, get_densenet_model, get_seresnet_partial_model 28 | from .dataset import TrainDataset, TestDataset, get_ids, N_CLASSES, DATA_ROOT 29 | from .transforms import get_train_transform, get_test_transform, cv2 30 | from .utils import ON_KAGGLE 31 | from .loss import FocalLoss 32 | 33 | CACHE_DIR = Path('/tmp/imet' if ON_KAGGLE else './data/cache/') 34 | CACHE_DIR.mkdir(exist_ok=True, parents=True) 35 | MODEL_DIR = Path('.' 
if ON_KAGGLE else './data/cache/') 36 | MODEL_DIR.mkdir(exist_ok=True, parents=True) 37 | 38 | 39 | def make_loader(args, ds_class, root, df: pd.DataFrame, image_transform, drop_last=False, shuffle=False) -> DataLoader: 40 | return DataLoader( 41 | ds_class(root, df, image_transform, debug=args.debug), 42 | shuffle=shuffle, 43 | batch_size=args.batch_size, 44 | num_workers=args.workers, 45 | drop_last=drop_last 46 | ) 47 | 48 | 49 | def opt_params(layer, learning_rate, final_lr): 50 | return {'params': layer.parameters(), 'lr': learning_rate, 'final_lr': final_lr} 51 | 52 | 53 | def setup_differential_learning_rates( 54 | optimizer_constructor: Callable[[List[Dict]], torch.optim.Optimizer], 55 | layer_groups: List[nn.Parameter], 56 | lrs: List[float], final_lrs: List[float]) -> torch.optim.Optimizer: 57 | assert len(layer_groups) == len( 58 | lrs), f'size mismatch, expected {len(layer_groups)} lrs, but got {len(lrs)}' 59 | optimizer = optimizer_constructor( 60 | [opt_params(*p) for p in zip(layer_groups, lrs, final_lrs)]) 61 | return optimizer 62 | 63 | 64 | @dataclass 65 | class ImageClassificationBot(BaseBot): 66 | checkpoint_dir: Path = CACHE_DIR / "model_cache/" 67 | log_dir: Path = MODEL_DIR / "logs/" 68 | 69 | def __post_init__(self): 70 | super().__post_init__() 71 | self.loss_format = "%.6f" 72 | self.metrics = (FBeta(step=0.05, beta=2, average="samples"),) 73 | self.monitor_metric = "fbeta" 74 | 75 | def extract_prediction(self, x): 76 | return x 77 | 78 | 79 | def train_stage_one(args, model, train_loader, valid_loader, criterion): 80 | optimizer = WeightDecayOptimizerWrapper( 81 | torch.optim.Adam(model.parameters(), lr=2e-3), 82 | 0.1 83 | ) 84 | freeze_layers(model, [True, True, False]) 85 | 86 | # stage 1 87 | n_steps = len(train_loader) // 2 88 | bot = ImageClassificationBot( 89 | model=model, train_loader=train_loader, 90 | val_loader=valid_loader, clip_grad=10., 91 | optimizer=optimizer, echo=not ON_KAGGLE, 92 | criterion=criterion, 93 | avg_window=len(train_loader) // 10, 94 | callbacks=[ 95 | LearningRateSchedulerCallback(TriangularLR( 96 | optimizer, 100, ratio=3, steps_per_cycle=n_steps)) 97 | ], 98 | pbar=not ON_KAGGLE, use_tensorboard=False 99 | ) 100 | bot.logger.info(bot.criterion) 101 | bot.train( 102 | n_steps, 103 | log_interval=len(train_loader) // 10, 104 | snapshot_interval=len(train_loader) // 4 105 | ) 106 | bot.load_model(bot.best_performers[0][1]) 107 | torch.save(bot.model.state_dict(), str( 108 | CACHE_DIR / f"stage1_{args.fold}.pth")) 109 | bot.remove_checkpoints(keep=0) 110 | 111 | 112 | def train_stage_two(args, model, train_loader, valid_loader, criterion): 113 | n_steps = len(train_loader) * args.epochs 114 | optimizer = WeightDecayOptimizerWrapper( 115 | setup_differential_learning_rates( 116 | partial( 117 | torch.optim.Adam, weight_decay=0 118 | # AdaBound, weight_decay=0, gamma=1/5000, betas=(.8, .999) 119 | # torch.optim.SGD, momentum=0.9 120 | ), model, [1e-5, 8e-5, 5e-4], [1., 1., 1.] 
121 | ), weight_decay=5e-2, change_with_lr=True) 122 | freeze_layers(model, [False, False, False]) 123 | bot = ImageClassificationBot( 124 | model=model, train_loader=train_loader, 125 | val_loader=valid_loader, clip_grad=10., 126 | optimizer=optimizer, echo=not ON_KAGGLE, 127 | criterion=criterion, 128 | avg_window=len(train_loader) // 15, 129 | callbacks=[ 130 | LearningRateSchedulerCallback( 131 | TriangularLR( 132 | optimizer, 100, ratio=4, steps_per_cycle=n_steps 133 | ) 134 | # GradualWarmupScheduler( 135 | # optimizer, 100, len(train_loader), 136 | # after_scheduler=CosineAnnealingLR( 137 | # optimizer, n_steps - len(train_loader) 138 | # ) 139 | ), 140 | MixUpCallback(alpha=0.2) 141 | ], 142 | pbar=not ON_KAGGLE, use_tensorboard=not ON_KAGGLE 143 | ) 144 | bot.logger.info(bot.criterion) 145 | bot.model.load_state_dict(torch.load( 146 | CACHE_DIR / f"stage1_{args.fold}.pth")) 147 | 148 | # def snapshot_or_not(step): 149 | # if step < 4000: 150 | # if step % 2000 == 0: 151 | # return True 152 | # elif (step - 4000) % 1000 == 0: 153 | # return True 154 | # return False 155 | 156 | bot.train( 157 | n_steps, 158 | log_interval=len(train_loader) // 20, 159 | snapshot_interval=len(train_loader) // 2, 160 | # snapshot_interval=snapshot_or_not, 161 | early_stopping_cnt=args.early_stop, 162 | min_improv=1e-4, 163 | keep_n_snapshots=1 164 | ) 165 | bot.load_model(bot.best_performers[0][1]) 166 | bot.remove_checkpoints(keep=0) 167 | 168 | # Final model 169 | torch.save(bot.model, MODEL_DIR / f"final_{args.fold}.pth") 170 | # Failover (args + state dict) 171 | torch.save( 172 | [args.arch, bot.model.state_dict()], 173 | MODEL_DIR / f"failover_{args.arch}_{args.fold}.pth" 174 | ) 175 | 176 | 177 | def find_best_fbeta_threshold(truth, probs, beta=2, step=0.05): 178 | best, best_thres = 0, -1 179 | argsorted = probs.argsort(axis=1) 180 | with warnings.catch_warnings(): 181 | warnings.simplefilter('ignore', category=UndefinedMetricWarning) 182 | for thres in np.arange(step, .5, step): 183 | current = fbeta_score( 184 | truth, 185 | binarize_prediction( 186 | probs, thres, argsorted 187 | ).astype("int8"), 188 | beta=beta, average="samples") 189 | if current > best: 190 | best = current 191 | best_thres = thres 192 | return best, best_thres 193 | 194 | 195 | def print_eval(truth, preds): 196 | best_score, threshold = find_best_fbeta_threshold( 197 | truth, preds, beta=2, step=0.01 198 | ) 199 | print(f"f2: {best_score:.4f} @ threshold {threshold:.2f}") 200 | print(f"loss: {log_loss(truth, preds) / preds.shape[1]:.8f}") 201 | 202 | 203 | def eval_model(args, valid_loaders: List[DataLoader]): 204 | model_dir = MODEL_DIR / args.model 205 | model = torch.load(str(model_dir / f"final_{args.fold}.pth")) 206 | model = model.cuda() 207 | bot = ImageClassificationBot( 208 | model=model, train_loader=None, 209 | val_loader=None, optimizer=None, 210 | echo=not ON_KAGGLE, criterion=None, 211 | pbar=not ON_KAGGLE, avg_window=100 212 | ) 213 | tmp = [] 214 | for valid_loader in valid_loaders: 215 | preds, truth = bot.predict(valid_loader, return_y=True) 216 | preds = torch.sigmoid(preds) 217 | tmp.append(preds.numpy()) 218 | # print(np.mean(tmp, axis=0, keepdims=False).shape, preds.numpy().shape) 219 | final_preds = np.mean(tmp, axis=0, keepdims=False) 220 | print_eval( 221 | truth.numpy(), 222 | final_preds 223 | ) 224 | if args.min_samples > 0: 225 | final_preds = mask_predictions(args, final_preds) 226 | print_eval( 227 | truth.numpy(), 228 | final_preds 229 | ) 230 | 231 | 232 | def predict_model(args, df: 
pd.DataFrame, loaders: List[DataLoader], name: str):
233 |     model_dir = MODEL_DIR / args.model
234 |     model = torch.load(str(model_dir / f"final_{args.fold}.pth"))
235 |     model = model.cuda()
236 |     bot = ImageClassificationBot(
237 |         model=model, train_loader=None,
238 |         val_loader=None, optimizer=None,
239 |         echo=not ON_KAGGLE, criterion=None,
240 |         pbar=not ON_KAGGLE, avg_window=100
241 |     )
242 |     tmp = []
243 | 
244 |     for loader in loaders:
245 |         preds = bot.predict(loader, return_y=False)
246 |         preds = torch.sigmoid(preds)
247 |         tmp.append(preds.numpy())
248 |     final_preds = np.mean(tmp, axis=0, keepdims=False)
249 |     # print(np.isnan(final_preds).sum())
250 |     df_preds = pd.DataFrame(final_preds, index=df["id"].values)
251 |     df_preds.to_pickle(CACHE_DIR / f"preds_{name}_{args.fold}.pkl")
252 | 
253 | 
254 | def mask_predictions(args, preds):
255 |     folds = pd.read_pickle(CACHE_DIR / 'folds.pkl')
256 |     mask = folds.iloc[:, 1:-1].sum(axis=0).values < args.min_samples
257 |     print(mask.shape, preds.shape)
258 |     print(f"Masking {sum(mask)} labels...")
259 |     preds[:, mask] = 0
260 |     return preds
261 | 
262 | 
263 | def main():
264 |     parser = argparse.ArgumentParser()
265 |     arg = parser.add_argument
266 |     arg('mode', choices=['train', 'validate',
267 |                          'predict_valid', 'predict_test'])
268 |     arg('--batch-size', type=int, default=32)
269 |     arg('--step', type=int, default=1)
270 |     arg('--workers', type=int, default=2 if ON_KAGGLE else 4)
271 |     arg('--tta', type=int, default=4)
272 |     arg('--epochs', type=int, default=10)
273 |     arg('--arch', type=str, default='seresnext50')
274 |     arg('--min-samples', type=int, default=0)
275 |     arg('--debug', action='store_true')
276 |     arg('--limit', type=int)
277 |     arg('--alpha', type=float, default=.5)
278 |     arg('--gamma', type=float, default=.25)
279 |     arg('--fold', type=int, default=0)
280 |     arg('--model', type=str, default=".")
281 |     arg('--early-stop', type=int, default=5)
282 |     args = parser.parse_args()
283 | 
284 |     if args.mode in ("train", "validate", "predict_valid"):
285 |         folds = pd.read_pickle(CACHE_DIR / 'folds.pkl')
286 |         train_root = DATA_ROOT / 'train'
287 |         train_fold = folds[folds['fold'] != args.fold]
288 |         valid_fold = folds[folds['fold'] == args.fold]
289 |         if args.limit:
290 |             train_fold = train_fold[:args.limit]
291 |             valid_fold = valid_fold[:args.limit]
292 | 
293 |     use_cuda = cuda.is_available()
294 |     train_transform = get_train_transform(cv2.BORDER_REFLECT_101)
295 |     test_transform = get_test_transform()
296 |     if args.mode == 'train':
297 |         if args.arch == 'seresnext50':
298 |             model = get_seresnet_model(
299 |                 arch="se_resnext50_32x4d",
300 |                 n_classes=N_CLASSES, pretrained=True)
301 |         elif args.arch == 'seresnext101':
302 |             model = get_seresnet_model(
303 |                 arch="se_resnext101_32x4d",
304 |                 n_classes=N_CLASSES, pretrained=True)
305 |         elif args.arch == 'seresnext50-partial':
306 |             train_transform = get_train_transform(cv2.BORDER_CONSTANT)
307 |             model = get_seresnet_partial_model(
308 |                 arch="se_resnext50_32x4d",
309 |                 n_classes=N_CLASSES, pretrained=True)
310 |         elif args.arch.startswith("densenet"):
311 |             model = get_densenet_model(arch=args.arch)
312 |         # elif args.arch.startswith("efficientnet"):
313 |         #     model = get_efficientnet(arch=args.arch)
314 |         else:
315 |             raise ValueError("No such model")
316 |         if use_cuda:
317 |             model = model.cuda()
318 |         # criterion = nn.BCEWithLogitsLoss()
319 |         criterion =
FocalLoss(gamma=args.gamma, alpha=args.alpha) 320 | (CACHE_DIR / 'params.json').write_text( 321 | json.dumps(vars(args), indent=4, sort_keys=True)) 322 | 323 | train_loader = make_loader( 324 | args, TrainDataset, train_root, train_fold, train_transform, drop_last=True, shuffle=True) 325 | valid_loader = make_loader( 326 | args, TrainDataset, train_root, valid_fold, test_transform, shuffle=False) 327 | 328 | print(f'{len(train_loader.dataset):,} items in train, ' 329 | f'{len(valid_loader.dataset):,} in valid') 330 | 331 | # Stage 1 332 | train_stage_one(args, model, train_loader, valid_loader, criterion) 333 | 334 | # Stage 2 335 | train_stage_two(args, model, train_loader, valid_loader, criterion) 336 | 337 | elif args.mode == 'validate': 338 | valid_loaders = [ 339 | make_loader( 340 | args, TrainDataset, train_root, 341 | valid_fold, get_test_transform(), shuffle=False, drop_last=False), 342 | make_loader( 343 | args, TrainDataset, train_root, 344 | valid_fold, get_test_transform(flip=True), shuffle=False, drop_last=False) 345 | ] 346 | eval_model(args, valid_loaders) 347 | elif args.mode.startswith('predict'): 348 | if args.mode == 'predict_valid': 349 | loaders = [ 350 | make_loader( 351 | args, TestDataset, train_root, 352 | valid_fold, get_test_transform(), shuffle=False, drop_last=False), 353 | make_loader( 354 | args, TestDataset, train_root, 355 | valid_fold, get_test_transform(flip=True), shuffle=False, drop_last=False) 356 | ] 357 | predict_model(args, valid_fold, loaders, "valid") 358 | elif args.mode == 'predict_test': 359 | test_root = DATA_ROOT / 'test' 360 | df_test = pd.read_csv(DATA_ROOT / 'sample_submission.csv') 361 | if args.limit: 362 | df_test = df_test[:args.limit] 363 | print(df_test.shape) 364 | loaders = [ 365 | make_loader( 366 | args, TestDataset, test_root, df_test, 367 | get_test_transform(), shuffle=False, drop_last=False), 368 | make_loader( 369 | args, TestDataset, test_root, df_test, 370 | get_test_transform(flip=True), shuffle=False, drop_last=False) 371 | ] 372 | predict_model(args, df_test, loaders, "test") 373 | 374 | 375 | def binarize_prediction(probabilities, threshold: float, argsorted=None, 376 | min_labels=1, max_labels=10): 377 | """ Return matrix of 0/1 predictions, same shape as probabilities. 
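    Labels scoring above `threshold` are kept, capped at the `max_labels`
    highest-probability labels per sample; the `min_labels` top-scoring labels
    are always kept, even when nothing clears the threshold (editor's summary
    of the logic below).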
378 | """ 379 | assert probabilities.shape[1] == N_CLASSES 380 | if argsorted is None: 381 | argsorted = probabilities.argsort(axis=1) 382 | max_mask = _make_mask(argsorted, max_labels) 383 | min_mask = _make_mask(argsorted, min_labels) 384 | prob_mask = probabilities > threshold 385 | return (max_mask & prob_mask) | min_mask 386 | 387 | 388 | def _make_mask(argsorted, top_n: int): 389 | mask = np.zeros_like(argsorted, dtype=np.uint8) 390 | col_indices = argsorted[:, -top_n:].reshape(-1) 391 | row_indices = [i // top_n for i in range(len(col_indices))] 392 | mask[row_indices, col_indices] = 1 393 | return mask 394 | 395 | 396 | if __name__ == '__main__': 397 | main() 398 | -------------------------------------------------------------------------------- /imet/seresnet_partial.py: -------------------------------------------------------------------------------- 1 | """ 2 | Code adapted from 3 | https://raw.githubusercontent.com/Cadene/pretrained-models.pytorch/master/pretrainedmodels/models/senet.py 4 | """ 5 | from collections import OrderedDict 6 | import math 7 | 8 | import numpy as np 9 | import torch.nn as nn 10 | from torch.utils import model_zoo 11 | 12 | from .partialconv2d import PartialConv2d 13 | 14 | __all__ = ['SENet', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 15 | 'se_resnext50_32x4d', 'se_resnext101_32x4d'] 16 | 17 | pretrained_settings = { 18 | 'senet154': { 19 | 'imagenet': { 20 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth', 21 | 'input_space': 'RGB', 22 | 'input_size': [3, 224, 224], 23 | 'input_range': [0, 1], 24 | 'mean': [0.485, 0.456, 0.406], 25 | 'std': [0.229, 0.224, 0.225], 26 | 'num_classes': 1000 27 | } 28 | }, 29 | 'se_resnet50': { 30 | 'imagenet': { 31 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth', 32 | 'input_space': 'RGB', 33 | 'input_size': [3, 224, 224], 34 | 'input_range': [0, 1], 35 | 'mean': [0.485, 0.456, 0.406], 36 | 'std': [0.229, 0.224, 0.225], 37 | 'num_classes': 1000 38 | } 39 | }, 40 | 'se_resnet101': { 41 | 'imagenet': { 42 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth', 43 | 'input_space': 'RGB', 44 | 'input_size': [3, 224, 224], 45 | 'input_range': [0, 1], 46 | 'mean': [0.485, 0.456, 0.406], 47 | 'std': [0.229, 0.224, 0.225], 48 | 'num_classes': 1000 49 | } 50 | }, 51 | 'se_resnet152': { 52 | 'imagenet': { 53 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth', 54 | 'input_space': 'RGB', 55 | 'input_size': [3, 224, 224], 56 | 'input_range': [0, 1], 57 | 'mean': [0.485, 0.456, 0.406], 58 | 'std': [0.229, 0.224, 0.225], 59 | 'num_classes': 1000 60 | } 61 | }, 62 | 'se_resnext50_32x4d': { 63 | 'imagenet': { 64 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth', 65 | 'input_space': 'RGB', 66 | 'input_size': [3, 224, 224], 67 | 'input_range': [0, 1], 68 | 'mean': [0.485, 0.456, 0.406], 69 | 'std': [0.229, 0.224, 0.225], 70 | 'num_classes': 1000 71 | } 72 | }, 73 | 'se_resnext101_32x4d': { 74 | 'imagenet': { 75 | 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth', 76 | 'input_space': 'RGB', 77 | 'input_size': [3, 224, 224], 78 | 'input_range': [0, 1], 79 | 'mean': [0.485, 0.456, 0.406], 80 | 'std': [0.229, 0.224, 0.225], 81 | 'num_classes': 1000 82 | } 83 | }, 84 | } 85 | 86 | ZERO_TRANSFORMED = np.sum( 87 | np.array([0.485, 0.456, 0.406]) / np.array([0.229, 0.224, 0.225])) * -1 88 | 89 | 90 | class FirstPartialConv2d(PartialConv2d): 91 | 
def forward(self, input, mask_in=None): 92 | eps = 1e-4 93 | mask = (input.sum(dim=1, keepdim=True) > 94 | ZERO_TRANSFORMED + eps).float() 95 | # print("%.4f" % (mask.sum() / input.size(3) / input.size(2) / input.size(0))) 96 | return super().forward(input, mask_in=mask) 97 | 98 | 99 | class SEModule(nn.Module): 100 | 101 | def __init__(self, channels, reduction): 102 | super(SEModule, self).__init__() 103 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 104 | self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, 105 | padding=0) 106 | self.relu = nn.ReLU(inplace=True) 107 | self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, 108 | padding=0) 109 | self.sigmoid = nn.Sigmoid() 110 | 111 | def forward(self, x): 112 | module_input = x 113 | x = self.avg_pool(x) 114 | x = self.fc1(x) 115 | x = self.relu(x) 116 | x = self.fc2(x) 117 | x = self.sigmoid(x) 118 | return module_input * x 119 | 120 | 121 | class Bottleneck(nn.Module): 122 | """ 123 | Base class for bottlenecks that implements `forward()` method. 124 | """ 125 | 126 | def forward(self, x): 127 | residual = x 128 | 129 | out = self.conv1(x) 130 | out = self.bn1(out) 131 | out = self.relu(out) 132 | 133 | out = self.conv2(out) 134 | out = self.bn2(out) 135 | out = self.relu(out) 136 | 137 | out = self.conv3(out) 138 | out = self.bn3(out) 139 | 140 | if self.downsample is not None: 141 | residual = self.downsample(x) 142 | 143 | out = self.se_module(out) + residual 144 | out = self.relu(out) 145 | 146 | return out 147 | 148 | 149 | class SEBottleneck(Bottleneck): 150 | """ 151 | Bottleneck for SENet154. 152 | """ 153 | expansion = 4 154 | 155 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 156 | downsample=None): 157 | super(SEBottleneck, self).__init__() 158 | self.conv1 = nn.Conv2d( 159 | inplanes, planes * 2, kernel_size=1, bias=False) 160 | self.bn1 = nn.BatchNorm2d(planes * 2) 161 | self.conv2 = PartialConv2d(planes * 2, planes * 4, kernel_size=3, 162 | stride=stride, padding=1, groups=groups, 163 | bias=False) 164 | self.bn2 = nn.BatchNorm2d(planes * 4) 165 | self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1, 166 | bias=False) 167 | self.bn3 = nn.BatchNorm2d(planes * 4) 168 | self.relu = nn.ReLU(inplace=True) 169 | self.se_module = SEModule(planes * 4, reduction=reduction) 170 | self.downsample = downsample 171 | self.stride = stride 172 | 173 | 174 | class SEResNetBottleneck(Bottleneck): 175 | """ 176 | ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe 177 | implementation and uses `stride=stride` in `conv1` and not in `conv2` 178 | (the latter is used in the torchvision implementation of ResNet). 
179 | """ 180 | expansion = 4 181 | 182 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 183 | downsample=None): 184 | super(SEResNetBottleneck, self).__init__() 185 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, 186 | stride=stride) 187 | self.bn1 = nn.BatchNorm2d(planes) 188 | self.conv2 = PartialConv2d(planes, planes, kernel_size=3, padding=1, 189 | groups=groups, bias=False) 190 | self.bn2 = nn.BatchNorm2d(planes) 191 | self.conv3 = nn.Conv2d( 192 | planes, planes * 4, kernel_size=1, bias=False) 193 | self.bn3 = nn.BatchNorm2d(planes * 4) 194 | self.relu = nn.ReLU(inplace=True) 195 | self.se_module = SEModule(planes * 4, reduction=reduction) 196 | self.downsample = downsample 197 | self.stride = stride 198 | 199 | 200 | class SEResNeXtBottleneck(Bottleneck): 201 | """ 202 | ResNeXt bottleneck type C with a Squeeze-and-Excitation module. 203 | """ 204 | expansion = 4 205 | 206 | def __init__(self, inplanes, planes, groups, reduction, stride=1, 207 | downsample=None, base_width=4): 208 | super(SEResNeXtBottleneck, self).__init__() 209 | width = math.floor(planes * (base_width / 64)) * groups 210 | self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, 211 | stride=1) 212 | self.bn1 = nn.BatchNorm2d(width) 213 | self.conv2 = PartialConv2d(width, width, kernel_size=3, stride=stride, 214 | padding=1, groups=groups, bias=False) 215 | self.bn2 = nn.BatchNorm2d(width) 216 | self.conv3 = nn.Conv2d( 217 | width, planes * 4, kernel_size=1, bias=False) 218 | self.bn3 = nn.BatchNorm2d(planes * 4) 219 | self.relu = nn.ReLU(inplace=True) 220 | self.se_module = SEModule(planes * 4, reduction=reduction) 221 | self.downsample = downsample 222 | self.stride = stride 223 | 224 | 225 | class SENet(nn.Module): 226 | 227 | def __init__(self, block, layers, groups, reduction, dropout_p=0.2, 228 | inplanes=128, input_3x3=True, downsample_kernel_size=3, 229 | downsample_padding=1, num_classes=1000): 230 | """ 231 | Parameters 232 | ---------- 233 | block (nn.Module): Bottleneck class. 234 | - For SENet154: SEBottleneck 235 | - For SE-ResNet models: SEResNetBottleneck 236 | - For SE-ResNeXt models: SEResNeXtBottleneck 237 | layers (list of ints): Number of residual blocks for 4 layers of the 238 | network (layer1...layer4). 239 | groups (int): Number of groups for the 3x3 convolution in each 240 | bottleneck block. 241 | - For SENet154: 64 242 | - For SE-ResNet models: 1 243 | - For SE-ResNeXt models: 32 244 | reduction (int): Reduction ratio for Squeeze-and-Excitation modules. 245 | - For all models: 16 246 | dropout_p (float or None): Drop probability for the Dropout layer. 247 | If `None` the Dropout layer is not used. 248 | - For SENet154: 0.2 249 | - For SE-ResNet models: None 250 | - For SE-ResNeXt models: None 251 | inplanes (int): Number of input channels for layer1. 252 | - For SENet154: 128 253 | - For SE-ResNet models: 64 254 | - For SE-ResNeXt models: 64 255 | input_3x3 (bool): If `True`, use three 3x3 convolutions instead of 256 | a single 7x7 convolution in layer0. 257 | - For SENet154: True 258 | - For SE-ResNet models: False 259 | - For SE-ResNeXt models: False 260 | downsample_kernel_size (int): Kernel size for downsampling convolutions 261 | in layer2, layer3 and layer4. 262 | - For SENet154: 3 263 | - For SE-ResNet models: 1 264 | - For SE-ResNeXt models: 1 265 | downsample_padding (int): Padding for downsampling convolutions in 266 | layer2, layer3 and layer4. 
267 | - For SENet154: 1 268 | - For SE-ResNet models: 0 269 | - For SE-ResNeXt models: 0 270 | num_classes (int): Number of outputs in `last_linear` layer. 271 | - For all models: 1000 272 | """ 273 | super(SENet, self).__init__() 274 | self.inplanes = inplanes 275 | if input_3x3: 276 | layer0_modules = [ 277 | ('conv1', FirstPartialConv2d(3, 64, 3, stride=2, padding=1, 278 | bias=False)), 279 | ('bn1', nn.BatchNorm2d(64)), 280 | ('relu1', nn.ReLU(inplace=True)), 281 | ('conv2', PartialConv2d(64, 64, 3, stride=1, padding=1, 282 | bias=False)), 283 | ('bn2', nn.BatchNorm2d(64)), 284 | ('relu2', nn.ReLU(inplace=True)), 285 | ('conv3', PartialConv2d(64, inplanes, 3, stride=1, padding=1, 286 | bias=False)), 287 | ('bn3', nn.BatchNorm2d(inplanes)), 288 | ('relu3', nn.ReLU(inplace=True)), 289 | ] 290 | else: 291 | layer0_modules = [ 292 | ('conv1', FirstPartialConv2d(3, inplanes, kernel_size=7, stride=2, 293 | padding=3, bias=False)), 294 | ('bn1', nn.BatchNorm2d(inplanes)), 295 | ('relu1', nn.ReLU(inplace=True)), 296 | ] 297 | # To preserve compatibility with Caffe weights `ceil_mode=True` 298 | # is used instead of `padding=1`. 299 | layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2, 300 | ceil_mode=True))) 301 | self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) 302 | self.layer1 = self._make_layer( 303 | block, 304 | planes=64, 305 | blocks=layers[0], 306 | groups=groups, 307 | reduction=reduction, 308 | downsample_kernel_size=1, 309 | downsample_padding=0 310 | ) 311 | self.layer2 = self._make_layer( 312 | block, 313 | planes=128, 314 | blocks=layers[1], 315 | stride=2, 316 | groups=groups, 317 | reduction=reduction, 318 | downsample_kernel_size=downsample_kernel_size, 319 | downsample_padding=downsample_padding 320 | ) 321 | self.layer3 = self._make_layer( 322 | block, 323 | planes=256, 324 | blocks=layers[2], 325 | stride=2, 326 | groups=groups, 327 | reduction=reduction, 328 | downsample_kernel_size=downsample_kernel_size, 329 | downsample_padding=downsample_padding 330 | ) 331 | self.layer4 = self._make_layer( 332 | block, 333 | planes=512, 334 | blocks=layers[3], 335 | stride=2, 336 | groups=groups, 337 | reduction=reduction, 338 | downsample_kernel_size=downsample_kernel_size, 339 | downsample_padding=downsample_padding 340 | ) 341 | self.avg_pool = nn.AvgPool2d(7, stride=1) 342 | self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None 343 | self.last_linear = nn.Linear(512 * block.expansion, num_classes) 344 | 345 | def _make_layer(self, block, planes, blocks, groups, reduction, stride=1, 346 | downsample_kernel_size=1, downsample_padding=0): 347 | downsample = None 348 | if stride != 1 or self.inplanes != planes * block.expansion: 349 | downsample = nn.Sequential( 350 | PartialConv2d(self.inplanes, planes * block.expansion, 351 | kernel_size=downsample_kernel_size, stride=stride, 352 | padding=downsample_padding, bias=False), 353 | nn.BatchNorm2d(planes * block.expansion), 354 | ) 355 | 356 | layers = [] 357 | layers.append(block(self.inplanes, planes, groups, reduction, stride, 358 | downsample)) 359 | self.inplanes = planes * block.expansion 360 | for i in range(1, blocks): 361 | layers.append(block(self.inplanes, planes, groups, reduction)) 362 | 363 | return nn.Sequential(*layers) 364 | 365 | def features(self, x): 366 | x = self.layer0(x) 367 | x = self.layer1(x) 368 | x = self.layer2(x) 369 | x = self.layer3(x) 370 | x = self.layer4(x) 371 | return x 372 | 373 | def logits(self, x): 374 | x = self.avg_pool(x) 375 | if self.dropout is not None: 376 | 
x = self.dropout(x) 377 | x = x.view(x.size(0), -1) 378 | x = self.last_linear(x) 379 | return x 380 | 381 | def forward(self, x): 382 | x = self.features(x) 383 | x = self.logits(x) 384 | return x 385 | 386 | 387 | def initialize_pretrained_model(model, num_classes, settings): 388 | assert num_classes == settings['num_classes'], \ 389 | 'num_classes should be {}, but is {}'.format( 390 | settings['num_classes'], num_classes) 391 | model.load_state_dict(model_zoo.load_url(settings['url'])) 392 | model.input_space = settings['input_space'] 393 | model.input_size = settings['input_size'] 394 | model.input_range = settings['input_range'] 395 | model.mean = settings['mean'] 396 | model.std = settings['std'] 397 | 398 | 399 | def senet154(num_classes=1000, pretrained='imagenet'): 400 | model = SENet(SEBottleneck, [3, 8, 36, 3], groups=64, reduction=16, 401 | dropout_p=0.2, num_classes=num_classes) 402 | if pretrained is not None: 403 | settings = pretrained_settings['senet154'][pretrained] 404 | initialize_pretrained_model(model, num_classes, settings) 405 | return model 406 | 407 | 408 | def se_resnet50(num_classes=1000, pretrained='imagenet'): 409 | model = SENet(SEResNetBottleneck, [3, 4, 6, 3], groups=1, reduction=16, 410 | dropout_p=None, inplanes=64, input_3x3=False, 411 | downsample_kernel_size=1, downsample_padding=0, 412 | num_classes=num_classes) 413 | if pretrained is not None: 414 | settings = pretrained_settings['se_resnet50'][pretrained] 415 | initialize_pretrained_model(model, num_classes, settings) 416 | return model 417 | 418 | 419 | def se_resnet101(num_classes=1000, pretrained='imagenet'): 420 | model = SENet(SEResNetBottleneck, [3, 4, 23, 3], groups=1, reduction=16, 421 | dropout_p=None, inplanes=64, input_3x3=False, 422 | downsample_kernel_size=1, downsample_padding=0, 423 | num_classes=num_classes) 424 | if pretrained is not None: 425 | settings = pretrained_settings['se_resnet101'][pretrained] 426 | initialize_pretrained_model(model, num_classes, settings) 427 | return model 428 | 429 | 430 | def se_resnet152(num_classes=1000, pretrained='imagenet'): 431 | model = SENet(SEResNetBottleneck, [3, 8, 36, 3], groups=1, reduction=16, 432 | dropout_p=None, inplanes=64, input_3x3=False, 433 | downsample_kernel_size=1, downsample_padding=0, 434 | num_classes=num_classes) 435 | if pretrained is not None: 436 | settings = pretrained_settings['se_resnet152'][pretrained] 437 | initialize_pretrained_model(model, num_classes, settings) 438 | return model 439 | 440 | 441 | def se_resnext50_32x4d(num_classes=1000, pretrained='imagenet'): 442 | model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16, 443 | dropout_p=None, inplanes=64, input_3x3=False, 444 | downsample_kernel_size=1, downsample_padding=0, 445 | num_classes=num_classes) 446 | if pretrained is not None: 447 | settings = pretrained_settings['se_resnext50_32x4d'][pretrained] 448 | initialize_pretrained_model(model, num_classes, settings) 449 | return model 450 | 451 | 452 | def se_resnext101_32x4d(num_classes=1000, pretrained='imagenet'): 453 | model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3], groups=32, reduction=16, 454 | dropout_p=None, inplanes=64, input_3x3=False, 455 | downsample_kernel_size=1, downsample_padding=0, 456 | num_classes=num_classes) 457 | if pretrained is not None: 458 | settings = pretrained_settings['se_resnext101_32x4d'][pretrained] 459 | initialize_pretrained_model(model, num_classes, settings) 460 | return model 461 | 
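Editor's note: the sketch below is illustrative and not part of the original repository. It spells out the arithmetic behind ZERO_TRANSFORMED above (the channel-sum value of a black padding pixel after ImageNet normalization, which FirstPartialConv2d uses to derive its validity mask) and smoke-tests the randomly initialized partial-convolution SE-ResNeXt50. The 1103-class output size is an assumption taken from the iMet competition, and the forward pass presumes the PartialConv2d imported from imet/partialconv2d.py accepts a `mask_in` argument, as its use above suggests.

import torch

from imet.seresnet_partial import ZERO_TRANSFORMED, se_resnext50_32x4d

# A black pixel (0, 0, 0), normalized with the ImageNet mean/std, becomes
# (-0.485/0.229, -0.456/0.224, -0.406/0.225); summing the three channels gives
# -(2.118 + 2.036 + 1.804), i.e. roughly -5.958 == ZERO_TRANSFORMED.
print(round(float(ZERO_TRANSFORMED), 3))  # -5.958

# Build the model without pretrained weights (pretrained=None skips the
# ImageNet checkpoint download) and check the output shape on a dummy batch.
model = se_resnext50_32x4d(num_classes=1103, pretrained=None)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(2, 3, 224, 224))
print(logits.shape)  # expected: torch.Size([2, 1103])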
-------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # A comma-separated list of package or module names from where C extensions may 4 | # be loaded. Extensions are loading into the active Python interpreter and may 5 | # run arbitrary code. 6 | extension-pkg-whitelist=torch,numpy 7 | 8 | # Add files or directories to the blacklist. They should be base names, not 9 | # paths. 10 | ignore=CVS 11 | 12 | # Add files or directories matching the regex patterns to the blacklist. The 13 | # regex matches against base names, not paths. 14 | ignore-patterns= 15 | 16 | # Python code to execute, usually for sys.path manipulation such as 17 | # pygtk.require(). 18 | #init-hook= 19 | 20 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 21 | # number of processors available to use. 22 | jobs=1 23 | 24 | # Control the amount of potential inferred values when inferring a single 25 | # object. This can help the performance when dealing with large functions or 26 | # complex, nested conditions. 27 | limit-inference-results=100 28 | 29 | # List of plugins (as comma separated values of python modules names) to load, 30 | # usually to register additional checkers. 31 | load-plugins= 32 | 33 | # Pickle collected data for later comparisons. 34 | persistent=yes 35 | 36 | # Specify a configuration file. 37 | #rcfile= 38 | 39 | # When enabled, pylint would attempt to guess common misconfiguration and emit 40 | # user-friendly hints instead of false-positive error messages. 41 | suggestion-mode=yes 42 | 43 | # Allow loading of arbitrary C extensions. Extensions are imported into the 44 | # active Python interpreter and may run arbitrary code. 45 | unsafe-load-any-extension=no 46 | 47 | 48 | [MESSAGES CONTROL] 49 | 50 | # Only show warnings with the listed confidence levels. Leave empty to show 51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 52 | confidence= 53 | 54 | # Disable the message, report, category or checker with the given id(s). You 55 | # can either give multiple identifiers separated by comma (,) or put this 56 | # option multiple times (only on the command line, not in the configuration 57 | # file where it should appear only once). You can also use "--disable=all" to 58 | # disable everything first and then reenable specific checks. For example, if 59 | # you want to run only the similarities checker, you can use "--disable=all 60 | # --enable=similarities". If you want to run only the classes checker, but have 61 | # no Warning level messages displayed, use "--disable=all --enable=classes 62 | # --disable=W". 
63 | disable=print-statement, 64 | parameter-unpacking, 65 | unpacking-in-except, 66 | old-raise-syntax, 67 | backtick, 68 | long-suffix, 69 | old-ne-operator, 70 | old-octal-literal, 71 | import-star-module-level, 72 | non-ascii-bytes-literal, 73 | raw-checker-failed, 74 | bad-inline-option, 75 | locally-disabled, 76 | locally-enabled, 77 | file-ignored, 78 | suppressed-message, 79 | useless-suppression, 80 | deprecated-pragma, 81 | use-symbolic-message-instead, 82 | apply-builtin, 83 | basestring-builtin, 84 | buffer-builtin, 85 | cmp-builtin, 86 | coerce-builtin, 87 | execfile-builtin, 88 | file-builtin, 89 | long-builtin, 90 | raw_input-builtin, 91 | reduce-builtin, 92 | standarderror-builtin, 93 | unicode-builtin, 94 | xrange-builtin, 95 | coerce-method, 96 | delslice-method, 97 | getslice-method, 98 | setslice-method, 99 | no-absolute-import, 100 | old-division, 101 | dict-iter-method, 102 | dict-view-method, 103 | next-method-called, 104 | metaclass-assignment, 105 | indexing-exception, 106 | raising-string, 107 | reload-builtin, 108 | oct-method, 109 | hex-method, 110 | nonzero-method, 111 | cmp-method, 112 | input-builtin, 113 | round-builtin, 114 | intern-builtin, 115 | unichr-builtin, 116 | map-builtin-not-iterating, 117 | zip-builtin-not-iterating, 118 | range-builtin-not-iterating, 119 | filter-builtin-not-iterating, 120 | using-cmp-argument, 121 | eq-without-hash, 122 | div-method, 123 | idiv-method, 124 | rdiv-method, 125 | exception-message-attribute, 126 | invalid-str-codec, 127 | sys-max-int, 128 | bad-python3-import, 129 | deprecated-string-function, 130 | deprecated-str-translate-call, 131 | deprecated-itertools-function, 132 | deprecated-types-field, 133 | next-method-defined, 134 | dict-items-not-iterating, 135 | dict-keys-not-iterating, 136 | dict-values-not-iterating, 137 | deprecated-operator-function, 138 | deprecated-urllib-function, 139 | xreadlines-attribute, 140 | deprecated-sys-function, 141 | exception-escape, 142 | comprehension-escape, 143 | missing-docstring 144 | 145 | # Enable the message, report, category or checker with the given id(s). You can 146 | # either give multiple identifier separated by comma (,) or put this option 147 | # multiple time (only on the command line, not in the configuration file where 148 | # it should appear only once). See also the "--disable" option for examples. 149 | enable=c-extension-no-member 150 | 151 | 152 | [REPORTS] 153 | 154 | # Python expression which should return a note less than 10 (10 is the highest 155 | # note). You have access to the variables errors warning, statement which 156 | # respectively contain the number of errors / warnings messages and the total 157 | # number of statements analyzed. This is used by the global evaluation report 158 | # (RP0004). 159 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 160 | 161 | # Template used to display messages. This is a python new-style format string 162 | # used to format the message information. See doc for all details. 163 | #msg-template= 164 | 165 | # Set the output format. Available formats are text, parseable, colorized, json 166 | # and msvs (visual studio). You can also give a reporter class, e.g. 167 | # mypackage.mymodule.MyReporterClass. 168 | output-format=text 169 | 170 | # Tells whether to display a full report or only the messages. 171 | reports=no 172 | 173 | # Activate the evaluation score. 
174 | score=yes
175 | 
176 | 
177 | [REFACTORING]
178 | 
179 | # Maximum number of nested blocks for function / method body
180 | max-nested-blocks=5
181 | 
182 | # Complete name of functions that never return. When checking for
183 | # inconsistent-return-statements, if a never-returning function is called,
184 | # it will be considered an explicit return statement and no message will be
185 | # printed.
186 | never-returning-functions=sys.exit
187 | 
188 | 
189 | [FORMAT]
190 | 
191 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
192 | expected-line-ending-format=
193 | 
194 | # Regexp for a line that is allowed to be longer than the limit.
195 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$
196 | 
197 | # Number of spaces of indent required inside a hanging or continued line.
198 | indent-after-paren=4
199 | 
200 | # String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
201 | # tab).
202 | indent-string='    '
203 | 
204 | # Maximum number of characters on a single line.
205 | max-line-length=100
206 | 
207 | # Maximum number of lines in a module.
208 | max-module-lines=1000
209 | 
210 | # List of optional constructs for which whitespace checking is disabled. `dict-
211 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
212 | # `trailing-comma` allows a space between comma and closing bracket: (a, ).
213 | # `empty-line` allows space-only lines.
214 | no-space-check=trailing-comma,
215 |                dict-separator
216 | 
217 | # Allow the body of a class to be on the same line as the declaration if body
218 | # contains a single statement.
219 | single-line-class-stmt=no
220 | 
221 | # Allow the body of an if to be on the same line as the test if there is no
222 | # else.
223 | single-line-if-stmt=no
224 | 
225 | 
226 | [LOGGING]
227 | 
228 | # Logging modules to check that the string format arguments are in logging
229 | # function parameter format.
230 | logging-modules=logging
231 | 
232 | 
233 | [VARIABLES]
234 | 
235 | # List of additional names supposed to be defined in builtins. Remember that
236 | # you should avoid defining new builtins when possible.
237 | additional-builtins=
238 | 
239 | # Tells whether unused global variables should be treated as a violation.
240 | allow-global-unused-variables=yes
241 | 
242 | # List of strings which can identify a callback function by name. A callback
243 | # name must start or end with one of those strings.
244 | callbacks=cb_,
245 |           _cb
246 | 
247 | # A regular expression matching the name of dummy variables (i.e. expected to
248 | # not be used).
249 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
250 | 
251 | # Argument names that match this expression will be ignored. Defaults to names
252 | # with a leading underscore.
253 | ignored-argument-names=_.*|^ignored_|^unused_
254 | 
255 | # Tells whether we should check for unused imports in __init__ files.
256 | init-import=no
257 | 
258 | # List of qualified module names which can have objects that can redefine
259 | # builtins.
260 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
261 | 
262 | 
263 | [MISCELLANEOUS]
264 | 
265 | # List of note tags to take into consideration, separated by a comma.
266 | notes=FIXME,
267 |       XXX,
268 |       TODO
269 | 
270 | 
271 | [TYPECHECK]
272 | 
273 | # List of decorators that produce context managers, such as
274 | # contextlib.contextmanager. Add to this list to register other decorators that
275 | # produce valid context managers.
276 | contextmanager-decorators=contextlib.contextmanager 277 | 278 | # List of members which are set dynamically and missed by pylint inference 279 | # system, and so shouldn't trigger E1101 when accessed. Python regular 280 | # expressions are accepted. 281 | generated-members=numpy.*,torch.*,tensorflow.* 282 | 283 | # Tells whether missing members accessed in mixin class should be ignored. A 284 | # mixin class is detected if its name ends with "mixin" (case insensitive). 285 | ignore-mixin-members=yes 286 | 287 | # Tells whether to warn about missing members when the owner of the attribute 288 | # is inferred to be None. 289 | ignore-none=yes 290 | 291 | # This flag controls whether pylint should warn about no-member and similar 292 | # checks whenever an opaque object is returned when inferring. The inference 293 | # can return multiple potential results while evaluating a Python object, but 294 | # some branches might not be evaluated, which results in partial inference. In 295 | # that case, it might be useful to still emit no-member and other checks for 296 | # the rest of the inferred objects. 297 | ignore-on-opaque-inference=yes 298 | 299 | # List of class names for which member attributes should not be checked (useful 300 | # for classes with dynamically set attributes). This supports the use of 301 | # qualified names. 302 | ignored-classes=optparse.Values,thread._local,_thread._local 303 | 304 | # List of module names for which member attributes should not be checked 305 | # (useful for modules/projects where namespaces are manipulated during runtime 306 | # and thus existing member attributes cannot be deduced by static analysis. It 307 | # supports qualified module names, as well as Unix pattern matching. 308 | ignored-modules= 309 | 310 | # Show a hint with possible names when a member name was not found. The aspect 311 | # of finding the hint is based on edit distance. 312 | missing-member-hint=yes 313 | 314 | # The minimum edit distance a name should have in order to be considered a 315 | # similar match for a missing member name. 316 | missing-member-hint-distance=1 317 | 318 | # The total number of similar names that should be taken in consideration when 319 | # showing a hint for a missing member. 320 | missing-member-max-choices=1 321 | 322 | 323 | [SPELLING] 324 | 325 | # Limits count of emitted suggestions for spelling mistakes. 326 | max-spelling-suggestions=4 327 | 328 | # Spelling dictionary name. Available dictionaries: none. To make it working 329 | # install python-enchant package.. 330 | spelling-dict= 331 | 332 | # List of comma separated words that should not be checked. 333 | spelling-ignore-words= 334 | 335 | # A path to a file that contains private dictionary; one word per line. 336 | spelling-private-dict-file= 337 | 338 | # Tells whether to store unknown words to indicated private dictionary in 339 | # --spelling-private-dict-file option instead of raising a message. 340 | spelling-store-unknown-words=no 341 | 342 | 343 | [BASIC] 344 | 345 | # Naming style matching correct argument names. 346 | argument-naming-style=snake_case 347 | 348 | # Regular expression matching correct argument names. Overrides argument- 349 | # naming-style. 350 | #argument-rgx= 351 | 352 | # Naming style matching correct attribute names. 353 | attr-naming-style=snake_case 354 | 355 | # Regular expression matching correct attribute names. Overrides attr-naming- 356 | # style. 357 | #attr-rgx= 358 | 359 | # Bad variable names which should always be refused, separated by a comma. 
360 | bad-names=foo, 361 | bar, 362 | baz, 363 | toto, 364 | tutu, 365 | tata 366 | 367 | # Naming style matching correct class attribute names. 368 | class-attribute-naming-style=any 369 | 370 | # Regular expression matching correct class attribute names. Overrides class- 371 | # attribute-naming-style. 372 | #class-attribute-rgx= 373 | 374 | # Naming style matching correct class names. 375 | class-naming-style=PascalCase 376 | 377 | # Regular expression matching correct class names. Overrides class-naming- 378 | # style. 379 | #class-rgx= 380 | 381 | # Naming style matching correct constant names. 382 | const-naming-style=UPPER_CASE 383 | 384 | # Regular expression matching correct constant names. Overrides const-naming- 385 | # style. 386 | #const-rgx= 387 | 388 | # Minimum line length for functions/classes that require docstrings, shorter 389 | # ones are exempt. 390 | docstring-min-length=-1 391 | 392 | # Naming style matching correct function names. 393 | function-naming-style=snake_case 394 | 395 | # Regular expression matching correct function names. Overrides function- 396 | # naming-style. 397 | #function-rgx= 398 | 399 | # Good variable names which should always be accepted, separated by a comma. 400 | good-names=i, 401 | j, 402 | k, 403 | ex, 404 | Run, 405 | _ 406 | 407 | # Include a hint for the correct naming format with invalid-name. 408 | include-naming-hint=no 409 | 410 | # Naming style matching correct inline iteration names. 411 | inlinevar-naming-style=any 412 | 413 | # Regular expression matching correct inline iteration names. Overrides 414 | # inlinevar-naming-style. 415 | #inlinevar-rgx= 416 | 417 | # Naming style matching correct method names. 418 | method-naming-style=snake_case 419 | 420 | # Regular expression matching correct method names. Overrides method-naming- 421 | # style. 422 | #method-rgx= 423 | 424 | # Naming style matching correct module names. 425 | module-naming-style=snake_case 426 | 427 | # Regular expression matching correct module names. Overrides module-naming- 428 | # style. 429 | #module-rgx= 430 | 431 | # Colon-delimited sets of names that determine each other's naming style when 432 | # the name regexes allow several styles. 433 | name-group= 434 | 435 | # Regular expression which should only match function or class names that do 436 | # not require a docstring. 437 | no-docstring-rgx=^_ 438 | 439 | # List of decorators that produce properties, such as abc.abstractproperty. Add 440 | # to this list to register other decorators that produce valid properties. 441 | # These decorators are taken in consideration only for invalid-name. 442 | property-classes=abc.abstractproperty 443 | 444 | # Naming style matching correct variable names. 445 | variable-naming-style=snake_case 446 | 447 | # Regular expression matching correct variable names. Overrides variable- 448 | # naming-style. 449 | #variable-rgx= 450 | 451 | 452 | [SIMILARITIES] 453 | 454 | # Ignore comments when computing similarities. 455 | ignore-comments=yes 456 | 457 | # Ignore docstrings when computing similarities. 458 | ignore-docstrings=yes 459 | 460 | # Ignore imports when computing similarities. 461 | ignore-imports=no 462 | 463 | # Minimum lines number of a similarity. 464 | min-similarity-lines=4 465 | 466 | 467 | [CLASSES] 468 | 469 | # List of method names used to declare (i.e. assign) instance attributes. 470 | defining-attr-methods=__init__, 471 | __new__, 472 | setUp 473 | 474 | # List of member names, which should be excluded from the protected access 475 | # warning. 
476 | exclude-protected=_asdict, 477 | _fields, 478 | _replace, 479 | _source, 480 | _make 481 | 482 | # List of valid names for the first argument in a class method. 483 | valid-classmethod-first-arg=cls 484 | 485 | # List of valid names for the first argument in a metaclass class method. 486 | valid-metaclass-classmethod-first-arg=cls 487 | 488 | 489 | [DESIGN] 490 | 491 | # Maximum number of arguments for function / method. 492 | max-args=5 493 | 494 | # Maximum number of attributes for a class (see R0902). 495 | max-attributes=7 496 | 497 | # Maximum number of boolean expressions in an if statement. 498 | max-bool-expr=5 499 | 500 | # Maximum number of branch for function / method body. 501 | max-branches=12 502 | 503 | # Maximum number of locals for function / method body. 504 | max-locals=15 505 | 506 | # Maximum number of parents for a class (see R0901). 507 | max-parents=7 508 | 509 | # Maximum number of public methods for a class (see R0904). 510 | max-public-methods=20 511 | 512 | # Maximum number of return / yield for function / method body. 513 | max-returns=6 514 | 515 | # Maximum number of statements in function / method body. 516 | max-statements=50 517 | 518 | # Minimum number of public methods for a class (see R0903). 519 | min-public-methods=2 520 | 521 | 522 | [IMPORTS] 523 | 524 | # Allow wildcard imports from modules that define __all__. 525 | allow-wildcard-with-all=no 526 | 527 | # Analyse import fallback blocks. This can be used to support both Python 2 and 528 | # 3 compatible code, which means that the block might have code that exists 529 | # only in one or another interpreter, leading to false positives when analysed. 530 | analyse-fallback-blocks=no 531 | 532 | # Deprecated modules which should not be used, separated by a comma. 533 | deprecated-modules=optparse,tkinter.tix 534 | 535 | # Create a graph of external dependencies in the given file (report RP0402 must 536 | # not be disabled). 537 | ext-import-graph= 538 | 539 | # Create a graph of every (i.e. internal and external) dependencies in the 540 | # given file (report RP0402 must not be disabled). 541 | import-graph= 542 | 543 | # Create a graph of internal dependencies in the given file (report RP0402 must 544 | # not be disabled). 545 | int-import-graph= 546 | 547 | # Force import order to recognize a module as part of the standard 548 | # compatibility libraries. 549 | known-standard-library= 550 | 551 | # Force import order to recognize a module as part of a third party library. 552 | known-third-party=enchant 553 | 554 | 555 | [EXCEPTIONS] 556 | 557 | # Exceptions that will emit a warning when being caught. Defaults to 558 | # "Exception". 559 | overgeneral-exceptions=Exception 560 | --------------------------------------------------------------------------------
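Editor's note: the following self-contained sketch is illustrative and not part of the original repository. It replays the label-binarization logic of binarize_prediction()/_make_mask() from imet/main.py on a toy 2x5 probability matrix; the helper is copied in paraphrase, with the N_CLASSES assertion dropped so the example can run on five columns.

import numpy as np

def make_top_n_mask(argsorted, top_n):
    # Same construction as _make_mask() in imet/main.py: set a 1 on the
    # top_n highest-probability columns of each row.
    mask = np.zeros_like(argsorted, dtype=np.uint8)
    col_indices = argsorted[:, -top_n:].reshape(-1)
    row_indices = [i // top_n for i in range(len(col_indices))]
    mask[row_indices, col_indices] = 1
    return mask

probs = np.array([[0.90, 0.10, 0.40, 0.05, 0.60],
                  [0.20, 0.15, 0.10, 0.05, 0.01]])
argsorted = probs.argsort(axis=1)
max_mask = make_top_n_mask(argsorted, 3)  # cap each sample at 3 labels
min_mask = make_top_n_mask(argsorted, 1)  # always keep the top-1 label
binarized = (max_mask & (probs > 0.3)) | min_mask
print(binarized)
# [[1 0 1 0 1]   row 0: indices 0, 2 and 4 clear the 0.3 threshold and the cap
#  [1 0 0 0 0]]  row 1: nothing clears 0.3, so only the arg-max label survives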