├── timm ├── version.py ├── data │ ├── parsers │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── parser.py │ │ ├── class_map.py │ │ ├── parser_factory.py │ │ ├── parser_image_folder.py │ │ └── parser_image_tar.py │ ├── constants.py │ ├── __init__.py │ ├── real_labels.py │ └── config.py ├── utils │ ├── random.py │ ├── __init__.py │ ├── misc.py │ ├── clip_grad.py │ ├── distributed.py │ ├── metrics.py │ ├── log.py │ ├── summary.py │ ├── agc.py │ ├── cuda.py │ └── jit.py ├── loss │ ├── __init__.py │ ├── cross_entropy.py │ ├── jsd.py │ ├── binary_cross_entropy.py │ └── asymmetric_loss.py ├── __init__.py ├── scheduler │ ├── __init__.py │ ├── step_lr.py │ ├── multistep_lr.py │ ├── scheduler_factory.py │ ├── tanh_lr.py │ ├── poly_lr.py │ ├── plateau_lr.py │ └── cosine_lr.py ├── models │ ├── layers │ │ ├── trace_utils.py │ │ ├── linear.py │ │ ├── helpers.py │ │ ├── norm.py │ │ ├── conv_bn_act.py │ │ ├── create_conv2d.py │ │ ├── patch_embed.py │ │ ├── conv2d_same.py │ │ ├── blur_pool.py │ │ ├── median_pool.py │ │ ├── space_to_depth.py │ │ ├── mixed_conv2d.py │ │ ├── test_time_pool.py │ │ ├── __init__.py │ │ ├── padding.py │ │ ├── classifier.py │ │ ├── global_context.py │ │ ├── separable_conv.py │ │ ├── activations_jit.py │ │ ├── squeeze_excite.py │ │ ├── pool2d_same.py │ │ ├── split_attn.py │ │ ├── config.py │ │ ├── weight_init.py │ │ ├── inplace_abn.py │ │ ├── split_batchnorm.py │ │ ├── create_norm_act.py │ │ ├── evo_norm.py │ │ ├── norm_act.py │ │ ├── create_attn.py │ │ ├── gather_excite.py │ │ ├── adaptive_avgmax_pool.py │ │ ├── mlp.py │ │ ├── activations.py │ │ └── cbam.py │ ├── __init__.py │ ├── fx_features.py │ ├── factory.py │ ├── convmixer.py │ └── pruned │ │ └── ecaresnet50d_pruned.txt └── optim │ ├── __init__.py │ ├── sgdp.py │ ├── lookahead.py │ ├── radam.py │ ├── adamp.py │ └── nadam.py ├── image ├── GT.png ├── I01.png ├── ball.jpg ├── bird.jpg ├── dis1.png ├── dis2.png ├── dis3.png ├── dis4.png ├── dog.jpg ├── kunkun.png ├── people.jpg ├── I01_01_02.png ├── 
I01_01_05.png ├── I01_06_05.png ├── I01_15_05.png ├── pipeline.png └── results.png ├── .gitignore ├── requirements.txt ├── config.py ├── data ├── PIPAL22 │ ├── pipal22_test.py │ └── pipal.py ├── kadid10k │ └── kadid10k.py └── koniq10k │ └── koniq10k.py ├── inference.py ├── utils └── inference_process.py └── predict_one_image.py /timm/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.5.5' 2 | -------------------------------------------------------------------------------- /image/GT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/GT.png -------------------------------------------------------------------------------- /image/I01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/I01.png -------------------------------------------------------------------------------- /image/ball.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/ball.jpg -------------------------------------------------------------------------------- /image/bird.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/bird.jpg -------------------------------------------------------------------------------- /image/dis1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/dis1.png -------------------------------------------------------------------------------- /image/dis2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/dis2.png 
-------------------------------------------------------------------------------- /image/dis3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/dis3.png -------------------------------------------------------------------------------- /image/dis4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/dis4.png -------------------------------------------------------------------------------- /image/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/dog.jpg -------------------------------------------------------------------------------- /timm/data/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | from .parser_factory import create_parser 2 | -------------------------------------------------------------------------------- /timm/data/parsers/constants.py: -------------------------------------------------------------------------------- 1 | IMG_EXTENSIONS = ('.png', '.jpg', '.jpeg') 2 | -------------------------------------------------------------------------------- /image/kunkun.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/kunkun.png -------------------------------------------------------------------------------- /image/people.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/people.jpg -------------------------------------------------------------------------------- /image/I01_01_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/I01_01_02.png 
-------------------------------------------------------------------------------- /image/I01_01_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/I01_01_05.png -------------------------------------------------------------------------------- /image/I01_06_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/I01_06_05.png -------------------------------------------------------------------------------- /image/I01_15_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/I01_15_05.png -------------------------------------------------------------------------------- /image/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/pipeline.png -------------------------------------------------------------------------------- /image/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IIGROUP/MANIQA/HEAD/image/results.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | 3 | Dis/ 4 | 5 | *.zip 6 | *.pt 7 | 8 | output/ 9 | 10 | test_images/ -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | opencv-python 3 | torch 4 | torchvision 5 | torchsummary 6 | tensorboardX 7 | logging 8 | tqdm 9 | json 10 | einops 11 | pandas -------------------------------------------------------------------------------- /timm/utils/random.py: 
class Config(dict):
    """Configuration dictionary whose keys are also usable as attributes.

    ``cfg.name`` reads ``cfg['name']`` and ``cfg.name = value`` writes
    ``cfg['name']``. Item access (``cfg['name']``) keeps plain ``dict``
    semantics, including ``KeyError`` for a missing key.
    """

    def __getattr__(self, name):
        """Return ``self[name]``; raise ``AttributeError`` if the key is absent.

        ``__getattr__`` is only consulted after normal attribute lookup fails,
        so it must signal a miss with ``AttributeError``. Letting ``KeyError``
        escape breaks ``hasattr``, ``getattr(obj, name, default)`` and
        ``copy.deepcopy``, all of which probe for optional attributes.
        """
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None

    # Attribute assignment stores the value as a dictionary item.
    __setattr__ = dict.__setitem__

    @classmethod
    def load(cls, file):
        """Read the JSON document at path ``file`` and wrap it in ``cls``.

        Using ``cls`` (rather than ``Config``) lets subclasses get an instance
        of their own type back.
        """
        with open(file, 'r') as f:
            config = json.load(f)
        return cls(config)
IMAGENET_INCEPTION_MEAN = (0.5, 0.5, 0.5) 5 | IMAGENET_INCEPTION_STD = (0.5, 0.5, 0.5) 6 | IMAGENET_DPN_MEAN = (124 / 255, 117 / 255, 104 / 255) 7 | IMAGENET_DPN_STD = tuple([1 / (.0167 * 255)] * 3) 8 | -------------------------------------------------------------------------------- /timm/scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | from .cosine_lr import CosineLRScheduler 2 | from .multistep_lr import MultiStepLRScheduler 3 | from .plateau_lr import PlateauLRScheduler 4 | from .poly_lr import PolyLRScheduler 5 | from .step_lr import StepLRScheduler 6 | from .tanh_lr import TanhLRScheduler 7 | 8 | from .scheduler_factory import create_scheduler 9 | -------------------------------------------------------------------------------- /timm/models/layers/trace_utils.py: -------------------------------------------------------------------------------- 1 | try: 2 | from torch import _assert 3 | except ImportError: 4 | def _assert(condition: bool, message: str): 5 | assert condition, message 6 | 7 | 8 | def _float_to_int(x: float) -> int: 9 | """ 10 | Symbolic tracing helper to substitute for inbuilt `int`. 
class Parser:
    """Base class for objects that map a sample index to a filename.

    Subclasses provide ``_filename``. ``filenames`` additionally calls
    ``len(self)``, which this base class does not define, so subclasses are
    expected to supply ``__len__`` as well.
    """

    def __init__(self):
        pass

    @abstractmethod
    def _filename(self, index, basename=False, absolute=False):
        """Return the filename for ``index``; the base version returns None."""

    def filename(self, index, basename=False, absolute=False):
        """Return the filename for a single ``index`` via ``_filename``."""
        return self._filename(index, basename=basename, absolute=absolute)

    def filenames(self, basename=False, absolute=False):
        """Return the filenames for every index in ``range(len(self))``."""
        collected = []
        for position in range(len(self)):
            collected.append(self._filename(position, basename=basename, absolute=absolute))
        return collected
def natural_key(string_):
    """Return a sort key that orders embedded numbers numerically.

    See http://www.codinghorror.com/blog/archives/001018.html

    The lower-cased string is split on runs of digits (``\\d+``); digit runs
    become ``int`` and everything else stays ``str``, so ``'a9'`` sorts
    before ``'a10'``.
    """
    # `isdecimal()` is true exactly for the chunks captured by `\d+`.
    # `isdigit()` also accepts characters such as superscript '²', which
    # `int()` cannot parse, so a name like '1²' used to raise ValueError.
    return [int(s) if s.isdecimal() else s for s in re.split(r'(\d+)', string_.lower())]
def load_class_map(map_or_filename, root=''):
    """Return a class-name -> index mapping from a dict or a ``.txt`` file.

    Args:
        map_or_filename: Either a ready-made, non-empty dict (returned as-is)
            or the path of a text file with one class name per line.
        root: Directory that ``map_or_filename`` is joined onto when the path
            does not exist as given.

    Returns:
        The dict that was passed in, or a dict mapping each stripped line of
        the file to its zero-based line number.

    Raises:
        AssertionError: If the dict is empty, the file cannot be located, or
            the file extension is not ``.txt``.
    """
    # The checks raise AssertionError explicitly (the type the original
    # `assert` statements produced) so they still run under `python -O`.
    if isinstance(map_or_filename, dict):
        # The original asserted on the builtin `dict` type, which is always
        # truthy; the check must be on the argument itself.
        if not map_or_filename:
            raise AssertionError('class_map dict must be non-empty')
        return map_or_filename
    class_map_path = map_or_filename
    if not os.path.exists(class_map_path):
        class_map_path = os.path.join(root, class_map_path)
        if not os.path.exists(class_map_path):
            raise AssertionError('Cannot locate specified class map file (%s)' % map_or_filename)
    class_map_ext = os.path.splitext(map_or_filename)[-1].lower()
    if class_map_ext != '.txt':
        raise AssertionError(f'Unsupported class map file extension ({class_map_ext}).')
    with open(class_map_path) as f:
        class_to_idx = {v.strip(): k for k, v in enumerate(f)}
    return class_to_idx
class LayerNorm2d(nn.LayerNorm):
    """ LayerNorm for channels of '2D' spatial BCHW tensors """

    def __init__(self, num_channels):
        super().__init__(num_channels)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Move the channel axis last so layer_norm normalizes over it,
        # then restore the original axis order.
        channels_last = x.permute(0, 2, 3, 1)
        normalized = F.layer_norm(
            channels_last, self.normalized_shape, self.weight, self.bias, self.eps)
        return normalized.permute(0, 3, 1, 2)
class AverageMeter:
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the meter: last value, average, sum and count all become 0."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, weighted by ``n``.

        ``sum`` grows by ``val * n`` and ``count`` by ``n``; ``avg`` is their
        ratio.
        """
        self.val = val
        self.sum += val * n
        self.count += n
        # A zero total count (e.g. the first update has n=0) would divide by
        # zero; keep the reset value of 0 until something has been counted.
        self.avg = self.sum / self.count if self.count else 0
class FormatterNoInfo(logging.Formatter):
    """Formatter that prints INFO records as the bare message.

    Records at any other level are rendered with the configured format
    string, which defaults to ``'%(levelname)s: %(message)s'``.
    """

    def __init__(self, fmt='%(levelname)s: %(message)s'):
        super().__init__(fmt)

    def format(self, record):
        if record.levelno != logging.INFO:
            return super().format(record)
        return str(record.getMessage())
d_img_name = self.data_dict['d_img_list'][idx] 23 | d_img = cv2.imread(os.path.join(self.dis_path, d_img_name), cv2.IMREAD_COLOR) 24 | d_img = cv2.cvtColor(d_img, cv2.COLOR_BGR2RGB) 25 | d_img = np.array(d_img).astype('float32') / 255 26 | d_img = np.transpose(d_img, (2, 0, 1)) 27 | sample = { 28 | 'd_img_org': d_img, 29 | 'd_name': d_img_name 30 | } 31 | if self.transform: 32 | sample = self.transform(sample) 33 | return sample 34 | -------------------------------------------------------------------------------- /timm/data/parsers/parser_factory.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from .parser_image_folder import ParserImageFolder 4 | from .parser_image_tar import ParserImageTar 5 | from .parser_image_in_tar import ParserImageInTar 6 | 7 | 8 | def create_parser(name, root, split='train', **kwargs): 9 | name = name.lower() 10 | name = name.split('/', 2) 11 | prefix = '' 12 | if len(name) > 1: 13 | prefix = name[0] 14 | name = name[-1] 15 | 16 | # FIXME improve the selection right now just tfds prefix or fallback path, will need options to 17 | # explicitly select other options shortly 18 | if prefix == 'tfds': 19 | from .parser_tfds import ParserTfds # defer tensorflow import 20 | parser = ParserTfds(root, name, split=split, **kwargs) 21 | else: 22 | assert os.path.exists(root) 23 | # default fallback path (backwards compat), use image tar if root is a .tar file, otherwise image folder 24 | # FIXME support split here, in parser? 
class LabelSmoothingCrossEntropy(nn.Module):
    """ NLL loss with label smoothing.
    """

    def __init__(self, smoothing=0.1):
        super().__init__()
        assert smoothing < 1.0
        self.smoothing = smoothing
        self.confidence = 1. - smoothing

    def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        log_probs = F.log_softmax(x, dim=-1)
        # Negative log-probability of the labelled class for each row.
        target_nll = -log_probs.gather(dim=-1, index=target.unsqueeze(1)).squeeze(1)
        # Negative log-probability averaged over all classes.
        uniform_nll = -log_probs.mean(dim=-1)
        per_sample = self.confidence * target_nll + self.smoothing * uniform_nll
        return per_sample.mean()
def update_summary(epoch, train_metrics, eval_metrics, filename, write_header=False, log_wandb=False):
    """Append one row of epoch metrics to a CSV file, optionally logging to wandb.

    Args:
        epoch: Value stored in the ``epoch`` column.
        train_metrics: Mapping whose keys are written with a ``train_`` prefix.
        eval_metrics: Mapping whose keys are written with an ``eval_`` prefix.
        filename: CSV file to append to.
        write_header: Write the column header before the row; meant for the
            first call on a new file.
        log_wandb: Also pass the row to ``wandb.log``. ``wandb`` is an
            optional import at the top of this file, so it must be installed
            when this flag is set.
    """
    rowd = OrderedDict(epoch=epoch)
    rowd.update([('train_' + k, v) for k, v in train_metrics.items()])
    rowd.update([('eval_' + k, v) for k, v in eval_metrics.items()])
    if log_wandb:
        wandb.log(rowd)
    # The csv module writes its own '\r\n' terminators; newline='' stops the
    # text layer translating them again, which produced blank rows on
    # platforms that map '\n' to '\r\n'.
    with open(filename, mode='a', newline='') as cf:
        dw = csv.DictWriter(cf, fieldnames=rowd.keys())
        if write_header:  # first iteration (epoch == 1 can't be used)
            dw.writeheader()
        dw.writerow(rowd)
def create_conv2d(in_channels, out_channels, kernel_size, **kwargs):
    """ Select a 2d convolution implementation based on arguments
    Creates and returns one of torch.nn.Conv2d, Conv2dSame, MixedConv2d, or CondConv2d.

    Used extensively by EfficientNet, MobileNetv3 and related networks.

    A ``list`` kernel_size selects MixedConv2d. Otherwise ``num_experts > 0``
    in kwargs selects CondConv2d, and anything else goes through
    ``create_conv2d_pad``. ``depthwise=True`` sets ``groups`` to
    ``in_channels`` and overrides any ``groups`` that was passed.
    """
    if isinstance(kernel_size, list):
        assert 'num_experts' not in kwargs  # MixNet + CondConv combo not supported currently
        assert 'groups' not in kwargs  # MixedConv groups are defined by kernel list
        # We're going to use only lists for defining the MixedConv2d kernel groups,
        # ints, tuples, other iterables will continue to pass to normal conv and specify h, w.
        m = MixedConv2d(in_channels, out_channels, kernel_size, **kwargs)
    else:
        depthwise = kwargs.pop('depthwise', False)
        # Always remove 'groups' from kwargs. It used to be popped only when
        # depthwise was False, so depthwise=True together with an explicit
        # groups passed the keyword twice below and raised TypeError.
        groups = kwargs.pop('groups', 1)
        if depthwise:
            # for DW out_channels must be multiple of in_channels as must have out_channels % groups == 0
            groups = in_channels
        if 'num_experts' in kwargs and kwargs['num_experts'] > 0:
            m = CondConv2d(in_channels, out_channels, kernel_size, groups=groups, **kwargs)
        else:
            m = create_conv2d_pad(in_channels, out_channels, kernel_size, groups=groups, **kwargs)
    return m
model ({self.img_size[0]}).") 34 | _assert(W == self.img_size[1], f"Input image width ({W}) doesn't match model ({self.img_size[1]}).") 35 | x = self.proj(x) 36 | if self.flatten: 37 | x = x.flatten(2).transpose(1, 2) # BCHW -> BNC 38 | x = self.norm(x) 39 | return x 40 | -------------------------------------------------------------------------------- /timm/models/layers/conv2d_same.py: -------------------------------------------------------------------------------- 1 | """ Conv2d w/ Same Padding 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from typing import Tuple, Optional 9 | 10 | from .padding import pad_same, get_padding_value 11 | 12 | 13 | def conv2d_same( 14 | x, weight: torch.Tensor, bias: Optional[torch.Tensor] = None, stride: Tuple[int, int] = (1, 1), 15 | padding: Tuple[int, int] = (0, 0), dilation: Tuple[int, int] = (1, 1), groups: int = 1): 16 | x = pad_same(x, weight.shape[-2:], stride, dilation) 17 | return F.conv2d(x, weight, bias, stride, (0, 0), dilation, groups) 18 | 19 | 20 | class Conv2dSame(nn.Conv2d): 21 | """ Tensorflow like 'SAME' convolution wrapper for 2D convolutions 22 | """ 23 | 24 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 25 | padding=0, dilation=1, groups=1, bias=True): 26 | super(Conv2dSame, self).__init__( 27 | in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 28 | 29 | def forward(self, x): 30 | return conv2d_same(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 31 | 32 | 33 | def create_conv2d_pad(in_chs, out_chs, kernel_size, **kwargs): 34 | padding = kwargs.pop('padding', '') 35 | kwargs.setdefault('bias', False) 36 | padding, is_dynamic = get_padding_value(padding, kernel_size, **kwargs) 37 | if is_dynamic: 38 | return Conv2dSame(in_chs, out_chs, kernel_size, **kwargs) 39 | else: 40 | return nn.Conv2d(in_chs, out_chs, kernel_size, 
class JsdCrossEntropy(nn.Module):
    """ Jensen-Shannon Divergence + Cross-Entropy Loss

    Based on impl here: https://github.com/google-research/augmix/blob/master/imagenet.py
    From paper: 'AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty -
    https://arxiv.org/abs/1912.02781

    The batch is expected to hold `num_splits` equally sized groups stacked along dim 0; the
    first group is the one the cross-entropy term is computed on.

    Hacked together by / Copyright 2020 Ross Wightman
    """
    def __init__(self, num_splits=3, alpha=12, smoothing=0.1):
        super().__init__()
        self.num_splits = num_splits
        self.alpha = alpha
        if smoothing is not None and smoothing > 0:
            self.cross_entropy_loss = LabelSmoothingCrossEntropy(smoothing)
        else:
            self.cross_entropy_loss = torch.nn.CrossEntropyLoss()

    def forward(self, output, target):
        """Return cross-entropy on the first split plus `alpha` times the mean KL to the mixture.

        Implemented as `forward` (not `__call__`) so that nn.Module's call machinery, including
        registered hooks, runs; calling the module instance works exactly as before.
        """
        split_size = output.shape[0] // self.num_splits
        assert split_size * self.num_splits == output.shape[0]
        logits_split = torch.split(output, split_size)

        # Cross-entropy is only computed on clean images
        loss = self.cross_entropy_loss(logits_split[0], target[:split_size])
        probs = [F.softmax(logits, dim=1) for logits in logits_split]

        # Clamp mixture distribution to avoid exploding KL divergence
        logp_mixture = torch.clamp(torch.stack(probs).mean(dim=0), 1e-7, 1).log()
        loss += self.alpha * sum(
            F.kl_div(logp_mixture, p_split, reduction='batchmean') for p_split in probs) / len(probs)
        return loss
class RealLabelsImagenet:
    """Top-k accuracy evaluator against the 'ReaL' multi-label annotations.

    `real_json` holds one list of acceptable class indices per validation image, in image order.
    Images whose list is empty are skipped when accumulating results.
    """

    def __init__(self, filenames, real_json='real.json', topk=(1, 5)):
        with open(real_json, encoding='utf-8') as label_file:
            labels_in_order = json.load(label_file)
        # Key the labels by the canonical validation file name (1-based, zero padded to 8 digits).
        self.real_labels = {
            f'ILSVRC2012_val_{i + 1:08d}.JPEG': labels for i, labels in enumerate(labels_in_order)}
        self.filenames = filenames
        assert len(self.filenames) == len(self.real_labels)
        self.topk = topk
        self.is_correct = {k: [] for k in topk}
        self.sample_idx = 0

    def add_result(self, output):
        """Score one batch of model outputs; rows are matched to `filenames` in call order."""
        maxk = max(self.topk)
        _, pred_batch = output.topk(maxk, 1, True, True)
        pred_batch = pred_batch.cpu().numpy()
        for pred in pred_batch:
            filename = os.path.basename(self.filenames[self.sample_idx])
            labels = self.real_labels[filename]
            if labels:
                for k in self.topk:
                    # Generator keeps any() short-circuiting on the first hit.
                    self.is_correct[k].append(any(p in labels for p in pred[:k]))
            self.sample_idx += 1

    def get_accuracy(self, k=None):
        """Return accuracy in percent for `k`, or a dict over all configured k when `k` is None."""
        if k is None:
            return {k: float(np.mean(self.is_correct[k])) * 100 for k in self.topk}
        else:
            return float(np.mean(self.is_correct[k])) * 100
class BlurPool2d(nn.Module):
    r"""Creates a module that computes blurs and downsample a given feature map.
    See :cite:`zhang2019shiftinvar` for more details.
    Corresponds to the Downsample class, which does blurring and subsampling

    Args:
        channels = Number of input channels
        filt_size (int): binomial filter size for blurring. currently supports 3 (default) and 5.
        stride (int): downsampling filter stride

    Returns:
        torch.Tensor: the transformed tensor.
    """
    def __init__(self, channels, filt_size=3, stride=2) -> None:
        super(BlurPool2d, self).__init__()
        assert filt_size > 1
        self.channels = channels
        self.filt_size = filt_size
        self.stride = stride
        # Same padding amount on all four sides (left, right, top, bottom).
        pad = get_padding(filt_size, stride, dilation=1)
        self.padding = [pad, pad, pad, pad]
        # Binomial coefficients: expand (0.5x + 0.5) ** (filt_size - 1).
        binomial = (np.poly1d((0.5, 0.5)) ** (self.filt_size - 1)).coeffs
        coeffs = torch.tensor(binomial.astype(np.float32))
        # Outer product gives the 2-D kernel; one copy per channel for the grouped conv.
        kernel_2d = coeffs[:, None] * coeffs[None, :]
        blur_filter = kernel_2d[None, None, :, :].repeat(self.channels, 1, 1, 1)
        self.register_buffer('filt', blur_filter, persistent=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        padded = F.pad(x, self.padding, 'reflect')
        return F.conv2d(padded, self.filt, stride=self.stride, groups=padded.shape[1])
def unitwise_norm(x, norm_type=2.0):
    """Return the norm of `x` per output unit.

    Tensors with at most one dimension get a single norm; otherwise the norm is taken over
    every dimension except the first, keeping dims so the result broadcasts against `x`.
    """
    if x.ndim > 1:
        # works for nn.ConvNd and nn.Linear where output dim is first in the kernel/weight tensor
        # might need special cases for other weights (possibly MHA) where this may not be true
        reduce_dims = tuple(range(1, x.ndim))
        return x.norm(norm_type, dim=reduce_dims, keepdim=True)
    return x.norm(norm_type)


def adaptive_clip_grad(parameters, clip_factor=0.01, eps=1e-3, norm_type=2.0):
    """Clip each parameter's gradient in place, unit by unit.

    A unit's gradient is rescaled when its norm reaches `clip_factor` times the norm of the
    matching weights (weight norm floored at `eps`). Parameters without a gradient are skipped.
    """
    params = [parameters] if isinstance(parameters, torch.Tensor) else parameters
    for param in params:
        if param.grad is None:
            continue
        weights = param.detach()
        grads = param.grad.detach()
        max_norm = unitwise_norm(weights, norm_type=norm_type).clamp_(min=eps).mul_(clip_factor)
        grad_norm = unitwise_norm(grads, norm_type=norm_type)
        # The clamp guards the division for all-zero gradients.
        rescaled = grads * (max_norm / grad_norm.clamp(min=1e-6))
        param.grad.detach().copy_(torch.where(grad_norm < max_norm, grads, rescaled))
class NativeScaler:
    """Loss-scaling step helper built on `torch.cuda.amp.GradScaler`.

    Calling the instance runs backward on the scaled loss, optionally clips gradients, then
    steps the optimizer and updates the scaler.
    """
    state_dict_key = "amp_scaler"

    def __init__(self):
        self._scaler = torch.cuda.amp.GradScaler()

    def __call__(self, loss, optimizer, clip_grad=None, clip_mode='norm', parameters=None, create_graph=False):
        scaled_loss = self._scaler.scale(loss)
        scaled_loss.backward(create_graph=create_graph)
        if clip_grad is not None:
            # Clipping needs the parameter list and must see unscaled gradients.
            assert parameters is not None
            self._scaler.unscale_(optimizer)  # unscale the gradients of optimizer's assigned params in-place
            dispatch_clip_grad(parameters, clip_grad, mode=clip_mode)
        self._scaler.step(optimizer)
        self._scaler.update()

    def state_dict(self):
        return self._scaler.state_dict()

    def load_state_dict(self, state_dict):
        self._scaler.load_state_dict(state_dict)
class Kadid10k(torch.utils.data.Dataset):
    """Dataset of distorted images with min-max normalized quality scores.

    Args:
        dis_path: directory containing the distorted images.
        txt_file_name: label file with one whitespace-separated `<name> <score>` pair per line.
        list_name: collection of two-character ids; a line is kept when characters 1..2 of its
            image name are in this collection.
        transform: optional callable applied to each sample dict.
        keep_ratio: accepted for interface compatibility; not used by this class.
    """
    def __init__(self, dis_path, txt_file_name, list_name, transform, keep_ratio):
        super().__init__()
        self.dis_path = dis_path
        self.txt_file_name = txt_file_name
        self.transform = transform

        dis_files_data, score_data = [], []
        with open(self.txt_file_name, 'r') as list_file:
            for line in list_file:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                dis, score = fields
                # Drop the last character of the name token -- presumably a trailing
                # separator such as a comma; confirm against the label file format.
                dis = dis[:-1]
                if dis[1:3] in list_name:
                    dis_files_data.append(dis)
                    score_data.append(float(score))

        # reshape score_list (1xn -> nx1)
        score_data = self.normalization(np.array(score_data))
        score_data = score_data.astype('float').reshape(-1, 1)
        self.data_dict = {'d_img_list': dis_files_data, 'score_list': score_data}

    def normalization(self, data):
        """Min-max scale `data` to [0, 1]; constant input maps to all zeros instead of NaN."""
        lowest = np.min(data)
        span = np.max(data) - lowest
        if span == 0:
            return np.zeros_like(data, dtype=float)
        return (data - lowest) / span

    def __len__(self):
        return len(self.data_dict['d_img_list'])

    def __getitem__(self, idx):
        d_img_name = self.data_dict['d_img_list'][idx]
        img_path = os.path.join(self.dis_path, d_img_name)
        d_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if d_img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        d_img = cv2.cvtColor(d_img, cv2.COLOR_BGR2RGB)
        d_img = np.array(d_img).astype('float32') / 255
        d_img = np.transpose(d_img, (2, 0, 1))  # HWC -> CHW

        score = self.data_dict['score_list'][idx]
        sample = {
            'd_img_org': d_img,
            'score': score
        }
        if self.transform:
            sample = self.transform(sample)
        return sample
class DepthToSpace(nn.Module):
    """Rearrange channel blocks into space: (N, C, H, W) -> (N, C // bs^2, H * bs, W * bs)."""

    def __init__(self, block_size):
        super().__init__()
        self.bs = block_size

    def forward(self, x):
        batch, channels, height, width = x.size()
        bs = self.bs
        out_channels = channels // (bs ** 2)
        # Split the channel axis into (bs, bs, C // bs^2).
        blocks = x.view(batch, bs, bs, out_channels, height, width)
        # Interleave the two block axes with H and W: (N, C // bs^2, H, bs, W, bs).
        blocks = blocks.permute(0, 3, 4, 1, 5, 2).contiguous()
        return blocks.view(batch, out_channels, height * bs, width * bs)
import * 26 | from .nasnet import * 27 | from .nest import * 28 | from .nfnet import * 29 | from .pit import * 30 | from .pnasnet import * 31 | from .regnet import * 32 | from .res2net import * 33 | from .resnest import * 34 | from .resnet import * 35 | from .resnetv2 import * 36 | from .rexnet import * 37 | from .selecsls import * 38 | from .senet import * 39 | from .sknet import * 40 | from .swin_transformer import * 41 | from .tnt import * 42 | from .tresnet import * 43 | from .twins import * 44 | from .vgg import * 45 | from .visformer import * 46 | from .vision_transformer import * 47 | from .vision_transformer_hybrid import * 48 | from .vovnet import * 49 | from .xception import * 50 | from .xception_aligned import * 51 | from .xcit import * 52 | 53 | from .factory import create_model, split_model_name, safe_model_name 54 | from .helpers import load_checkpoint, resume_checkpoint, model_parameters 55 | from .layers import TestTimePoolHead, apply_test_time_pool 56 | from .layers import convert_splitbn_model 57 | from .layers import is_scriptable, is_exportable, set_scriptable, set_exportable, is_no_jit, set_no_jit 58 | from .registry import register_model, model_entrypoint, list_models, is_model, list_modules, is_model_in_modules,\ 59 | has_model_default_key, is_model_default_key, get_model_default_value, is_model_pretrained 60 | -------------------------------------------------------------------------------- /timm/models/layers/mixed_conv2d.py: -------------------------------------------------------------------------------- 1 | """ PyTorch Mixed Convolution 2 | 3 | Paper: MixConv: Mixed Depthwise Convolutional Kernels (https://arxiv.org/abs/1907.09595) 4 | 5 | Hacked together by / Copyright 2020 Ross Wightman 6 | """ 7 | 8 | import torch 9 | from torch import nn as nn 10 | 11 | from .conv2d_same import create_conv2d_pad 12 | 13 | 14 | def _split_channels(num_chan, num_groups): 15 | split = [num_chan // num_groups for _ in range(num_groups)] 16 | split[0] += 
class Koniq10k(torch.utils.data.Dataset):
    """Dataset of images resized to 224x224 with min-max normalized quality scores.

    Args:
        dis_path: directory containing the images.
        txt_file_name: label file with one whitespace-separated `<name> <score>` pair per line.
        list_name: collection of image names to keep.
        transform: optional callable applied to each sample dict.
        keep_ratio: accepted for interface compatibility; not used by this class.
    """
    def __init__(self, dis_path, txt_file_name, list_name, transform, keep_ratio):
        super().__init__()
        self.dis_path = dis_path
        self.txt_file_name = txt_file_name
        self.transform = transform

        dis_files_data, score_data = [], []
        with open(self.txt_file_name, 'r') as list_file:
            for line in list_file:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                dis, score = fields
                if dis in list_name:
                    dis_files_data.append(dis)
                    score_data.append(float(score))

        # reshape score_list (1xn -> nx1), stored as a list of length-1 arrays
        score_data = self.normalization(np.array(score_data))
        score_data = list(score_data.astype('float').reshape(-1, 1))

        self.data_dict = {'d_img_list': dis_files_data, 'score_list': score_data}

    def normalization(self, data):
        """Min-max scale `data` to [0, 1]; constant input maps to all zeros instead of NaN."""
        lowest = np.min(data)
        span = np.max(data) - lowest
        if span == 0:
            return np.zeros_like(data, dtype=float)
        return (data - lowest) / span

    def __len__(self):
        return len(self.data_dict['d_img_list'])

    def __getitem__(self, idx):
        d_img_name = self.data_dict['d_img_list'][idx]
        img_path = os.path.join(self.dis_path, d_img_name)
        d_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if d_img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        d_img = cv2.resize(d_img, (224, 224), interpolation=cv2.INTER_CUBIC)
        d_img = cv2.cvtColor(d_img, cv2.COLOR_BGR2RGB)
        d_img = np.array(d_img).astype('float32') / 255
        d_img = np.transpose(d_img, (2, 0, 1))  # HWC -> CHW
        score = self.data_dict['score_list'][idx]

        sample = {
            'd_img_org': d_img,
            'score': score
        }
        if self.transform:
            sample = self.transform(sample)
        return sample
class StepLRScheduler(Scheduler):
    """Step decay schedule: each base value is multiplied by `decay_rate` once per `decay_t`
    steps, after an optional linear warmup starting from `warmup_lr_init`.
    """

    def __init__(self,
                 optimizer: torch.optim.Optimizer,
                 decay_t: float,
                 decay_rate: float = 1.,
                 warmup_t=0,
                 warmup_lr_init=0,
                 t_in_epochs=True,
                 noise_range_t=None,
                 noise_pct=0.67,
                 noise_std=1.0,
                 noise_seed=42,
                 initialize=True,
                 ) -> None:
        super().__init__(
            optimizer, param_group_field="lr",
            noise_range_t=noise_range_t, noise_pct=noise_pct, noise_std=noise_std, noise_seed=noise_seed,
            initialize=initialize)

        self.decay_t = decay_t
        self.decay_rate = decay_rate
        self.warmup_t = warmup_t
        self.warmup_lr_init = warmup_lr_init
        self.t_in_epochs = t_in_epochs
        if not self.warmup_t:
            self.warmup_steps = [1 for _ in self.base_values]
        else:
            # Per-group increment that reaches the base value after warmup_t steps.
            self.warmup_steps = [(base - warmup_lr_init) / self.warmup_t for base in self.base_values]
            super().update_groups(self.warmup_lr_init)

    def _get_lr(self, t):
        if t < self.warmup_t:
            return [self.warmup_lr_init + t * step for step in self.warmup_steps]
        return [base * (self.decay_rate ** (t // self.decay_t)) for base in self.base_values]

    def get_epoch_values(self, epoch: int):
        # Only answers when the schedule is expressed in epochs.
        return self._get_lr(epoch) if self.t_in_epochs else None

    def get_update_values(self, num_updates: int):
        # Only answers when the schedule is expressed in optimizer updates.
        return None if self.t_in_epochs else self._get_lr(num_updates)
def set_jit_fuser(fuser):
    """Configure the TorchScript fuser through private `torch._C` switches.

    Args:
        fuser: "te"; "old" or "legacy"; "nvfuser" or "nvf".

    Raises:
        AssertionError: if `fuser` is not one of the names above. Raised explicitly so the
            check still runs when Python is started with -O.
    """
    if fuser == "te":
        # default fuser should be == 'te'
        torch._C._jit_set_profiling_executor(True)
        torch._C._jit_set_profiling_mode(True)
        torch._C._jit_override_can_fuse_on_cpu(False)
        torch._C._jit_override_can_fuse_on_gpu(True)
        torch._C._jit_set_texpr_fuser_enabled(True)
    elif fuser in ("old", "legacy"):
        torch._C._jit_set_profiling_executor(False)
        torch._C._jit_set_profiling_mode(False)
        torch._C._jit_override_can_fuse_on_gpu(True)
        torch._C._jit_set_texpr_fuser_enabled(False)
    elif fuser in ("nvfuser", "nvf"):
        os.environ['PYTORCH_CUDA_FUSER_DISABLE_FALLBACK'] = '1'
        os.environ['PYTORCH_CUDA_FUSER_DISABLE_FMA'] = '1'
        os.environ['PYTORCH_CUDA_FUSER_JIT_OPT_LEVEL'] = '0'
        torch._C._jit_set_texpr_fuser_enabled(False)
        torch._C._jit_set_profiling_executor(True)
        torch._C._jit_set_profiling_mode(True)
        # Return values of the two queries below are not used; kept to preserve the call sequence.
        torch._C._jit_can_fuse_on_cpu()
        torch._C._jit_can_fuse_on_gpu()
        torch._C._jit_override_can_fuse_on_cpu(False)
        torch._C._jit_override_can_fuse_on_gpu(False)
        torch._C._jit_set_nvfuser_guard_mode(True)
        torch._C._jit_set_nvfuser_enabled(True)
    else:
        raise AssertionError(f"Invalid jit fuser ({fuser})")
class TestTimePoolHead(nn.Module):
    """Wrap a model so its classifier is applied as a 1x1 convolution over pooled feature maps.

    The base model's classifier is reused directly when it is already a Conv2d; otherwise its
    weight and bias are copied into a new 1x1 Conv2d. The base classifier is then reset.
    """
    def __init__(self, base, original_pool=7):
        super(TestTimePoolHead, self).__init__()
        self.base = base
        self.original_pool = original_pool
        classifier = self.base.get_classifier()
        if not isinstance(classifier, nn.Conv2d):
            conv_fc = nn.Conv2d(
                self.base.num_features, self.base.num_classes, kernel_size=1, bias=True)
            # Reshape the existing weights to the conv layout and copy them over.
            conv_fc.weight.data.copy_(classifier.weight.data.view(conv_fc.weight.size()))
            conv_fc.bias.data.copy_(classifier.bias.data.view(conv_fc.bias.size()))
            self.fc = conv_fc
        else:
            self.fc = classifier
        self.base.reset_classifier(0)  # delete original fc layer

    def forward(self, x):
        features = self.base.forward_features(x)
        pooled = F.avg_pool2d(features, kernel_size=self.original_pool, stride=1)
        logits = adaptive_avgmax_pool2d(self.fc(pooled), 1)
        return logits.view(logits.size(0), -1)
class BinaryCrossEntropy(nn.Module):
    """ BCE with optional one-hot from dense targets, label smoothing, thresholding
    NOTE for experiments comparing CE to BCE /w label smoothing, may remove
    """
    def __init__(
            self, smoothing=0.1, target_threshold: Optional[float] = None, weight: Optional[torch.Tensor] = None,
            reduction: str = 'mean', pos_weight: Optional[torch.Tensor] = None):
        super(BinaryCrossEntropy, self).__init__()
        assert 0. <= smoothing < 1.0
        self.smoothing = smoothing
        self.target_threshold = target_threshold
        self.reduction = reduction
        self.register_buffer('weight', weight)
        self.register_buffer('pos_weight', pos_weight)

    def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        assert x.shape[0] == target.shape[0]
        if target.shape != x.shape:
            # Targets arrive as class indices: expand to a smoothed one-hot matrix shaped like x.
            # NOTE currently assume smoothing or other label softening is applied upstream if
            # targets already match the shape of x
            num_classes = x.shape[-1]
            # FIXME should off/on be different for smoothing w/ BCE? Other impl out there differ
            off_value = self.smoothing / num_classes
            on_value = 1. - self.smoothing + off_value
            class_idx = target.long().view(-1, 1)
            smoothed = x.new_full((class_idx.size()[0], num_classes), off_value)
            target = smoothed.scatter_(1, class_idx, on_value)
        if self.target_threshold is not None:
            # Make target 0, or 1 if threshold set
            target = target.gt(self.target_threshold).to(dtype=target.dtype)
        return F.binary_cross_entropy_with_logits(
            x, target,
            self.weight,
            pos_weight=self.pos_weight,
            reduction=self.reduction)
import Linear 23 | from .mixed_conv2d import MixedConv2d 24 | from .mlp import Mlp, GluMlp, GatedMlp, ConvMlp 25 | from .non_local_attn import NonLocalAttn, BatNonLocalAttn 26 | from .norm import GroupNorm, LayerNorm2d 27 | from .norm_act import BatchNormAct2d, GroupNormAct 28 | from .padding import get_padding, get_same_padding, pad_same 29 | from .patch_embed import PatchEmbed 30 | from .pool2d_same import AvgPool2dSame, create_pool2d 31 | from .squeeze_excite import SEModule, SqueezeExcite, EffectiveSEModule, EffectiveSqueezeExcite 32 | from .selective_kernel import SelectiveKernel 33 | from .separable_conv import SeparableConv2d, SeparableConvBnAct 34 | from .space_to_depth import SpaceToDepthModule 35 | from .split_attn import SplitAttn 36 | from .split_batchnorm import SplitBatchNorm2d, convert_splitbn_model 37 | from .std_conv import StdConv2d, StdConv2dSame, ScaledStdConv2d, ScaledStdConv2dSame 38 | from .test_time_pool import TestTimePoolHead, apply_test_time_pool 39 | from .trace_utils import _assert, _float_to_int 40 | from .weight_init import trunc_normal_, variance_scaling_, lecun_normal_ 41 | -------------------------------------------------------------------------------- /timm/scheduler/multistep_lr.py: -------------------------------------------------------------------------------- 1 | """ MultiStep LR Scheduler 2 | 3 | Basic multi step LR schedule with warmup, noise. 
class MultiStepLRScheduler(Scheduler):
    """ Multi-step LR schedule with optional linear warmup.

    During the first `warmup_t` steps the value climbs linearly from
    `warmup_lr_init` towards each base value. Afterwards every base value is
    multiplied by `decay_rate` once for each entry of `decay_t` that is
    `<= t + 1`. `decay_t` must already be sorted ascending because the
    milestone lookup uses `bisect`.

    The noise arguments and `initialize` are forwarded unchanged to `Scheduler`.
    """

    def __init__(self,
                 optimizer: torch.optim.Optimizer,
                 decay_t: List[int],
                 decay_rate: float = 1.,
                 warmup_t=0,
                 warmup_lr_init=0,
                 t_in_epochs=True,
                 noise_range_t=None,
                 noise_pct=0.67,
                 noise_std=1.0,
                 noise_seed=42,
                 initialize=True,
                 ) -> None:
        super().__init__(
            optimizer,
            param_group_field="lr",
            noise_range_t=noise_range_t,
            noise_pct=noise_pct,
            noise_std=noise_std,
            noise_seed=noise_seed,
            initialize=initialize)

        self.decay_t = decay_t
        self.decay_rate = decay_rate
        self.warmup_t = warmup_t
        self.warmup_lr_init = warmup_lr_init
        self.t_in_epochs = t_in_epochs
        if not self.warmup_t:
            # no warmup: the per-step increments are never used, keep a placeholder per group
            self.warmup_steps = [1 for _ in self.base_values]
        else:
            # per-group linear increment, then start every group at the warmup value
            self.warmup_steps = [(base - warmup_lr_init) / self.warmup_t for base in self.base_values]
            super().update_groups(self.warmup_lr_init)

    def get_curr_decay_steps(self, t):
        """ Return how many milestones in `decay_t` are `<= t + 1` (list assumed sorted). """
        return bisect.bisect_right(self.decay_t, t + 1)

    def _get_lr(self, t):
        """ Return one value per base value for step `t` (warmup ramp or decayed base). """
        if t < self.warmup_t:
            return [self.warmup_lr_init + t * step for step in self.warmup_steps]
        return [base * (self.decay_rate ** self.get_curr_decay_steps(t)) for base in self.base_values]

    def get_epoch_values(self, epoch: int):
        """ Values for `epoch` when scheduling per epoch, otherwise None. """
        return self._get_lr(epoch) if self.t_in_epochs else None

    def get_update_values(self, num_updates: int):
        """ Values for `num_updates` when scheduling per update, otherwise None. """
        return None if self.t_in_epochs else self._get_lr(num_updates)
def get_padding(kernel_size: int, stride: int = 1, dilation: int = 1, **_) -> int:
    """ Calculate symmetric padding for a convolution. Extra keyword args are ignored. """
    dilated_extent = dilation * (kernel_size - 1)
    return ((stride - 1) + dilated_extent) // 2


def get_same_padding(x: int, k: int, s: int, d: int):
    """ Calculate asymmetric TensorFlow-like 'SAME' padding for a convolution.

    Returns the total padding (never negative) for one dimension of size `x`
    given kernel `k`, stride `s` and dilation `d`.
    """
    out_size = math.ceil(x / s)
    total = (out_size - 1) * s + (k - 1) * d + 1 - x
    return max(total, 0)


def is_static_pad(kernel_size: int, stride: int = 1, dilation: int = 1, **_):
    """ Can SAME padding for given args be done statically? """
    if stride != 1:
        return False
    return (dilation * (kernel_size - 1)) % 2 == 0


def pad_same(x, k: List[int], s: List[int], d: List[int] = (1, 1), value: float = 0):
    """ Dynamically pad input x with 'SAME' padding for conv with specified args.

    The last two dims of `x` are padded; any odd remainder goes to the
    bottom / right side. `x` is returned unchanged when no padding is needed.
    """
    ih, iw = x.size()[-2:]
    pad_h = get_same_padding(ih, k[0], s[0], d[0])
    pad_w = get_same_padding(iw, k[1], s[1], d[1])
    if pad_h <= 0 and pad_w <= 0:
        return x
    left = pad_w // 2
    top = pad_h // 2
    return F.pad(x, [left, pad_w - left, top, pad_h - top], value=value)
def _create_pool(num_features, num_classes, pool_type='avg', use_conv=False):
    """ Build the global pooling layer and report how many features it outputs.

    Flattening happens inside the pool only when pooling is enabled and a
    Linear (non-conv) classifier follows.
    """
    pooling_enabled = bool(pool_type)
    if not pooling_enabled:
        assert num_classes == 0 or use_conv,\
            'Pooling can only be disabled if classifier is also removed or conv classifier is used'
    # pass-through pooling never flattens; otherwise flatten for a Linear classifier
    flatten_in_pool = pooling_enabled and not use_conv
    global_pool = SelectAdaptivePool2d(pool_type=pool_type, flatten=flatten_in_pool)
    return global_pool, num_features * global_pool.feat_mult()


def _create_fc(num_features, num_classes, use_conv=False):
    """ Build the classifier layer: Identity, 1x1 Conv2d or Linear. """
    if num_classes <= 0:
        return nn.Identity()  # pass-through (no classifier)
    if use_conv:
        return nn.Conv2d(num_features, num_classes, 1, bias=True)
    return nn.Linear(num_features, num_classes, bias=True)


def create_classifier(num_features, num_classes, pool_type='avg', use_conv=False):
    """ Return a `(global_pool, fc)` pair sized for `num_features` inputs. """
    global_pool, num_pooled_features = _create_pool(num_features, num_classes, pool_type, use_conv=use_conv)
    return global_pool, _create_fc(num_pooled_features, num_classes, use_conv=use_conv)


class ClassifierHead(nn.Module):
    """Classifier head w/ configurable global pooling and dropout."""

    def __init__(self, in_chs, num_classes, pool_type='avg', drop_rate=0., use_conv=False):
        super().__init__()
        self.drop_rate = drop_rate
        self.global_pool, num_pooled_features = _create_pool(in_chs, num_classes, pool_type, use_conv=use_conv)
        self.fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv)
        # a conv classifier after real pooling still needs its output flattened
        if use_conv and pool_type:
            self.flatten = nn.Flatten(1)
        else:
            self.flatten = nn.Identity()

    def forward(self, x):
        pooled = self.global_pool(x)
        if self.drop_rate:
            pooled = F.dropout(pooled, p=float(self.drop_rate), training=self.training)
        return self.flatten(self.fc(pooled))
class SGDP(Optimizer):
    """ SGD with momentum that routes multi-dimensional parameters through `projection`.

    For parameters with more than one dimension the update direction and the
    weight-decay ratio come from `projection` (imported from `.adamp`); for
    all other parameters the plain momentum update and a ratio of 1 are used.
    """

    def __init__(self, params, lr=required, momentum=0, dampening=0,
                 weight_decay=0, nesterov=False, eps=1e-8, delta=0.1, wd_ratio=0.1):
        super().__init__(params, dict(
            lr=lr,
            momentum=momentum,
            dampening=dampening,
            weight_decay=weight_decay,
            nesterov=nesterov,
            eps=eps,
            delta=delta,
            wd_ratio=wd_ratio,
        ))

    @torch.no_grad()
    def step(self, closure=None):
        """ Perform one optimisation step; returns the closure's loss, or None without a closure. """
        loss = None
        if closure is not None:
            # the closure needs gradients even though the step itself runs under no_grad
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            lr = group['lr']
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                grad = p.grad
                if grad is None:
                    continue

                # State initialization
                state = self.state[p]
                if not state:
                    state['momentum'] = torch.zeros_like(p)

                # SGD
                buf = state['momentum']
                buf.mul_(momentum).add_(grad, alpha=1. - dampening)
                d_p = grad + momentum * buf if nesterov else buf

                # Projection
                wd_ratio = 1.
                if p.dim() > 1:
                    d_p, wd_ratio = projection(p, grad, d_p, group['delta'], group['wd_ratio'], group['eps'])

                # Weight decay
                if weight_decay != 0:
                    p.mul_(1. - lr * weight_decay * wd_ratio / (1 - momentum))

                # Step
                p.add_(d_p, alpha=-lr)

        return loss
class Lookahead(Optimizer):
    """ Lookahead wrapper around an existing optimizer.

    The wrapped optimizer performs the fast steps. Every `k` steps the slow
    weights (stored in the wrapped optimizer's per-parameter state under
    'lookahead_slow_buff') move towards the fast weights by a factor `alpha`
    and the fast weights are reset to them. Param groups and state dict are
    those of the wrapped optimizer.
    """

    def __init__(self, base_optimizer, alpha=0.5, k=6):
        # NOTE super().__init__() not called on purpose
        if not 0.0 <= alpha <= 1.0:
            raise ValueError(f'Invalid slow update rate: {alpha}')
        if not 1 <= k:
            raise ValueError(f'Invalid lookahead steps: {k}')
        self._base_optimizer = base_optimizer
        self.param_groups = base_optimizer.param_groups
        lookahead_defaults = dict(lookahead_alpha=alpha, lookahead_k=k, lookahead_step=0)
        self.defaults = base_optimizer.defaults
        self.defaults.update(lookahead_defaults)
        self.state = defaultdict(dict)
        # manually add our defaults to the param groups (existing values win)
        for group in self._base_optimizer.param_groups:
            for name, default in lookahead_defaults.items():
                group.setdefault(name, default)

    @torch.no_grad()
    def update_slow(self, group):
        """ Pull the slow weights of `group` towards the fast weights and copy them back. """
        base_state = self._base_optimizer.state
        for fast_p in group["params"]:
            if fast_p.grad is None:
                continue
            param_state = base_state[fast_p]
            if 'lookahead_slow_buff' not in param_state:
                # first sync for this parameter: slow weights start at the current fast weights
                param_state['lookahead_slow_buff'] = torch.empty_like(fast_p).copy_(fast_p)
            slow = param_state['lookahead_slow_buff']
            slow.add_(fast_p - slow, alpha=group['lookahead_alpha'])
            fast_p.copy_(slow)

    def sync_lookahead(self):
        """ Force a slow-weight update for every param group. """
        for group in self._base_optimizer.param_groups:
            self.update_slow(group)

    @torch.no_grad()
    def step(self, closure=None):
        """ Step the wrapped optimizer, then sync any group whose counter hits a multiple of k. """
        loss = self._base_optimizer.step(closure)
        for group in self._base_optimizer.param_groups:
            step_count = group['lookahead_step'] + 1
            group['lookahead_step'] = step_count
            if step_count % group['lookahead_k'] == 0:
                self.update_slow(group)
        return loss

    def state_dict(self):
        return self._base_optimizer.state_dict()

    def load_state_dict(self, state_dict):
        self._base_optimizer.load_state_dict(state_dict)
        # the wrapped optimizer may have replaced its group list; re-alias it
        self.param_groups = self._base_optimizer.param_groups
30 | if fuse_add: 31 | self.mlp_add = ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=LayerNorm2d) 32 | else: 33 | self.mlp_add = None 34 | if fuse_scale: 35 | self.mlp_scale = ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=LayerNorm2d) 36 | else: 37 | self.mlp_scale = None 38 | 39 | self.gate = create_act_layer(gate_layer) 40 | self.init_last_zero = init_last_zero 41 | self.reset_parameters() 42 | 43 | def reset_parameters(self): 44 | if self.conv_attn is not None: 45 | nn.init.kaiming_normal_(self.conv_attn.weight, mode='fan_in', nonlinearity='relu') 46 | if self.mlp_add is not None: 47 | nn.init.zeros_(self.mlp_add.fc2.weight) 48 | 49 | def forward(self, x): 50 | B, C, H, W = x.shape 51 | 52 | if self.conv_attn is not None: 53 | attn = self.conv_attn(x).reshape(B, 1, H * W) # (B, 1, H * W) 54 | attn = F.softmax(attn, dim=-1).unsqueeze(3) # (B, 1, H * W, 1) 55 | context = x.reshape(B, C, H * W).unsqueeze(1) @ attn 56 | context = context.view(B, C, 1, 1) 57 | else: 58 | context = x.mean(dim=(2, 3), keepdim=True) 59 | 60 | if self.mlp_scale is not None: 61 | mlp_x = self.mlp_scale(context) 62 | x = x * self.gate(mlp_x) 63 | if self.mlp_add is not None: 64 | mlp_x = self.mlp_add(context) 65 | x = x + mlp_x 66 | 67 | return x 68 | -------------------------------------------------------------------------------- /timm/data/parsers/parser_image_folder.py: -------------------------------------------------------------------------------- 1 | """ A dataset parser that reads images from folders 2 | 3 | Folders are scannerd recursively to find image files. Labels are based 4 | on the folder hierarchy, just leaf folders by default. 
def find_images_and_targets(folder, types=IMG_EXTENSIONS, class_to_idx=None, leaf_name_only=True, sort=True):
    """ Walk `folder` and pair each image file with a class index taken from its directory.

    The label of a file is the name of its containing directory (or, with
    `leaf_name_only=False`, its path relative to `folder` with separators
    replaced by '_'). When `class_to_idx` is not given it is built from the
    labels found, in natural sort order. Files whose label is missing from
    `class_to_idx` are dropped.

    Returns `(images_and_targets, class_to_idx)`, where the first item is a
    list of `(file path, class index)` tuples.
    """
    found = []  # (file path, label) in walk order
    for root, _subdirs, files in os.walk(folder, topdown=False, followlinks=True):
        rel_path = '' if root == folder else os.path.relpath(root, folder)
        if leaf_name_only:
            label = os.path.basename(rel_path)
        else:
            label = rel_path.replace(os.path.sep, '_')
        for fname in files:
            ext = os.path.splitext(fname)[1]
            if ext.lower() in types:
                found.append((os.path.join(root, fname), label))
    if class_to_idx is None:
        # building class index
        ordered_labels = sorted(set(label for _, label in found), key=natural_key)
        class_to_idx = {name: idx for idx, name in enumerate(ordered_labels)}
    images_and_targets = [(path, class_to_idx[label]) for path, label in found if label in class_to_idx]
    if sort:
        images_and_targets.sort(key=lambda item: natural_key(item[0]))
    return images_and_targets, class_to_idx


class ParserImageFolder(Parser):
    """ Parser over a folder tree of images; samples are `(path, class index)` pairs. """

    def __init__(
            self,
            root,
            class_map=''):
        super().__init__()

        self.root = root
        class_to_idx = load_class_map(class_map, root) if class_map else None
        self.samples, self.class_to_idx = find_images_and_targets(root, class_to_idx=class_to_idx)
        if not self.samples:
            raise RuntimeError(
                f'Found 0 images in subfolders of {root}. Supported image extensions are {", ".join(IMG_EXTENSIONS)}')

    def __getitem__(self, index):
        # hands back an open binary file object; closing it is left to the caller
        path, target = self.samples[index]
        return open(path, 'rb'), target

    def __len__(self):
        return len(self.samples)

    def _filename(self, index, basename=False, absolute=False):
        """ File name of sample `index`: base name, stored path, or path relative to root. """
        filename = self.samples[index][0]
        if basename:
            return os.path.basename(filename)
        if absolute:
            return filename
        return os.path.relpath(filename, self.root)
class SeparableConv2d(nn.Module):
    """ Separable Conv

    A depthwise conv (`conv_dw`) followed by a pointwise conv (`conv_pw`),
    both built with `create_conv2d`. The intermediate width is
    `int(in_channels * channel_multiplier)`.
    """
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False,
                 channel_multiplier=1.0, pw_kernel_size=1):
        super().__init__()
        mid_channels = int(in_channels * channel_multiplier)

        self.conv_dw = create_conv2d(
            in_channels, mid_channels, kernel_size,
            stride=stride, dilation=dilation, padding=padding, depthwise=True)

        # `bias` applies to the pointwise conv only
        self.conv_pw = create_conv2d(
            mid_channels, out_channels, pw_kernel_size, padding=padding, bias=bias)

    @property
    def in_channels(self):
        return self.conv_dw.in_channels

    @property
    def out_channels(self):
        return self.conv_pw.out_channels

    def forward(self, x):
        return self.conv_pw(self.conv_dw(x))
def extract_tarinfo(tarfile, class_to_idx=None, sort=True):
    """ Collect image members of an open tar archive with their class indices.

    The label of a member is the name of its containing directory inside the
    archive. When `class_to_idx` is not given it is built from the labels
    found, in natural sort order. Members whose label is not in
    `class_to_idx` are dropped.

    Returns `(tarinfo_and_targets, class_to_idx)`.
    """
    members = []  # (TarInfo, label) in archive order
    for ti in tarfile.getmembers():
        if not ti.isfile():
            continue
        dirname, basename = os.path.split(ti.path)
        if os.path.splitext(basename)[1].lower() in IMG_EXTENSIONS:
            members.append((ti, os.path.basename(dirname)))
    if class_to_idx is None:
        ordered_labels = sorted(set(label for _, label in members), key=natural_key)
        class_to_idx = {name: idx for idx, name in enumerate(ordered_labels)}
    tarinfo_and_targets = [(ti, class_to_idx[label]) for ti, label in members if label in class_to_idx]
    if sort:
        tarinfo_and_targets.sort(key=lambda item: natural_key(item[0].path))
    return tarinfo_and_targets, class_to_idx


class ParserImageTar(Parser):
    """ Single tarfile dataset where classes are mapped to folders within tar
    NOTE: This class is being deprecated in favour of the more capable ParserImageInTar that can
    operate on folders of tars or tars in tars.
    """
    def __init__(self, root, class_map=''):
        super().__init__()

        class_to_idx = load_class_map(class_map, root) if class_map else None
        assert os.path.isfile(root)
        self.root = root

        # cannot keep this open across processes, reopen later
        with tarfile.open(root) as tf:
            self.samples, self.class_to_idx = extract_tarinfo(tf, class_to_idx)
        self.imgs = self.samples
        self.tarfile = None  # lazy init in __getitem__

    def __getitem__(self, index):
        if self.tarfile is None:
            self.tarfile = tarfile.open(self.root)
        tarinfo, target = self.samples[index]
        return self.tarfile.extractfile(tarinfo), target

    def __len__(self):
        return len(self.samples)

    def _filename(self, index, basename=False, absolute=False):
        """ Member name of sample `index`; `absolute` has no effect here. """
        member_name = self.samples[index][0].name
        return os.path.basename(member_name) if basename else member_name
@torch.jit.script
def swish_jit(x, inplace: bool = False):
    """Swish - Described in: https://arxiv.org/abs/1710.05941

    `inplace` is accepted for interface parity only; the result is always a new tensor.
    """
    return x * torch.sigmoid(x)


@torch.jit.script
def mish_jit(x, _inplace: bool = False):
    """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681

    `_inplace` is accepted for interface parity only.
    """
    return x * torch.tanh(F.softplus(x))


class SwishJit(nn.Module):
    """ Module wrapper around `swish_jit`; `inplace` is ignored. """
    def __init__(self, inplace: bool = False):
        super().__init__()

    def forward(self, x):
        return swish_jit(x)


class MishJit(nn.Module):
    """ Module wrapper around `mish_jit`; `inplace` is ignored. """
    def __init__(self, inplace: bool = False):
        super().__init__()

    def forward(self, x):
        return mish_jit(x)


@torch.jit.script
def hard_sigmoid_jit(x, inplace: bool = False):
    """ Hard sigmoid, clamp(x + 3, 0, 6) / 6; `inplace` is ignored. """
    # equivalent to F.relu6(x + 3.) / 6.
    return torch.clamp(x + 3, min=0, max=6) / 6.


class HardSigmoidJit(nn.Module):
    """ Module wrapper around `hard_sigmoid_jit`; `inplace` is ignored. """
    def __init__(self, inplace: bool = False):
        super().__init__()

    def forward(self, x):
        return hard_sigmoid_jit(x)


@torch.jit.script
def hard_swish_jit(x, inplace: bool = False):
    """ Hard swish, x * clamp(x + 3, 0, 6) / 6; `inplace` is ignored. """
    # equivalent to x * (F.relu6(x + 3.) / 6)
    gate = torch.clamp(x + 3, min=0, max=6) / 6.
    return x * gate


class HardSwishJit(nn.Module):
    """ Module wrapper around `hard_swish_jit`; `inplace` is ignored. """
    def __init__(self, inplace: bool = False):
        super().__init__()

    def forward(self, x):
        return hard_swish_jit(x)


@torch.jit.script
def hard_mish_jit(x, inplace: bool = False):
    """ Hard Mish
    Experimental, based on notes by Mish author Diganta Misra at
      https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md
    """
    half_x = 0.5 * x
    return half_x * torch.clamp(x + 2, min=0, max=2)


class HardMishJit(nn.Module):
    """ Module wrapper around `hard_mish_jit`; `inplace` is ignored. """
    def __init__(self, inplace: bool = False):
        super().__init__()

    def forward(self, x):
        return hard_mish_jit(x)
def register_notrace_module(module: nn.Module):
    """
    Any module not under timm.models.layers should get this decorator if we don't want to trace through it.

    Adds `module` to the leaf-module set and returns it unchanged.
    """
    _leaf_modules.add(module)
    return module


# Functions we want to autowrap (treat them as leaves)
_autowrap_functions = set()


def register_notrace_function(func: Callable):
    """
    Decorator for functions which ought not to be traced through.

    Adds `func` to the autowrap set and returns it unchanged.
    """
    _autowrap_functions.add(func)
    return func


class FeatureGraphNet(nn.Module):
    """ Wraps `model` in an FX feature extractor returning the selected feature maps as a list.

    Output nodes are named after `out_map` entries when given, otherwise after
    the feature's module name.
    """
    def __init__(self, model, out_indices, out_map=None):
        super().__init__()
        assert has_fx_feature_extraction, 'Please update to PyTorch 1.10+, torchvision 0.11+ for FX feature extraction'
        self.feature_info = _get_feature_info(model, out_indices)
        if out_map is not None:
            assert len(out_map) == len(out_indices)
        return_nodes = {}
        for i, info in enumerate(self.feature_info):
            if i not in out_indices:
                continue
            node_name = info['module']
            return_nodes[node_name] = node_name if out_map is None else out_map[i]
        tracer_kwargs = {'leaf_modules': list(_leaf_modules), 'autowrap_functions': list(_autowrap_functions)}
        self.graph_module = create_feature_extractor(model, return_nodes, tracer_kwargs=tracer_kwargs)

    def forward(self, x):
        outputs = self.graph_module(x)
        return list(outputs.values())
class PIPAL(torch.utils.data.Dataset):
    """ Distorted-image dataset driven by a label text file.

    Each non-blank line of `txt_file_name` holds two whitespace-separated
    tokens: an image name token and a score. The last character of the name
    token is dropped and the remainder is split on '_' into
    `<image>_<distortion type>_<level...>`. For every `<image>_<distortion type>`
    group only the first `int(group size * keep_ratio)` entries (in file
    order) are kept. Kept scores are min-max normalised to [0, 1].

    Args:
        dis_path: directory containing the distorted images.
        txt_file_name: path of the label file.
        transform: optional callable applied to each sample dict.
        keep_ratio: fraction of each group to keep.

    Samples are dicts with 'd_img_org' (float32 RGB array, channels first,
    scaled to [0, 1]) and 'score' (array of shape (1,)).
    """

    def __init__(self, dis_path, txt_file_name, transform, keep_ratio):
        super(PIPAL, self).__init__()
        self.dis_path = dis_path
        self.txt_file_name = txt_file_name
        self.transform = transform

        # Parse the label file once, remembering each entry and the size of its group.
        entries = []  # (group key, image file name, raw score text)
        group_totals = {}
        with open(self.txt_file_name, 'r') as list_file:
            for line in list_file:
                if not line.strip():
                    continue  # tolerate blank lines such as a trailing newline
                dis, score = line.split()
                # drop the final character of the name token
                # NOTE(review): presumably a ',' separator after the file name -- confirm against the label file
                dis = dis[:-1]

                # obtain the splitting parts (a name with fewer than three parts is an error)
                split_list = dis.split('_')
                img_name, dis_type, _level = split_list[0], split_list[1], split_list[2]

                group = img_name + '_' + dis_type
                group_totals[group] = group_totals.get(group, 0) + 1
                entries.append((group, dis, score))

        # Keep the leading `keep_ratio` share of every group, preserving file order.
        dis_files_data, score_data = [], []
        group_seen = {}
        for group, dis, score in entries:
            group_seen[group] = group_seen.get(group, 0) + 1
            if group_seen[group] <= int(group_totals[group] * keep_ratio):
                dis_files_data.append(dis)
                score_data.append(float(score))

        # reshape score_list (1xn -> nx1)
        score_data = np.array(score_data)
        score_data = self.normalization(score_data)
        score_data = score_data.astype('float').reshape(-1, 1)

        self.data_dict = {'d_img_list': dis_files_data, 'score_list': score_data}

    def normalization(self, data):
        """ Min-max scale `data` to [0, 1]; constant data maps to all zeros instead of NaN. """
        low = np.min(data)
        value_range = np.max(data) - low
        if value_range == 0:
            # every score is identical: avoid 0 / 0
            return np.zeros_like(data, dtype=float)
        return (data - low) / value_range

    def __len__(self):
        return len(self.data_dict['d_img_list'])

    def __getitem__(self, idx):
        d_img_name = self.data_dict['d_img_list'][idx]
        d_img_path = os.path.join(self.dis_path, d_img_name)
        d_img = cv2.imread(d_img_path, cv2.IMREAD_COLOR)
        if d_img is None:
            # cv2.imread signals a missing / unreadable file by returning None
            raise FileNotFoundError('Could not read image: {}'.format(d_img_path))
        d_img = cv2.cvtColor(d_img, cv2.COLOR_BGR2RGB)
        d_img = d_img.astype('float32') / 255
        d_img = np.transpose(d_img, (2, 0, 1))  # HWC -> CHW

        score = self.data_dict['score_list'][idx]
        sample = {
            'd_img_org': d_img,
            'score': score
        }
        if self.transform:
            sample = self.transform(sample)
        return sample
'input_size' in default_cfg: 32 | input_size = default_cfg['input_size'] 33 | new_config['input_size'] = input_size 34 | 35 | # resolve interpolation method 36 | new_config['interpolation'] = 'bicubic' 37 | if 'interpolation' in args and args['interpolation']: 38 | new_config['interpolation'] = args['interpolation'] 39 | elif 'interpolation' in default_cfg: 40 | new_config['interpolation'] = default_cfg['interpolation'] 41 | 42 | # resolve dataset + model mean for normalization 43 | new_config['mean'] = IMAGENET_DEFAULT_MEAN 44 | if 'mean' in args and args['mean'] is not None: 45 | mean = tuple(args['mean']) 46 | if len(mean) == 1: 47 | mean = tuple(list(mean) * in_chans) 48 | else: 49 | assert len(mean) == in_chans 50 | new_config['mean'] = mean 51 | elif 'mean' in default_cfg: 52 | new_config['mean'] = default_cfg['mean'] 53 | 54 | # resolve dataset + model std deviation for normalization 55 | new_config['std'] = IMAGENET_DEFAULT_STD 56 | if 'std' in args and args['std'] is not None: 57 | std = tuple(args['std']) 58 | if len(std) == 1: 59 | std = tuple(list(std) * in_chans) 60 | else: 61 | assert len(std) == in_chans 62 | new_config['std'] = std 63 | elif 'std' in default_cfg: 64 | new_config['std'] = default_cfg['std'] 65 | 66 | # resolve default crop percentage 67 | new_config['crop_pct'] = DEFAULT_CROP_PCT 68 | if 'crop_pct' in args and args['crop_pct'] is not None: 69 | new_config['crop_pct'] = args['crop_pct'] 70 | elif 'crop_pct' in default_cfg: 71 | new_config['crop_pct'] = default_cfg['crop_pct'] 72 | 73 | if verbose: 74 | _logger.info('Data processing configuration for current model + dataset:') 75 | for n, v in new_config.items(): 76 | _logger.info('\t%s: %s' % (n, str(v))) 77 | 78 | return new_config 79 | -------------------------------------------------------------------------------- /timm/models/layers/squeeze_excite.py: -------------------------------------------------------------------------------- 1 | """ Squeeze-and-Excitation Channel Attention 2 
class SEModule(nn.Module):
    """ Squeeze-and-Excitation channel attention, following the original SE-Nets.

    Extras on top of the original formulation:
        * ``rd_divisor`` keeps the reduction width a multiple of a divisor (default: 8)
        * ``rd_channels`` sets the reduction width directly
        * ``rd_ratio`` sets the reduction width as a fraction of ``channels`` (default: 1/16)
        * ``add_maxpool`` blends a global max pool into the squeeze
        * activation, normalization and gate layers are configurable
    """
    def __init__(
            self, channels, rd_ratio=1. / 16, rd_channels=None, rd_divisor=8, add_maxpool=False,
            act_layer=nn.ReLU, norm_layer=None, gate_layer='sigmoid'):
        super().__init__()
        self.add_maxpool = add_maxpool
        if not rd_channels:
            # No explicit width given: derive it from the ratio.
            rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.)
        self.fc1 = nn.Conv2d(channels, rd_channels, kernel_size=1, bias=True)
        if norm_layer:
            self.bn = norm_layer(rd_channels)
        else:
            self.bn = nn.Identity()
        self.act = create_act_layer(act_layer, inplace=True)
        self.fc2 = nn.Conv2d(rd_channels, channels, kernel_size=1, bias=True)
        self.gate = create_act_layer(gate_layer)

    def forward(self, x):
        # Squeeze: global average over the spatial dims.
        squeezed = x.mean((2, 3), keepdim=True)
        if self.add_maxpool:
            # experimental codepath, may remove or change
            squeezed = 0.5 * squeezed + 0.5 * x.amax((2, 3), keepdim=True)
        # Excite: reduce -> norm -> act -> expand, then gate the input.
        excite = self.fc2(self.act(self.bn(self.fc1(squeezed))))
        return x * self.gate(excite)


SqueezeExcite = SEModule  # alias


class EffectiveSEModule(nn.Module):
    """ Effective Squeeze-Excitation: one 1x1 conv with no channel reduction.

    From `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667
    """
    def __init__(self, channels, add_maxpool=False, gate_layer='hard_sigmoid', **_):
        super().__init__()
        self.add_maxpool = add_maxpool
        self.fc = nn.Conv2d(channels, channels, kernel_size=1, padding=0)
        self.gate = create_act_layer(gate_layer)

    def forward(self, x):
        squeezed = x.mean((2, 3), keepdim=True)
        if self.add_maxpool:
            # experimental codepath, may remove or change
            squeezed = 0.5 * squeezed + 0.5 * x.amax((2, 3), keepdim=True)
        return x * self.gate(self.fc(squeezed))


EffectiveSqueezeExcite = EffectiveSEModule  # alias
def avg_pool2d_same(x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0),
                    ceil_mode: bool = False, count_include_pad: bool = True):
    """Average-pool ``x`` after padding it with ``pad_same``.

    ``padding`` is accepted for signature parity but is not used: the pooling
    call itself always runs with zero padding.
    """
    # FIXME how to deal with count_include_pad vs not for external padding?
    x = pad_same(x, kernel_size, stride)
    return F.avg_pool2d(x, kernel_size, stride, (0, 0), ceil_mode, count_include_pad)


class AvgPool2dSame(nn.AvgPool2d):
    """ Tensorflow like 'SAME' wrapper for 2D average pooling

    ``padding`` is ignored; the parent layer is always built with zero padding
    and the input is padded explicitly in ``forward``.
    """
    def __init__(self, kernel_size: int, stride=None, padding=0, ceil_mode=False, count_include_pad=True):
        kernel_size = to_2tuple(kernel_size)
        # nn.AvgPool2d documents stride=None as "use kernel_size". Resolve that
        # here so forward() never hands an unresolved stride to pad_same.
        stride = kernel_size if stride is None else to_2tuple(stride)
        super(AvgPool2dSame, self).__init__(kernel_size, stride, (0, 0), ceil_mode, count_include_pad)

    def forward(self, x):
        x = pad_same(x, self.kernel_size, self.stride)
        return F.avg_pool2d(
            x, self.kernel_size, self.stride, self.padding, self.ceil_mode, self.count_include_pad)


def max_pool2d_same(
        x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0),
        dilation: List[int] = (1, 1), ceil_mode: bool = False):
    """Max-pool ``x`` after padding it with ``pad_same`` using ``-inf`` fill.

    The ``-inf`` fill means padded positions can never win the max.
    ``padding`` is accepted for signature parity but is not used.
    """
    x = pad_same(x, kernel_size, stride, value=-float('inf'))
    return F.max_pool2d(x, kernel_size, stride, (0, 0), dilation, ceil_mode)


class MaxPool2dSame(nn.MaxPool2d):
    """ Tensorflow like 'SAME' wrapper for 2D max pooling

    ``padding`` is ignored; the parent layer is always built with zero padding
    and the input is padded explicitly (with ``-inf``) in ``forward``.
    """
    def __init__(self, kernel_size: int, stride=None, padding=0, dilation=1, ceil_mode=False):
        kernel_size = to_2tuple(kernel_size)
        # Same default as nn.MaxPool2d: a missing stride means kernel_size.
        stride = kernel_size if stride is None else to_2tuple(stride)
        dilation = to_2tuple(dilation)
        super(MaxPool2dSame, self).__init__(kernel_size, stride, (0, 0), dilation, ceil_mode)

    def forward(self, x):
        x = pad_same(x, self.kernel_size, self.stride, value=-float('inf'))
        return F.max_pool2d(x, self.kernel_size, self.stride, (0, 0), self.dilation, self.ceil_mode)
class RadixSoftmax(nn.Module):
    """Softmax across the radix splits, or a plain sigmoid when ``radix`` is 1."""

    def __init__(self, radix, cardinality):
        super().__init__()
        self.radix = radix
        self.cardinality = cardinality

    def forward(self, x):
        if self.radix <= 1:
            return torch.sigmoid(x)
        n = x.size(0)
        # (N, cardinality, radix, -1) -> (N, radix, cardinality, -1), then
        # normalize over the radix axis and flatten back to (N, -1).
        grouped = x.view(n, self.cardinality, self.radix, -1).transpose(1, 2)
        return F.softmax(grouped, dim=1).reshape(n, -1)


class SplitAttn(nn.Module):
    """Split-Attention (aka Splat)

    A grouped conv produces ``radix`` splits of ``out_channels`` each. The
    splits are summed and globally pooled, passed through two 1x1 convs to
    get per-split attention, and recombined as an attention-weighted sum.
    """
    def __init__(self, in_channels, out_channels=None, kernel_size=3, stride=1, padding=None,
                 dilation=1, groups=1, bias=False, radix=2, rd_ratio=0.25, rd_channels=None, rd_divisor=8,
                 act_layer=nn.ReLU, norm_layer=None, drop_block=None, **kwargs):
        super().__init__()
        if not out_channels:
            out_channels = in_channels
        self.radix = radix
        self.drop_block = drop_block
        mid_chs = out_channels * radix
        if rd_channels is not None:
            attn_chs = rd_channels * radix
        else:
            attn_chs = make_divisible(in_channels * radix * rd_ratio, min_value=32, divisor=rd_divisor)

        if padding is None:
            padding = kernel_size // 2
        self.conv = nn.Conv2d(
            in_channels, mid_chs, kernel_size, stride, padding, dilation,
            groups=groups * radix, bias=bias, **kwargs)
        self.bn0 = norm_layer(mid_chs) if norm_layer else nn.Identity()
        self.act0 = act_layer(inplace=True)
        self.fc1 = nn.Conv2d(out_channels, attn_chs, 1, groups=groups)
        self.bn1 = norm_layer(attn_chs) if norm_layer else nn.Identity()
        self.act1 = act_layer(inplace=True)
        self.fc2 = nn.Conv2d(attn_chs, mid_chs, 1, groups=groups)
        self.rsoftmax = RadixSoftmax(radix, groups)

    def forward(self, x):
        feats = self.bn0(self.conv(x))
        if self.drop_block is not None:
            feats = self.drop_block(feats)
        feats = self.act0(feats)

        B, RC, H, W = feats.shape
        has_splits = self.radix > 1
        if has_splits:
            # Expose the radix axis and fuse the splits by summation.
            feats = feats.reshape((B, self.radix, RC // self.radix, H, W))
            pooled = feats.sum(dim=1)
        else:
            pooled = feats
        pooled = pooled.mean((2, 3), keepdim=True)
        pooled = self.act1(self.bn1(self.fc1(pooled)))

        attn = self.rsoftmax(self.fc2(pooled)).view(B, -1, 1, 1)
        if has_splits:
            attn = attn.reshape((B, self.radix, RC // self.radix, 1, 1))
            out = (feats * attn).sum(dim=1)
        else:
            out = feats * attn
        return out.contiguous()
def setup_seed(seed):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic."""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


def eval_epoch(config, net, test_loader):
    """Run ``net`` over ``test_loader`` and write ``name,prediction`` lines.

    Each batch prediction is the mean over ``config.num_avg_val`` crops taken
    with ``five_point_crop``. Results go to
    ``config.valid_path + '/output.txt'`` and the number of written entries
    is printed.

    Args:
        config: object exposing ``valid_path``, ``num_avg_val`` and whatever
            ``five_point_crop`` reads from it.
        net: model to evaluate; it is switched to eval mode.
        test_loader: iterable of batches with ``'d_img_org'`` and ``'d_name'``.
    """
    with torch.no_grad():
        net.eval()
        name_list = []
        pred_list = []
        with open(config.valid_path + '/output.txt', 'w') as f:
            for data in tqdm(test_loader):
                # Upload the batch once and reuse it for every crop.
                d_img = data['d_img_org'].cuda()
                pred = 0
                for i in range(config.num_avg_val):
                    x_d = five_point_crop(i, d_img=d_img, config=config)
                    pred += net(x_d)

                pred /= config.num_avg_val
                d_name = data['d_name']
                pred = pred.cpu().numpy()
                name_list.extend(d_name)
                pred_list.extend(pred)
            for d_name, pred in zip(name_list, pred_list):
                f.write(d_name + ',' + str(pred) + '\n')
            print(len(name_list))
        # The ``with`` block closes the file; no explicit close is needed.
def split_model_name(model_name):
    """Split ``'<source>:<name>'`` into ``(source, name)``.

    A name without ``:`` yields an empty source. Only the first ``:`` is
    treated as the separator, and the source must be ``'timm'`` or
    ``'hf_hub'`` (checked with ``assert``).
    """
    head, sep, tail = model_name.partition(':')
    if not sep:
        return '', head
    assert head in ('timm', 'hf_hub')
    return head, tail


def safe_model_name(model_name, remove_source=True):
    """Return ``model_name`` with every non-alphanumeric char replaced by ``_``.

    Trailing underscores are stripped. With ``remove_source`` the
    ``'<source>:'`` prefix is dropped first.
    """
    if remove_source:
        model_name = split_model_name(model_name)[-1]
    cleaned = [ch if ch.isalnum() else '_' for ch in model_name]
    return ''.join(cleaned).rstrip('_')
class AsymmetricLossMultiLabel(nn.Module):
    """Asymmetric loss for multi-label classification.

    Args:
        gamma_neg: focusing exponent applied to negative targets.
        gamma_pos: focusing exponent applied to positive targets.
        clip: margin added to the negative probability (then clamped to 1);
            disabled when ``None`` or not positive.
        eps: lower clamp applied before taking logs.
        disable_torch_grad_focal_loss: compute the focusing weight without
            tracking gradients.
    """

    def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disable_torch_grad_focal_loss=False):
        super(AsymmetricLossMultiLabel, self).__init__()

        self.gamma_neg = gamma_neg
        self.gamma_pos = gamma_pos
        self.clip = clip
        self.disable_torch_grad_focal_loss = disable_torch_grad_focal_loss
        self.eps = eps

    def forward(self, x, y):
        """Return the summed asymmetric loss.

        Parameters
        ----------
        x: input logits
        y: targets (multi-label binarized vector)
        """

        # Calculating Probabilities
        x_sigmoid = torch.sigmoid(x)
        xs_pos = x_sigmoid
        xs_neg = 1 - x_sigmoid

        # Asymmetric Clipping
        if self.clip is not None and self.clip > 0:
            xs_neg = (xs_neg + self.clip).clamp(max=1)

        # Basic CE calculation
        los_pos = y * torch.log(xs_pos.clamp(min=self.eps))
        los_neg = (1 - y) * torch.log(xs_neg.clamp(min=self.eps))
        loss = los_pos + los_neg

        # Asymmetric Focusing
        if self.gamma_neg > 0 or self.gamma_pos > 0:
            # The context manager restores whatever grad mode the caller had,
            # so an enclosing torch.no_grad() is never switched back on.
            track_grad = torch.is_grad_enabled() and not self.disable_torch_grad_focal_loss
            with torch.set_grad_enabled(track_grad):
                pt0 = xs_pos * y
                pt1 = xs_neg * (1 - y)  # pt = p if t > 0 else 1-p
                pt = pt0 + pt1
                one_sided_gamma = self.gamma_pos * y + self.gamma_neg * (1 - y)
                one_sided_w = torch.pow(1 - pt, one_sided_gamma)
            loss *= one_sided_w

        return -loss.sum()
__all__ = [
    'is_exportable', 'is_scriptable', 'is_no_jit',
    'set_exportable', 'set_scriptable', 'set_no_jit', 'set_layer_config'
]

# Set to True if prefer to have layers with no jit optimization (includes activations)
_NO_JIT = False

# Set to True if prefer to have activation layers with no jit optimization
# NOTE not currently used as no difference between no_jit and no_activation jit as only layers obeying
# the jit flags so far are activations. This will change as more layers are updated and/or added.
_NO_ACTIVATION_JIT = False

# Set to True if exporting a model with Same padding via ONNX
_EXPORTABLE = False

# Set to True if wanting to use torch.jit.script on a model
_SCRIPTABLE = False


def is_no_jit():
    """Return the current no-jit flag."""
    return _NO_JIT


class set_no_jit:
    """Set the no-jit flag on construction; restore the old value on ``with`` exit."""

    def __init__(self, mode: bool) -> None:
        global _NO_JIT
        self.prev, _NO_JIT = _NO_JIT, mode

    def __enter__(self) -> None:
        return None

    def __exit__(self, *args: Any) -> bool:
        global _NO_JIT
        _NO_JIT = self.prev
        return False


def is_exportable():
    """Return the current exportable flag."""
    return _EXPORTABLE


class set_exportable:
    """Set the exportable flag on construction; restore the old value on ``with`` exit."""

    def __init__(self, mode: bool) -> None:
        global _EXPORTABLE
        self.prev, _EXPORTABLE = _EXPORTABLE, mode

    def __enter__(self) -> None:
        return None

    def __exit__(self, *args: Any) -> bool:
        global _EXPORTABLE
        _EXPORTABLE = self.prev
        return False


def is_scriptable():
    """Return the current scriptable flag."""
    return _SCRIPTABLE


class set_scriptable:
    """Set the scriptable flag on construction; restore the old value on ``with`` exit."""

    def __init__(self, mode: bool) -> None:
        global _SCRIPTABLE
        self.prev, _SCRIPTABLE = _SCRIPTABLE, mode

    def __enter__(self) -> None:
        return None

    def __exit__(self, *args: Any) -> bool:
        global _SCRIPTABLE
        _SCRIPTABLE = self.prev
        return False


class set_layer_config:
    """ Layer config context manager that allows setting all layer config flags at once.
    If a flag arg is None, it will not change the current value.

    The flags are applied when the object is constructed and the previous
    values are put back when the ``with`` block exits.
    """
    def __init__(
            self,
            scriptable: Optional[bool] = None,
            exportable: Optional[bool] = None,
            no_jit: Optional[bool] = None,
            no_activation_jit: Optional[bool] = None):
        global _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT
        self.prev = _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT
        _SCRIPTABLE = _SCRIPTABLE if scriptable is None else scriptable
        _EXPORTABLE = _EXPORTABLE if exportable is None else exportable
        _NO_JIT = _NO_JIT if no_jit is None else no_jit
        _NO_ACTIVATION_JIT = _NO_ACTIVATION_JIT if no_activation_jit is None else no_activation_jit

    def __enter__(self) -> None:
        return None

    def __exit__(self, *args: Any) -> bool:
        global _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT
        _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT = self.prev
        return False
def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
    # type: (Tensor, float, float, float, float) -> Tensor
    r"""Fills the input Tensor with values drawn from a truncated
    normal distribution. The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    """
    return _no_grad_trunc_normal_(tensor, mean, std, a, b)


def variance_scaling_(tensor, scale=1.0, mode='fan_in', distribution='normal'):
    """Initialize ``tensor`` in place with variance ``scale / fan``.

    Args:
        tensor: tensor to fill; fans come from
            ``torch.nn.init._calculate_fan_in_and_fan_out``.
        scale: numerator of the target variance.
        mode: ``'fan_in'``, ``'fan_out'`` or ``'fan_avg'``.
        distribution: ``'truncated_normal'``, ``'normal'`` or ``'uniform'``.

    Raises:
        ValueError: if ``mode`` or ``distribution`` is not recognized.
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    if mode == 'fan_in':
        denom = fan_in
    elif mode == 'fan_out':
        denom = fan_out
    elif mode == 'fan_avg':
        denom = (fan_in + fan_out) / 2
    else:
        # Fail with a clear message rather than an unbound ``denom`` below.
        raise ValueError(f"invalid mode {mode}")

    variance = scale / denom

    if distribution == "truncated_normal":
        # constant is stddev of standard normal truncated to (-2, 2)
        trunc_normal_(tensor, std=math.sqrt(variance) / .87962566103423978)
    elif distribution == "normal":
        # In-place fills on a leaf that requires grad are only legal under no_grad.
        with torch.no_grad():
            tensor.normal_(std=math.sqrt(variance))
    elif distribution == "uniform":
        bound = math.sqrt(3 * variance)
        with torch.no_grad():
            tensor.uniform_(-bound, bound)
    else:
        raise ValueError(f"invalid distribution {distribution}")


def lecun_normal_(tensor):
    """Fill ``tensor`` using fan-in scaling with a truncated normal distribution."""
    variance_scaling_(tensor, mode='fan_in', distribution='truncated_normal')
class InplaceAbn(nn.Module):
    """Activated Batch Normalization

    Bundles a BatchNorm and an activation function into one module; the
    computation itself is delegated to ``inplace_abn``.

    Parameters
    ----------
    num_features : int
        Number of feature channels in the input and output.
    eps : float
        Small constant to prevent numerical issues.
    momentum : float
        Momentum factor applied to compute running statistics.
    affine : bool
        If `True` apply learned scale and shift transformation after normalization.
    apply_act : bool
        If `False` the activation is forced to `identity`.
    act_layer : str or nn.Module type
        Name or type of the activation functions, one of: `leaky_relu`, `elu`, `identity`
    act_param : float
        Negative slope for the `leaky_relu` activation.
    drop_block :
        Accepted but not used by this module.
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, apply_act=True,
                 act_layer="leaky_relu", act_param=0.01, drop_block=None):
        super().__init__()
        self.num_features = num_features
        self.affine = affine
        self.eps = eps
        self.momentum = momentum
        if not apply_act:
            self.act_name = 'identity'
        elif isinstance(act_layer, str):
            assert act_layer in ('leaky_relu', 'elu', 'identity', '')
            self.act_name = act_layer or 'identity'
        else:
            # convert act layer passed as type to string
            for layer_type, label in (
                    (nn.ELU, 'elu'), (nn.LeakyReLU, 'leaky_relu'), (nn.Identity, 'identity')):
                if act_layer == layer_type:
                    self.act_name = label
                    break
            else:
                assert False, f'Invalid act layer {act_layer.__name__} for IABN'
        self.act_param = act_param
        if self.affine:
            self.weight = nn.Parameter(torch.ones(num_features))
            self.bias = nn.Parameter(torch.zeros(num_features))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Reset running stats to (0, 1) and, when affine, weight/bias to (1, 0)."""
        nn.init.constant_(self.running_mean, 0)
        nn.init.constant_(self.running_var, 1)
        if not self.affine:
            return
        nn.init.constant_(self.weight, 1)
        nn.init.constant_(self.bias, 0)

    def forward(self, x):
        result = inplace_abn(
            x, self.weight, self.bias, self.running_mean, self.running_var,
            self.training, self.momentum, self.eps, self.act_name, self.act_param)
        # If the backend returns a tuple, only its first element is the output.
        return result[0] if isinstance(result, tuple) else result
class SplitBatchNorm2d(torch.nn.BatchNorm2d):
    """BatchNorm2d that normalizes equal batch splits with separate BN layers.

    While training, the first split goes through this (parent) BN and each
    remaining split goes through its own layer in ``aux_bn``. In eval mode
    the whole batch goes through the parent BN only.
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True,
                 track_running_stats=True, num_splits=2):
        super().__init__(num_features, eps, momentum, affine, track_running_stats)
        assert num_splits > 1, 'Should have at least one aux BN layer (num_splits at least 2)'
        self.num_splits = num_splits
        aux_layers = []
        for _ in range(num_splits - 1):
            aux_layers.append(nn.BatchNorm2d(num_features, eps, momentum, affine, track_running_stats))
        self.aux_bn = nn.ModuleList(aux_layers)

    def forward(self, input: torch.Tensor):
        if not self.training:
            # aux BN only relevant while training
            return super().forward(input)
        chunk = input.shape[0] // self.num_splits
        assert input.shape[0] == chunk * self.num_splits, "batch size must be evenly divisible by num_splits"
        pieces = input.split(chunk)
        outputs = [super().forward(pieces[0])]
        for pos, aux in enumerate(self.aux_bn, start=1):
            outputs.append(aux(pieces[pos]))
        return torch.cat(outputs, dim=0)
def sort_file(file_path):
    """Sort the lines of a text file and write them to ``./output.txt``.

    Lines are compared as plain strings (lexicographic order). The result is
    written to ``output.txt`` in the current working directory, one entry per
    line, each terminated by a newline.

    Args:
        file_path: path of the text file to read.
    """
    # Context manager guarantees the input handle is closed even on error.
    with open(file_path, "r") as src:
        # Strip only the line terminator. Blindly slicing off the last character
        # would truncate the final line when the file has no trailing newline.
        ret = [line.rstrip('\n') for line in src]
    ret.sort()

    with open('./output.txt', 'w') as f:
        f.writelines(i + '\n' for i in ret)
class RandCrop(object):
    """Randomly crop a square ``patch_size`` x ``patch_size`` window from an image.

    The transform is applied to a sample dict holding the image under
    ``'d_img_org'`` (indexed here as C x H x W) and its name under ``'d_name'``.
    """

    def __init__(self, patch_size):
        # side length, in pixels, of the square crop
        self.patch_size = patch_size

    def __call__(self, sample):
        """Return a new sample dict whose image is a random crop of the input.

        Args:
            sample: dict with keys ``'d_img_org'`` (3-D array, C x H x W) and
                ``'d_name'``.

        Returns:
            dict with the cropped image under ``'d_img_org'`` and the unchanged
            ``'d_name'``.

        Raises:
            ValueError: if the image is smaller than the patch in H or W.
        """
        d_img = sample['d_img_org']
        d_name = sample['d_name']

        _, h, w = d_img.shape
        new_h = self.patch_size
        new_w = self.patch_size
        if new_h > h or new_w > w:
            raise ValueError(
                'patch_size %d is larger than the image (%d x %d)' % (self.patch_size, h, w))

        # np.random.randint excludes its upper bound, so "+ 1" is required for
        # the last valid offset to be reachable and for an image that is exactly
        # patch-sized (offset 0 only) to be accepted instead of raising.
        top = np.random.randint(0, h - new_h + 1)
        left = np.random.randint(0, w - new_w + 1)
        ret_d_img = d_img[:, top: top + new_h, left: left + new_w]
        sample = {
            'd_img_org': ret_d_img,
            'd_name': d_name
        }

        return sample
class ToTensor(object):
    """Convert the NumPy image of a sample dict into a float32 torch tensor."""

    def __init__(self):
        pass

    def __call__(self, sample):
        """Return a new sample dict with ``'d_img_org'`` as a ``torch.FloatTensor``.

        Args:
            sample: dict with a NumPy array under ``'d_img_org'`` and the image
                name under ``'d_name'``.

        Returns:
            dict with the same two keys; the name is passed through unchanged.
        """
        image = sample['d_img_org']
        name = sample['d_name']
        tensor = torch.from_numpy(image).type(torch.FloatTensor)
        return {'d_img_org': tensor, 'd_name': name}
def get_norm_act_layer(layer_class):
    """Map a norm+act layer name to its class.

    The name is normalized by removing underscores and lower-casing. Names that
    start with ``batchnorm`` or ``groupnorm`` match by prefix; the EvoNorm and
    in-place ABN names must match exactly.

    Args:
        layer_class (str): layer name, e.g. ``'batchnorm'``, ``'evo_norm_batch'``, ``'iabn'``.

    Returns:
        The matching norm+act layer class.

    Raises:
        AssertionError: if the name is not recognised.
    """
    key = layer_class.replace('_', '').lower()
    exact_names = {
        "evonormbatch": EvoNormBatch2d,
        "evonormsample": EvoNormSample2d,
        "iabn": InplaceAbn,
        "inplaceabn": InplaceAbn,
    }
    if key.startswith("batchnorm"):
        layer = BatchNormAct2d
    elif key.startswith("groupnorm"):
        layer = GroupNormAct
    elif key in exact_names:
        layer = exact_names[key]
    else:
        assert False, "Invalid norm_act layer (%s)" % key
    return layer
def convert_norm_act(norm_layer, act_layer):
    """Resolve a norm layer spec into the matching combined norm+act layer factory.

    Args:
        norm_layer: a class, a layer name string, a plain function, or a
            ``functools.partial`` wrapping one of those.
        act_layer: activation spec (class, name string, function, partial) or
            ``None``. It is bound as the ``act_layer`` keyword for layer types
            listed in ``_NORM_ACT_REQUIRES_ARG`` unless already bound.

    Returns:
        The norm+act layer class/function, wrapped in ``functools.partial`` when
        there are keyword arguments to bind.

    Raises:
        AssertionError: on an unsupported argument type, or when a class has no
            norm+act equivalent.
    """
    accepted = (type, str, types.FunctionType, functools.partial)
    assert isinstance(norm_layer, accepted)
    assert act_layer is None or isinstance(act_layer, accepted)

    # unbind partial fn, so args can be rebound later
    bound_kwargs = {}
    base = norm_layer
    if isinstance(base, functools.partial):
        bound_kwargs.update(base.keywords)
        base = base.func

    if isinstance(base, str):
        resolved = get_norm_act_layer(base)
    elif base in _NORM_ACT_TYPES or isinstance(base, types.FunctionType):
        # already a norm_act type, or a lambda/fn that must create a norm_act layer
        resolved = base
    else:
        type_name = base.__name__.lower()
        if type_name.startswith('batchnorm'):
            resolved = BatchNormAct2d
        elif type_name.startswith('groupnorm'):
            resolved = GroupNormAct
        else:
            assert False, f"No equivalent norm_act layer for {type_name}"

    if resolved in _NORM_ACT_REQUIRES_ARG:
        # pass `act_layer` through for backwards compat where `act_layer=None` implies no activation.
        # In the future, may force use of `apply_act` with `act_layer` arg bound to relevant NormAct types
        bound_kwargs.setdefault('act_layer', act_layer)

    # bind/rebind args only when there is something to bind
    return functools.partial(resolved, **bound_kwargs) if bound_kwargs else resolved
class RAdam(Optimizer):
    """Rectified Adam optimizer.

    Keeps Adam-style exponential moving averages of the gradient and squared
    gradient. Each step computes ``num_sma``; when it is >= 5 the update is the
    adaptive one (divided by ``sqrt(exp_avg_sq) + eps``) with a rectified step
    size, otherwise it falls back to a plain momentum update.

    Args:
        params: iterable of parameters or dicts defining parameter groups.
        lr: learning rate.
        betas: coefficients for the running averages of the gradient and its square.
        eps: term added to the denominator of the adaptive update.
        weight_decay: decay coefficient; applied directly to the weights,
            scaled by ``lr`` (see ``step``).
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        # `buffer` is a 10-slot cache of [step, num_sma, step_size], indexed by step % 10 in step().
        defaults = dict(
            lr=lr, betas=betas, eps=eps, weight_decay=weight_decay,
            buffer=[[None, None, None] for _ in range(10)])
        super(RAdam, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdam, self).__setstate__(state)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: optional callable that re-evaluates the model and returns
                the loss; it is run with gradients enabled.

        Returns:
            The value returned by ``closure``, or ``None`` if no closure was given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                # all maths is done on a float32 version of the parameter; the result
                # is written back to `p` with copy_ at the end of the iteration
                p_fp32 = p.float()

                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_fp32)
                else:
                    # keep existing state on the same type as the fp32 parameter
                    state['exp_avg'] = state['exp_avg'].type_as(p_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                # update second- then first-moment running averages in place
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                state['step'] += 1
                # reuse num_sma / step_size when this step count is already cached
                # NOTE(review): the cached step_size already includes group['lr'] and the
                # cache is keyed by step only; `buffer` is created once in `defaults`, so it
                # looks like param groups with different lr may share it - confirm against
                # torch.optim.Optimizer.add_param_group before relying on per-group lr.
                buffered = group['buffer'][int(state['step'] % 10)]
                if state['step'] == buffered[0]:
                    num_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    num_sma_max = 2 / (1 - beta2) - 1
                    num_sma = num_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = num_sma

                    # more conservative since it's an approximated value
                    if num_sma >= 5:
                        # rectified step size, including the first-moment bias correction
                        step_size = group['lr'] * math.sqrt(
                            (1 - beta2_t) *
                            (num_sma - 4) / (num_sma_max - 4) *
                            (num_sma - 2) / num_sma *
                            num_sma_max / (num_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        # only the first-moment bias correction
                        step_size = group['lr'] / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                # weight decay applied to the weights themselves: p <- p - wd * lr * p
                if group['weight_decay'] != 0:
                    p_fp32.add_(p_fp32, alpha=-group['weight_decay'] * group['lr'])

                # more conservative since it's an approximated value
                if num_sma >= 5:
                    # adaptive update
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_fp32.addcdiv_(exp_avg, denom, value=-step_size)
                else:
                    # momentum-only update
                    p_fp32.add_(exp_avg, alpha=-step_size)

                p.copy_(p_fp32)

        return loss
class EvoNormBatch2d(nn.Module):
    """Batch-statistics EvoNorm layer for 4D inputs.

    Presumably the 'EvoNormB0 (Batched)' variant named in the module docstring.
    With ``apply_act=True`` the input is divided by the element-wise maximum of
    a batch-variance term and ``x * v + instance_std`` before the per-channel
    affine transform. With ``apply_act=False`` only the affine transform
    (``weight`` / ``bias``) is applied.

    Args:
        num_features: number of channels; size of ``weight``, ``bias``, ``v``
            and ``running_var``.
        apply_act: enable the non-linear normalization path (creates ``v``).
        momentum: update factor for the ``running_var`` buffer.
        eps: constant added to variances before the square root.
        drop_block: accepted but not used anywhere in this class.
    """
    def __init__(self, num_features, apply_act=True, momentum=0.1, eps=1e-5, drop_block=None):
        super(EvoNormBatch2d, self).__init__()
        self.apply_act = apply_act  # apply activation (non-linearity)
        self.momentum = momentum
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(num_features), requires_grad=True)
        self.bias = nn.Parameter(torch.zeros(num_features), requires_grad=True)
        # `v` only exists when the non-linear path is enabled; forward() keys off `v is None`
        self.v = nn.Parameter(torch.ones(num_features), requires_grad=True) if apply_act else None
        self.register_buffer('running_var', torch.ones(num_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Reset ``weight`` and ``v`` to ones and ``bias`` to zeros."""
        nn.init.ones_(self.weight)
        nn.init.zeros_(self.bias)
        if self.apply_act:
            nn.init.ones_(self.v)

    def forward(self, x):
        """Apply the layer to a 4D tensor (channels are taken from dim 1)."""
        _assert(x.dim() == 4, 'expected 4D input')
        x_type = x.dtype
        if self.v is not None:
            running_var = self.running_var.view(1, -1, 1, 1)
            if self.training:
                # per-channel (biased) variance over dims 0, 2, 3
                var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True)
                # number of elements per channel
                n = x.numel() / x.shape[1]
                # the running buffer tracks the unbiased estimate (factor n / (n - 1))
                running_var = var.detach() * self.momentum * (n / (n - 1)) + running_var * (1 - self.momentum)
                self.running_var.copy_(running_var.view(self.running_var.shape))
            else:
                # eval: use the tracked running variance
                var = running_var
            v = self.v.to(dtype=x_type).reshape(1, -1, 1, 1)
            # x * v plus the per-sample, per-channel std over the spatial dims
            d = x * v + (x.var(dim=(2, 3), unbiased=False, keepdim=True) + self.eps).sqrt().to(dtype=x_type)
            # denominator is the element-wise max with the batch/running std
            d = d.max((var + self.eps).sqrt().to(dtype=x_type))
            x = x / d
        return x * self.weight.view(1, -1, 1, 1) + self.bias.view(1, -1, 1, 1)
class BatchNormAct2d(nn.BatchNorm2d):
    """BatchNorm + Activation

    This module performs BatchNorm + Activation in a manner that will remain backwards
    compatible with weights trained with separate bn, act. This is why we inherit from BN
    instead of composing it as a .bn member.

    Args:
        num_features, eps, momentum, affine, track_running_stats: forwarded to
            ``nn.BatchNorm2d``.
        apply_act: when False, ``self.act`` is ``nn.Identity`` regardless of ``act_layer``.
        act_layer: activation class, or a name string resolved with ``get_act_layer``;
            ``None`` also yields ``nn.Identity``.
        inplace: when True the activation is constructed with ``inplace=True``.
        drop_block: accepted but not used anywhere in this class.
    """
    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True,
                 apply_act=True, act_layer=nn.ReLU, inplace=True, drop_block=None):
        super(BatchNormAct2d, self).__init__(
            num_features, eps=eps, momentum=momentum, affine=affine, track_running_stats=track_running_stats)
        if isinstance(act_layer, str):
            act_layer = get_act_layer(act_layer)
        if act_layer is not None and apply_act:
            # only pass `inplace` when requested, so the activation is built with no args otherwise
            act_args = dict(inplace=True) if inplace else {}
            self.act = act_layer(**act_args)
        else:
            self.act = nn.Identity()

    def _forward_jit(self, x):
        """ A cut & paste of the contents of the PyTorch BatchNorm2d forward function
        """
        # exponential_average_factor is self.momentum set to
        # (when it is available) only so that if gets updated
        # in ONNX graph when this node is exported to ONNX.
        if self.momentum is None:
            exponential_average_factor = 0.0
        else:
            exponential_average_factor = self.momentum

        if self.training and self.track_running_stats:
            # TODO: if statement only here to tell the jit to skip emitting this when it is None
            if self.num_batches_tracked is not None:
                self.num_batches_tracked += 1
                if self.momentum is None:  # use cumulative moving average
                    exponential_average_factor = 1.0 / float(self.num_batches_tracked)
                else:  # use exponential moving average
                    exponential_average_factor = self.momentum

        # batch statistics are used when training or when no running stats are tracked
        x = F.batch_norm(
            x, self.running_mean, self.running_var, self.weight, self.bias,
            self.training or not self.track_running_stats,
            exponential_average_factor, self.eps)
        return x

    @torch.jit.ignore
    def _forward_python(self, x):
        """Non-scripted path: delegate to the parent ``nn.BatchNorm2d.forward``."""
        return super(BatchNormAct2d, self).forward(x)

    def forward(self, x):
        """Apply batch norm (scripted or eager path), then the activation."""
        # FIXME cannot call parent forward() and maintain jit.script compatibility?
        if torch.jit.is_scripting():
            x = self._forward_jit(x)
        else:
            x = self._forward_python(x)
        x = self.act(x)
        return x
isinstance(attn_type, str): 27 | attn_type = attn_type.lower() 28 | # Lightweight attention modules (channel and/or coarse spatial). 29 | # Typically added to existing network architecture blocks in addition to existing convolutions. 30 | if attn_type == 'se': 31 | module_cls = SEModule 32 | elif attn_type == 'ese': 33 | module_cls = EffectiveSEModule 34 | elif attn_type == 'eca': 35 | module_cls = EcaModule 36 | elif attn_type == 'ecam': 37 | module_cls = partial(EcaModule, use_mlp=True) 38 | elif attn_type == 'ceca': 39 | module_cls = CecaModule 40 | elif attn_type == 'ge': 41 | module_cls = GatherExcite 42 | elif attn_type == 'gc': 43 | module_cls = GlobalContext 44 | elif attn_type == 'gca': 45 | module_cls = partial(GlobalContext, fuse_add=True, fuse_scale=False) 46 | elif attn_type == 'cbam': 47 | module_cls = CbamModule 48 | elif attn_type == 'lcbam': 49 | module_cls = LightCbamModule 50 | 51 | # Attention / attention-like modules w/ significant params 52 | # Typically replace some of the existing workhorse convs in a network architecture. 53 | # All of these accept a stride argument and can spatially downsample the input. 54 | elif attn_type == 'sk': 55 | module_cls = SelectiveKernel 56 | elif attn_type == 'splat': 57 | module_cls = SplitAttn 58 | 59 | # Self-attention / attention-like modules w/ significant compute and/or params 60 | # Typically replace some of the existing workhorse convs in a network architecture. 61 | # All of these accept a stride argument and can spatially downsample the input. 62 | elif attn_type == 'lambda': 63 | return LambdaLayer 64 | elif attn_type == 'bottleneck': 65 | return BottleneckAttn 66 | elif attn_type == 'halo': 67 | return HaloAttn 68 | elif attn_type == 'nl': 69 | module_cls = NonLocalAttn 70 | elif attn_type == 'bat': 71 | module_cls = BatNonLocalAttn 72 | 73 | # Woops! 
def create_attn(attn_type, channels, **kwargs):
    """Instantiate the attention module selected by ``attn_type``.

    Args:
        attn_type: attention spec resolved by ``get_attn``.
        channels: passed as the first positional argument of the attention layer.
        **kwargs: forwarded to the attention layer constructor.

    Returns:
        The result of calling the resolved attention factory, or ``None`` when
        ``get_attn`` resolves to ``None``.
    """
    attn_factory = get_attn(attn_type)
    if attn_factory is None:
        return None
    # NOTE: it's expected the first (positional) argument of all attention layers is the # input channels
    return attn_factory(channels, **kwargs)
class AdamP(Optimizer):
    """AdamP optimizer: Adam whose update may be projected by ``projection``.

    Args:
        params: iterable of parameters or dicts defining parameter groups.
        lr: learning rate.
        betas: coefficients for the running averages of the gradient and its square.
        eps: term added to the denominator; also passed to ``projection``.
        weight_decay: decay coefficient, applied multiplicatively to the weights.
        delta: threshold passed to ``projection``.
        wd_ratio: weight-decay scaling factor passed to ``projection``.
        nesterov: use the Nesterov-style look-ahead form of the update.
    """
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, delta=0.1, wd_ratio=0.1, nesterov=False):
        defaults = dict(
            lr=lr, betas=betas, eps=eps, weight_decay=weight_decay,
            delta=delta, wd_ratio=wd_ratio, nesterov=nesterov)
        super(AdamP, self).__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: optional callable that re-evaluates the model and returns
                the loss; it is run with gradients enabled.

        Returns:
            The value returned by ``closure``, or ``None`` if no closure was given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad
                beta1, beta2 = group['betas']
                nesterov = group['nesterov']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p)
                    state['exp_avg_sq'] = torch.zeros_like(p)

                # Adam
                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']

                state['step'] += 1
                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']

                # in-place update of the first and second moment estimates
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)

                # bias-corrected second-moment root plus eps
                denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
                step_size = group['lr'] / bias_correction1

                if nesterov:
                    perturb = (beta1 * exp_avg + (1 - beta1) * grad) / denom
                else:
                    perturb = exp_avg / denom

                # Projection
                # only applied to parameters with more than one dimension; `projection`
                # returns the (possibly modified) update and the weight-decay ratio to use
                wd_ratio = 1.
                if len(p.shape) > 1:
                    perturb, wd_ratio = projection(p, grad, perturb, group['delta'], group['wd_ratio'], group['eps'])

                # Weight decay
                # multiplicative shrink of the weights, scaled by lr and the ratio from projection
                if group['weight_decay'] > 0:
                    p.mul_(1. - group['lr'] * group['weight_decay'] * wd_ratio)

                # Step
                p.add_(perturb, alpha=-step_size)

        return loss
def _create_convmixer(variant, pretrained=False, **kwargs):
    """Build a ``ConvMixer`` for ``variant`` using its entry in ``default_cfgs``.

    Args:
        variant: key into ``default_cfgs`` naming the model configuration.
        pretrained: forwarded to ``build_model_with_cfg``.
        **kwargs: forwarded to ``build_model_with_cfg``.

    Returns:
        Whatever ``build_model_with_cfg`` returns for the ``ConvMixer`` class.
    """
    return build_model_with_cfg(
        ConvMixer,
        variant,
        pretrained,
        default_cfg=default_cfgs[variant],
        **kwargs,
    )
class GatherExcite(nn.Module):
    """ Gather-Excite Attention Module

    A "gather" stage aggregates spatial context, an optional MLP transforms it,
    and the gated result rescales the input ("excite").

    Args:
        channels: number of input (and output) channels.
        feat_size: spatial size of the input feature map; required when
            ``extra_params=True`` and ``extent == 0``, where it is used as the
            kernel size of the gathering depthwise conv.
        extra_params: gather with learned depthwise convs instead of pooling.
        extent: gathering extent; ``0`` means global. Non-zero values must be even.
        use_mlp: apply a ``ConvMlp`` to the gathered features (else identity).
        rd_ratio, rd_channels, rd_divisor: control the MLP hidden width; when
            ``rd_channels`` is falsy it is derived with ``make_divisible``.
        add_maxpool: average the mean-pooled and max-pooled features in the
            parameter-free gather path.
        act_layer: activation used between gather convs and in the MLP.
        norm_layer: normalization layer constructed as ``norm_layer(channels)``
            after each gather conv; a falsy value disables normalization.
        gate_layer: gating activation created with ``create_act_layer``.
    """
    def __init__(
            self, channels, feat_size=None, extra_params=False, extent=0, use_mlp=True,
            rd_ratio=1./16, rd_channels=None, rd_divisor=1, add_maxpool=False,
            act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, gate_layer='sigmoid'):
        super(GatherExcite, self).__init__()
        self.add_maxpool = add_maxpool
        act_layer = get_act_layer(act_layer)
        self.extent = extent
        if extra_params:
            self.gather = nn.Sequential()
            if extent == 0:
                assert feat_size is not None, 'spatial feature size must be specified for global extent w/ params'
                self.gather.add_module(
                    'conv1', create_conv2d(channels, channels, kernel_size=feat_size, stride=1, depthwise=True))
                if norm_layer:
                    # honour the caller's norm_layer instead of a hard-coded BatchNorm2d;
                    # identical to the previous behaviour for the default argument
                    self.gather.add_module('norm1', norm_layer(channels))
            else:
                assert extent % 2 == 0
                # one stride-2 conv per factor of two in the extent
                num_conv = int(math.log2(extent))
                for i in range(num_conv):
                    self.gather.add_module(
                        f'conv{i + 1}',
                        create_conv2d(channels, channels, kernel_size=3, stride=2, depthwise=True))
                    if norm_layer:
                        self.gather.add_module(f'norm{i + 1}', norm_layer(channels))
                    # no activation after the last gather conv
                    if i != num_conv - 1:
                        self.gather.add_module(f'act{i + 1}', act_layer(inplace=True))
        else:
            self.gather = None
            if self.extent == 0:
                # global pooling path: kernel / stride are unused
                self.gk = 0
                self.gs = 0
            else:
                assert extent % 2 == 0
                # pooling kernel and stride for the local-extent path
                self.gk = self.extent * 2 - 1
                self.gs = self.extent

        if not rd_channels:
            rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.)
        self.mlp = ConvMlp(channels, rd_channels, act_layer=act_layer) if use_mlp else nn.Identity()
        self.gate = create_act_layer(gate_layer)

    def forward(self, x):
        """Return ``x`` scaled by the gated, gathered context."""
        size = x.shape[-2:]
        if self.gather is not None:
            x_ge = self.gather(x)
        else:
            if self.extent == 0:
                # global extent
                x_ge = x.mean(dim=(2, 3), keepdims=True)
                if self.add_maxpool:
                    # experimental codepath, may remove or change
                    x_ge = 0.5 * x_ge + 0.5 * x.amax((2, 3), keepdim=True)
            else:
                x_ge = F.avg_pool2d(
                    x, kernel_size=self.gk, stride=self.gs, padding=self.gk // 2, count_include_pad=False)
                if self.add_maxpool:
                    # experimental codepath, may remove or change
                    x_ge = 0.5 * x_ge + 0.5 * F.max_pool2d(x, kernel_size=self.gk, stride=self.gs, padding=self.gk // 2)
        x_ge = self.mlp(x_ge)
        # a 1x1 context broadcasts on its own; anything larger is resized back to the input size
        if x_ge.shape[-1] != 1 or x_ge.shape[-2] != 1:
            x_ge = F.interpolate(x_ge, size=size)
        return x * self.gate(x_ge)
class Nadam(Optimizer):
    """Nadam optimizer: Adam with a Nesterov-style momentum schedule.

    Proposed in `Incorporating Nesterov Momentum into Adam`__.

    Arguments:
        params (iterable): parameters to optimize, or dicts defining parameter groups
        lr (float, optional): learning rate (default: 2e-3)
        betas (Tuple[float, float], optional): coefficients for the running
            averages of the gradient and of its square
        eps (float, optional): added to the denominator for numerical
            stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        schedule_decay (float, optional): momentum schedule decay (default: 4e-3)

    __ http://cs229.stanford.edu/proj2015/054_report.pdf
    __ http://www.cs.toronto.edu/~fritz/absps/momentum.pdf

    Originally taken from: https://github.com/pytorch/pytorch/pull/1408
    NOTE: Has potential issues but does work well on some problems.
    """

    def __init__(self, params, lr=2e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, schedule_decay=4e-3):
        if not lr >= 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))
        super(Nadam, self).__init__(params, dict(
            lr=lr,
            betas=betas,
            eps=eps,
            weight_decay=weight_decay,
            schedule_decay=schedule_decay,
        ))

    @torch.no_grad()
    def step(self, closure=None):
        """Run one optimization step over every parameter that has a gradient.

        Arguments:
            closure (callable, optional): re-evaluates the model and returns
                the loss; it is called with gradients enabled.

        Returns:
            The value returned by ``closure``, or None when no closure is given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            lr = group['lr']
            beta1, beta2 = group['betas']
            eps = group['eps']
            weight_decay = group['weight_decay']
            schedule_decay = group['schedule_decay']

            for param in group['params']:
                if param.grad is None:
                    continue
                grad = param.grad
                state = self.state[param]

                # lazily create the per-parameter state on first use
                if not state:
                    state['step'] = 0
                    state['m_schedule'] = 1.
                    state['exp_avg'] = torch.zeros_like(param)
                    state['exp_avg_sq'] = torch.zeros_like(param)

                mu_prod_prev = state['m_schedule']
                first_moment = state['exp_avg']
                second_moment = state['exp_avg_sq']

                state['step'] += 1
                step_num = state['step']
                bias_correction2 = 1 - beta2 ** step_num

                if weight_decay != 0:
                    # out-of-place so the stored .grad tensor is left untouched
                    grad = grad.add(param, alpha=weight_decay)

                # warming momentum schedule for this step and the next one
                mu_t = beta1 * (1. - 0.5 * (0.96 ** (step_num * schedule_decay)))
                mu_next = beta1 * (1. - 0.5 * (0.96 ** ((step_num + 1) * schedule_decay)))
                mu_prod = mu_prod_prev * mu_t
                mu_prod_next = mu_prod * mu_next
                state['m_schedule'] = mu_prod

                # update the running first and second moments in place
                first_moment.mul_(beta1).add_(grad, alpha=1. - beta1)
                second_moment.mul_(beta2).addcmul_(grad, grad, value=1. - beta2)

                denom = (second_moment.sqrt() / math.sqrt(bias_correction2)).add_(eps)
                grad_scale = -lr * (1. - mu_t) / (1. - mu_prod)
                moment_scale = -lr * mu_next / (1. - mu_prod_next)
                param.addcdiv_(grad, denom, value=grad_scale)
                param.addcdiv_(first_moment, denom, value=moment_scale)

        return loss
warmup_lr_init=args.warmup_lr, 56 | warmup_t=args.warmup_epochs, 57 | t_in_epochs=True, 58 | **cycle_args, 59 | **noise_args, 60 | ) 61 | num_epochs = lr_scheduler.get_cycle_length() + args.cooldown_epochs 62 | elif args.sched == 'step': 63 | lr_scheduler = StepLRScheduler( 64 | optimizer, 65 | decay_t=args.decay_epochs, 66 | decay_rate=args.decay_rate, 67 | warmup_lr_init=args.warmup_lr, 68 | warmup_t=args.warmup_epochs, 69 | **noise_args, 70 | ) 71 | elif args.sched == 'multistep': 72 | lr_scheduler = MultiStepLRScheduler( 73 | optimizer, 74 | decay_t=args.decay_epochs, 75 | decay_rate=args.decay_rate, 76 | warmup_lr_init=args.warmup_lr, 77 | warmup_t=args.warmup_epochs, 78 | **noise_args, 79 | ) 80 | elif args.sched == 'plateau': 81 | mode = 'min' if 'loss' in getattr(args, 'eval_metric', '') else 'max' 82 | lr_scheduler = PlateauLRScheduler( 83 | optimizer, 84 | decay_rate=args.decay_rate, 85 | patience_t=args.patience_epochs, 86 | lr_min=args.min_lr, 87 | mode=mode, 88 | warmup_lr_init=args.warmup_lr, 89 | warmup_t=args.warmup_epochs, 90 | cooldown_t=0, 91 | **noise_args, 92 | ) 93 | elif args.sched == 'poly': 94 | lr_scheduler = PolyLRScheduler( 95 | optimizer, 96 | power=args.decay_rate, # overloading 'decay_rate' as polynomial power 97 | t_initial=num_epochs, 98 | lr_min=args.min_lr, 99 | warmup_lr_init=args.warmup_lr, 100 | warmup_t=args.warmup_epochs, 101 | k_decay=getattr(args, 'lr_k_decay', 1.0), 102 | **cycle_args, 103 | **noise_args, 104 | ) 105 | num_epochs = lr_scheduler.get_cycle_length() + args.cooldown_epochs 106 | 107 | return lr_scheduler, num_epochs 108 | -------------------------------------------------------------------------------- /predict_one_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | import random 5 | import cv2 6 | 7 | from torchvision import transforms 8 | from models.maniqa import MANIQA 9 | from torch.utils.data import DataLoader 
class Image(torch.utils.data.Dataset):
    """A single image pre-cut into random square crops for patch-wise scoring.

    The image is read with OpenCV, converted to RGB, scaled to [0, 1] and
    stored channel-first; ``num_crops`` random crops are sampled once at
    construction time.

    Args:
        image_path: path of the image file to load.
        transform: callable applied to each sample dict, or a falsy value to skip it.
        num_crops: number of random crops to pre-sample.
        crop_size: side length of each square crop (default 224).

    Raises:
        FileNotFoundError: if the image cannot be read.
        ValueError: if the image is smaller than ``crop_size`` on either axis.
    """

    def __init__(self, image_path, transform, num_crops=20, crop_size=224):
        super(Image, self).__init__()
        self.img_name = os.path.basename(image_path)

        img = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None
            raise FileNotFoundError('Could not read image: {}'.format(image_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.array(img).astype('float32') / 255
        self.img = np.transpose(img, (2, 0, 1))  # HWC -> CHW

        self.transform = transform

        print(self.img.shape)
        self.img_patches = self._sample_patches(self.img, num_crops, crop_size)

    @staticmethod
    def _sample_patches(img, num_crops, crop_size):
        """Return ``num_crops`` random ``crop_size`` x ``crop_size`` crops of a CHW array, stacked."""
        _, h, w = img.shape
        if h < crop_size or w < crop_size:
            raise ValueError(
                'Image of size {}x{} is smaller than the crop size {}'.format(h, w, crop_size))

        patches = []
        for _ in range(num_crops):
            # randint's upper bound is exclusive and must exceed the lower one, so an
            # axis that exactly matches the crop size has to use offset 0 directly
            top = np.random.randint(0, h - crop_size) if h > crop_size else 0
            left = np.random.randint(0, w - crop_size) if w > crop_size else 0
            patches.append(img[:, top: top + crop_size, left: left + crop_size])
        return np.array(patches)

    def __len__(self):
        return len(self.img_patches)

    def __getitem__(self, idx):
        return self.get_patch(idx)

    def get_patch(self, idx):
        """Return the sample dict for crop ``idx``, passed through ``transform`` if one is set."""
        patch = self.img_patches[idx]
        sample = {'d_img_org': patch, 'score': 0, 'd_name': self.img_name}
        if self.transform:
            sample = self.transform(sample)
        return sample
config = Config({ 75 | # image path 76 | "image_path": "./test_images/kunkun.png", 77 | 78 | # valid times 79 | "num_crops": 20, 80 | 81 | # model 82 | "patch_size": 8, 83 | "img_size": 224, 84 | "embed_dim": 768, 85 | "dim_mlp": 768, 86 | "num_heads": [4, 4], 87 | "window_size": 4, 88 | "depths": [2, 2], 89 | "num_outputs": 1, 90 | "num_tab": 2, 91 | "scale": 0.8, 92 | 93 | # checkpoint path 94 | "ckpt_path": "./ckpt_koniq10k.pt", 95 | }) 96 | 97 | # data load 98 | Img = Image(image_path=config.image_path, 99 | transform=transforms.Compose([Normalize(0.5, 0.5), ToTensor()]), 100 | num_crops=config.num_crops) 101 | 102 | # model defination 103 | net = MANIQA(embed_dim=config.embed_dim, num_outputs=config.num_outputs, dim_mlp=config.dim_mlp, 104 | patch_size=config.patch_size, img_size=config.img_size, window_size=config.window_size, 105 | depths=config.depths, num_heads=config.num_heads, num_tab=config.num_tab, scale=config.scale) 106 | 107 | net.load_state_dict(torch.load(config.ckpt_path), strict=False) 108 | net = net.cuda() 109 | 110 | avg_score = 0 111 | for i in tqdm(range(config.num_crops)): 112 | with torch.no_grad(): 113 | net.eval() 114 | patch_sample = Img.get_patch(i) 115 | patch = patch_sample['d_img_org'].cuda() 116 | patch = patch.unsqueeze(0) 117 | score = net(patch) 118 | avg_score += score 119 | 120 | print("Image {} score: {}".format(Img.img_name, avg_score / config.num_crops)) 121 | 122 | -------------------------------------------------------------------------------- /timm/models/pruned/ecaresnet50d_pruned.txt: -------------------------------------------------------------------------------- 1 | conv1.0.weight:[32, 3, 3, 3]***conv1.1.weight:[32]***conv1.3.weight:[32, 32, 3, 3]***conv1.4.weight:[32]***conv1.6.weight:[64, 32, 3, 3]***bn1.weight:[64]***layer1.0.conv1.weight:[47, 64, 1, 1]***layer1.0.bn1.weight:[47]***layer1.0.conv2.weight:[18, 47, 3, 3]***layer1.0.bn2.weight:[18]***layer1.0.conv3.weight:[19, 18, 1, 
1]***layer1.0.bn3.weight:[19]***layer1.0.se.conv.weight:[1, 1, 5]***layer1.0.downsample.1.weight:[19, 64, 1, 1]***layer1.0.downsample.2.weight:[19]***layer1.1.conv1.weight:[52, 19, 1, 1]***layer1.1.bn1.weight:[52]***layer1.1.conv2.weight:[22, 52, 3, 3]***layer1.1.bn2.weight:[22]***layer1.1.conv3.weight:[19, 22, 1, 1]***layer1.1.bn3.weight:[19]***layer1.1.se.conv.weight:[1, 1, 5]***layer1.2.conv1.weight:[64, 19, 1, 1]***layer1.2.bn1.weight:[64]***layer1.2.conv2.weight:[35, 64, 3, 3]***layer1.2.bn2.weight:[35]***layer1.2.conv3.weight:[19, 35, 1, 1]***layer1.2.bn3.weight:[19]***layer1.2.se.conv.weight:[1, 1, 5]***layer2.0.conv1.weight:[85, 19, 1, 1]***layer2.0.bn1.weight:[85]***layer2.0.conv2.weight:[37, 85, 3, 3]***layer2.0.bn2.weight:[37]***layer2.0.conv3.weight:[171, 37, 1, 1]***layer2.0.bn3.weight:[171]***layer2.0.se.conv.weight:[1, 1, 5]***layer2.0.downsample.1.weight:[171, 19, 1, 1]***layer2.0.downsample.2.weight:[171]***layer2.1.conv1.weight:[107, 171, 1, 1]***layer2.1.bn1.weight:[107]***layer2.1.conv2.weight:[80, 107, 3, 3]***layer2.1.bn2.weight:[80]***layer2.1.conv3.weight:[171, 80, 1, 1]***layer2.1.bn3.weight:[171]***layer2.1.se.conv.weight:[1, 1, 5]***layer2.2.conv1.weight:[120, 171, 1, 1]***layer2.2.bn1.weight:[120]***layer2.2.conv2.weight:[85, 120, 3, 3]***layer2.2.bn2.weight:[85]***layer2.2.conv3.weight:[171, 85, 1, 1]***layer2.2.bn3.weight:[171]***layer2.2.se.conv.weight:[1, 1, 5]***layer2.3.conv1.weight:[125, 171, 1, 1]***layer2.3.bn1.weight:[125]***layer2.3.conv2.weight:[87, 125, 3, 3]***layer2.3.bn2.weight:[87]***layer2.3.conv3.weight:[171, 87, 1, 1]***layer2.3.bn3.weight:[171]***layer2.3.se.conv.weight:[1, 1, 5]***layer3.0.conv1.weight:[198, 171, 1, 1]***layer3.0.bn1.weight:[198]***layer3.0.conv2.weight:[126, 198, 3, 3]***layer3.0.bn2.weight:[126]***layer3.0.conv3.weight:[818, 126, 1, 1]***layer3.0.bn3.weight:[818]***layer3.0.se.conv.weight:[1, 1, 5]***layer3.0.downsample.1.weight:[818, 171, 1, 
1]***layer3.0.downsample.2.weight:[818]***layer3.1.conv1.weight:[255, 818, 1, 1]***layer3.1.bn1.weight:[255]***layer3.1.conv2.weight:[232, 255, 3, 3]***layer3.1.bn2.weight:[232]***layer3.1.conv3.weight:[818, 232, 1, 1]***layer3.1.bn3.weight:[818]***layer3.1.se.conv.weight:[1, 1, 5]***layer3.2.conv1.weight:[256, 818, 1, 1]***layer3.2.bn1.weight:[256]***layer3.2.conv2.weight:[233, 256, 3, 3]***layer3.2.bn2.weight:[233]***layer3.2.conv3.weight:[818, 233, 1, 1]***layer3.2.bn3.weight:[818]***layer3.2.se.conv.weight:[1, 1, 5]***layer3.3.conv1.weight:[253, 818, 1, 1]***layer3.3.bn1.weight:[253]***layer3.3.conv2.weight:[235, 253, 3, 3]***layer3.3.bn2.weight:[235]***layer3.3.conv3.weight:[818, 235, 1, 1]***layer3.3.bn3.weight:[818]***layer3.3.se.conv.weight:[1, 1, 5]***layer3.4.conv1.weight:[256, 818, 1, 1]***layer3.4.bn1.weight:[256]***layer3.4.conv2.weight:[225, 256, 3, 3]***layer3.4.bn2.weight:[225]***layer3.4.conv3.weight:[818, 225, 1, 1]***layer3.4.bn3.weight:[818]***layer3.4.se.conv.weight:[1, 1, 5]***layer3.5.conv1.weight:[256, 818, 1, 1]***layer3.5.bn1.weight:[256]***layer3.5.conv2.weight:[239, 256, 3, 3]***layer3.5.bn2.weight:[239]***layer3.5.conv3.weight:[818, 239, 1, 1]***layer3.5.bn3.weight:[818]***layer3.5.se.conv.weight:[1, 1, 5]***layer4.0.conv1.weight:[492, 818, 1, 1]***layer4.0.bn1.weight:[492]***layer4.0.conv2.weight:[237, 492, 3, 3]***layer4.0.bn2.weight:[237]***layer4.0.conv3.weight:[2022, 237, 1, 1]***layer4.0.bn3.weight:[2022]***layer4.0.se.conv.weight:[1, 1, 7]***layer4.0.downsample.1.weight:[2022, 818, 1, 1]***layer4.0.downsample.2.weight:[2022]***layer4.1.conv1.weight:[512, 2022, 1, 1]***layer4.1.bn1.weight:[512]***layer4.1.conv2.weight:[500, 512, 3, 3]***layer4.1.bn2.weight:[500]***layer4.1.conv3.weight:[2022, 500, 1, 1]***layer4.1.bn3.weight:[2022]***layer4.1.se.conv.weight:[1, 1, 7]***layer4.2.conv1.weight:[512, 2022, 1, 1]***layer4.2.bn1.weight:[512]***layer4.2.conv2.weight:[490, 512, 3, 
def adaptive_avgmax_pool2d(x, output_size=1):
    """Return the mean of adaptive average pooling and adaptive max pooling of ``x``."""
    pooled_sum = F.adaptive_avg_pool2d(x, output_size) + F.adaptive_max_pool2d(x, output_size)
    return 0.5 * pooled_sum
class SelectAdaptivePool2d(nn.Module):
    """Selectable global pooling layer with dynamic input kernel size

    Args:
        output_size: target spatial size forwarded to the adaptive pooling
            layers; must be 1 for the 'fast' pool type.
        pool_type: one of 'fast', 'avg', 'avgmax', 'catavgmax', 'max'; any
            falsy value ('' / None) makes the pooling a pass-through.
        flatten: flatten the pooled output from dim 1 onwards.
    """
    def __init__(self, output_size=1, pool_type='fast', flatten=False):
        super(SelectAdaptivePool2d, self).__init__()
        self.pool_type = pool_type or ''  # convert other falsy values to empty string for consistent TS typing
        self.flatten = nn.Flatten(1) if flatten else nn.Identity()
        # dispatch on the normalized value so None / False behave exactly like ''
        if self.pool_type == '':
            self.pool = nn.Identity()  # pass through
        elif self.pool_type == 'fast':
            assert output_size == 1
            # the fast pool flattens by itself (keepdim=False), so no separate flatten module
            self.pool = FastAdaptiveAvgPool2d(flatten)
            self.flatten = nn.Identity()
        elif self.pool_type == 'avg':
            self.pool = nn.AdaptiveAvgPool2d(output_size)
        elif self.pool_type == 'avgmax':
            self.pool = AdaptiveAvgMaxPool2d(output_size)
        elif self.pool_type == 'catavgmax':
            self.pool = AdaptiveCatAvgMaxPool2d(output_size)
        elif self.pool_type == 'max':
            self.pool = nn.AdaptiveMaxPool2d(output_size)
        else:
            # raised explicitly so the check is not stripped when running with -O
            raise AssertionError('Invalid pool type: %s' % pool_type)

    def is_identity(self):
        """Return True when no pooling is applied (empty pool type)."""
        return not self.pool_type

    def forward(self, x):
        x = self.pool(x)
        x = self.flatten(x)
        return x

    def feat_mult(self):
        """Return the feature multiplier reported by ``adaptive_pool_feat_mult`` for this pool type."""
        return adaptive_pool_feat_mult(self.pool_type)

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + 'pool_type=' + self.pool_type \
               + ', flatten=' + str(self.flatten) + ')'
21 | This is described in the paper https://arxiv.org/abs/1806.01593 22 | """ 23 | 24 | def __init__(self, 25 | optimizer: torch.optim.Optimizer, 26 | t_initial: int, 27 | lb: float = -7., 28 | ub: float = 3., 29 | lr_min: float = 0., 30 | cycle_mul: float = 1., 31 | cycle_decay: float = 1., 32 | cycle_limit: int = 1, 33 | warmup_t=0, 34 | warmup_lr_init=0, 35 | warmup_prefix=False, 36 | t_in_epochs=True, 37 | noise_range_t=None, 38 | noise_pct=0.67, 39 | noise_std=1.0, 40 | noise_seed=42, 41 | initialize=True) -> None: 42 | super().__init__( 43 | optimizer, param_group_field="lr", 44 | noise_range_t=noise_range_t, noise_pct=noise_pct, noise_std=noise_std, noise_seed=noise_seed, 45 | initialize=initialize) 46 | 47 | assert t_initial > 0 48 | assert lr_min >= 0 49 | assert lb < ub 50 | assert cycle_limit >= 0 51 | assert warmup_t >= 0 52 | assert warmup_lr_init >= 0 53 | self.lb = lb 54 | self.ub = ub 55 | self.t_initial = t_initial 56 | self.lr_min = lr_min 57 | self.cycle_mul = cycle_mul 58 | self.cycle_decay = cycle_decay 59 | self.cycle_limit = cycle_limit 60 | self.warmup_t = warmup_t 61 | self.warmup_lr_init = warmup_lr_init 62 | self.warmup_prefix = warmup_prefix 63 | self.t_in_epochs = t_in_epochs 64 | if self.warmup_t: 65 | t_v = self.base_values if self.warmup_prefix else self._get_lr(self.warmup_t) 66 | self.warmup_steps = [(v - warmup_lr_init) / self.warmup_t for v in t_v] 67 | super().update_groups(self.warmup_lr_init) 68 | else: 69 | self.warmup_steps = [1 for _ in self.base_values] 70 | 71 | def _get_lr(self, t): 72 | if t < self.warmup_t: 73 | lrs = [self.warmup_lr_init + t * s for s in self.warmup_steps] 74 | else: 75 | if self.warmup_prefix: 76 | t = t - self.warmup_t 77 | 78 | if self.cycle_mul != 1: 79 | i = math.floor(math.log(1 - t / self.t_initial * (1 - self.cycle_mul), self.cycle_mul)) 80 | t_i = self.cycle_mul ** i * self.t_initial 81 | t_curr = t - (1 - self.cycle_mul ** i) / (1 - self.cycle_mul) * self.t_initial 82 | else: 83 | i = t // 
self.t_initial 84 | t_i = self.t_initial 85 | t_curr = t - (self.t_initial * i) 86 | 87 | if i < self.cycle_limit: 88 | gamma = self.cycle_decay ** i 89 | lr_max_values = [v * gamma for v in self.base_values] 90 | 91 | tr = t_curr / t_i 92 | lrs = [ 93 | self.lr_min + 0.5 * (lr_max - self.lr_min) * (1 - math.tanh(self.lb * (1. - tr) + self.ub * tr)) 94 | for lr_max in lr_max_values 95 | ] 96 | else: 97 | lrs = [self.lr_min for _ in self.base_values] 98 | return lrs 99 | 100 | def get_epoch_values(self, epoch: int): 101 | if self.t_in_epochs: 102 | return self._get_lr(epoch) 103 | else: 104 | return None 105 | 106 | def get_update_values(self, num_updates: int): 107 | if not self.t_in_epochs: 108 | return self._get_lr(num_updates) 109 | else: 110 | return None 111 | 112 | def get_cycle_length(self, cycles=0): 113 | cycles = max(1, cycles or self.cycle_limit) 114 | if self.cycle_mul == 1.0: 115 | return self.t_initial * cycles 116 | else: 117 | return int(math.floor(-self.t_initial * (self.cycle_mul ** cycles - 1) / (1 - self.cycle_mul))) 118 | -------------------------------------------------------------------------------- /timm/scheduler/poly_lr.py: -------------------------------------------------------------------------------- 1 | """ Polynomial Scheduler 2 | 3 | Polynomial LR schedule with warmup, noise. 
class PolyLRScheduler(Scheduler):
    """ Polynomial LR Scheduler w/ warmup, noise, and k-decay

    After an optional linear warmup, each cycle decays the LR from its cycle
    maximum towards ``lr_min`` following
    ``(1 - t_curr ** k / t_i ** k) ** power``. Cycle lengths grow by
    ``cycle_mul`` and cycle maxima shrink by ``cycle_decay``; once
    ``cycle_limit`` cycles have elapsed the LR stays at ``lr_min``.

    k-decay option based on `k-decay: A New Method For Learning Rate Schedule` - https://arxiv.org/abs/2004.05909
    """

    def __init__(self,
                 optimizer: torch.optim.Optimizer,
                 t_initial: int,
                 power: float = 0.5,
                 lr_min: float = 0.,
                 cycle_mul: float = 1.,
                 cycle_decay: float = 1.,
                 cycle_limit: int = 1,
                 warmup_t=0,
                 warmup_lr_init=0,
                 warmup_prefix=False,
                 t_in_epochs=True,
                 noise_range_t=None,
                 noise_pct=0.67,
                 noise_std=1.0,
                 noise_seed=42,
                 k_decay=1.0,
                 initialize=True) -> None:
        super().__init__(
            optimizer, param_group_field="lr",
            noise_range_t=noise_range_t, noise_pct=noise_pct, noise_std=noise_std, noise_seed=noise_seed,
            initialize=initialize)

        assert t_initial > 0
        assert lr_min >= 0
        if t_initial == 1 and cycle_mul == 1 and cycle_decay == 1:
            _logger.warning("Polynomial scheduler will have no effect on the learning "
                            "rate since t_initial = cycle_mul = cycle_decay = 1.")
        self.t_initial = t_initial
        self.power = power
        self.lr_min = lr_min
        self.cycle_mul = cycle_mul
        self.cycle_decay = cycle_decay
        self.cycle_limit = cycle_limit
        self.warmup_t = warmup_t
        self.warmup_lr_init = warmup_lr_init
        self.warmup_prefix = warmup_prefix
        self.t_in_epochs = t_in_epochs
        self.k_decay = k_decay
        if self.warmup_t:
            # per-group linear increment from warmup_lr_init up to the base value
            self.warmup_steps = [(v - warmup_lr_init) / self.warmup_t for v in self.base_values]
            super().update_groups(self.warmup_lr_init)
        else:
            self.warmup_steps = [1 for _ in self.base_values]

    def _get_lr(self, t):
        """Return the list of per-group learning rates for time step ``t``."""
        if t < self.warmup_t:
            lrs = [self.warmup_lr_init + t * s for s in self.warmup_steps]
        else:
            if self.warmup_prefix:
                # warmup is counted separately from the decay schedule
                t = t - self.warmup_t

            if self.cycle_mul != 1:
                # cycle lengths form a geometric series: solve for cycle index i,
                # its length t_i and the position t_curr inside it
                i = math.floor(math.log(1 - t / self.t_initial * (1 - self.cycle_mul), self.cycle_mul))
                t_i = self.cycle_mul ** i * self.t_initial
                t_curr = t - (1 - self.cycle_mul ** i) / (1 - self.cycle_mul) * self.t_initial
            else:
                i = t // self.t_initial
                t_i = self.t_initial
                t_curr = t - (self.t_initial * i)

            if i < self.cycle_limit:
                gamma = self.cycle_decay ** i
                lr_max_values = [v * gamma for v in self.base_values]
                k = self.k_decay
                lrs = [
                    self.lr_min + (lr_max - self.lr_min) * (1 - t_curr ** k / t_i ** k) ** self.power
                    for lr_max in lr_max_values
                ]
            else:
                # past the last cycle: hold at the floor
                lrs = [self.lr_min for _ in self.base_values]

        return lrs

    def get_epoch_values(self, epoch: int):
        """Return LRs for ``epoch`` when scheduling in epochs, else None."""
        if self.t_in_epochs:
            return self._get_lr(epoch)
        else:
            return None

    def get_update_values(self, num_updates: int):
        """Return LRs for ``num_updates`` when scheduling in updates, else None."""
        if not self.t_in_epochs:
            return self._get_lr(num_updates)
        else:
            return None

    def get_cycle_length(self, cycles=0):
        """Return the total length of ``cycles`` cycles (``cycle_limit`` when 0, at least one)."""
        cycles = max(1, cycles or self.cycle_limit)
        if self.cycle_mul == 1.0:
            return self.t_initial * cycles
        else:
            return int(math.floor(-self.t_initial * (self.cycle_mul ** cycles - 1) / (1 - self.cycle_mul)))
class GluMlp(nn.Module):
    """ MLP w/ GLU style gating

    The first linear layer's output is split in half along the last dim; one
    half is multiplied by the activation of the other half before the second
    linear layer.

    See: https://arxiv.org/abs/1612.08083, https://arxiv.org/abs/2002.05202
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.Sigmoid, drop=0.):
        super().__init__()
        if not out_features:
            out_features = in_features
        if not hidden_features:
            hidden_features = in_features
        assert hidden_features % 2 == 0
        drop_probs = to_2tuple(drop)
        gated_width = hidden_features // 2  # fc2 only sees the value half

        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.drop1 = nn.Dropout(drop_probs[0])
        self.fc2 = nn.Linear(gated_width, out_features)
        self.drop2 = nn.Dropout(drop_probs[1])

    def init_weights(self):
        """Re-initialize the gate half of fc1: bias set to one, weights near zero."""
        # override init of fc1 w/ gate portion set to weight near zero, bias=1
        gate_start = self.fc1.bias.shape[0] // 2
        gate_bias = self.fc1.bias[gate_start:]
        gate_weight = self.fc1.weight[gate_start:]
        nn.init.ones_(gate_bias)
        nn.init.normal_(gate_weight, std=1e-6)

    def forward(self, x):
        value, gates = self.fc1(x).chunk(2, dim=-1)
        gated = self.drop1(value * self.act(gates))
        return self.drop2(self.fc2(gated))
class ConvMlp(nn.Module):
    """ MLP using 1x1 convs that keeps spatial dims

    Pipeline: 1x1 conv -> optional norm -> activation -> dropout -> 1x1 conv.
    """
    def __init__(
            self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU, norm_layer=None, drop=0.):
        super().__init__()
        if not out_features:
            out_features = in_features
        if not hidden_features:
            hidden_features = in_features

        self.fc1 = nn.Conv2d(in_features, hidden_features, kernel_size=1, bias=True)
        if norm_layer:
            self.norm = norm_layer(hidden_features)
        else:
            self.norm = nn.Identity()
        self.act = act_layer()
        self.fc2 = nn.Conv2d(hidden_features, out_features, kernel_size=1, bias=True)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        hidden = self.act(self.norm(self.fc1(x)))
        return self.fc2(self.drop(hidden))
class PlateauLRScheduler(Scheduler):
    """Decay the LR by a factor every time the monitored metric plateaus.

    Wraps ``torch.optim.lr_scheduler.ReduceLROnPlateau`` and adds a linear
    warmup phase plus optional multiplicative LR noise on top of it.

    Args:
        optimizer: optimizer whose 'lr' param-group field is scheduled.
        decay_rate: passed to ReduceLROnPlateau as ``factor``.
        patience_t: passed to ReduceLROnPlateau as ``patience``.
        verbose, threshold, mode: forwarded unchanged to ReduceLROnPlateau.
        cooldown_t: passed to ReduceLROnPlateau as ``cooldown``.
        warmup_t: warmup length; ``step`` uses the warmup ramp while ``epoch <= warmup_t``.
        warmup_lr_init: LR at the start of warmup.
        lr_min: passed to ReduceLROnPlateau as ``min_lr``.
        noise_range_t: a ``[start, end)`` pair or a single start epoch for noise;
            None disables noise.
        noise_type: 'normal' selects rejection-sampled gaussian noise; any
            other value selects uniform noise.
        noise_pct: bound on the absolute relative perturbation.
        noise_std: stored on the instance but not read by ``_apply_noise``.
        noise_seed: base seed for the noise generator (42 when None).
        initialize: forwarded to the ``Scheduler`` base class.
    """

    def __init__(self,
                 optimizer,
                 decay_rate=0.1,
                 patience_t=10,
                 verbose=True,
                 threshold=1e-4,
                 cooldown_t=0,
                 warmup_t=0,
                 warmup_lr_init=0,
                 lr_min=0,
                 mode='max',
                 noise_range_t=None,
                 noise_type='normal',
                 noise_pct=0.67,
                 noise_std=1.0,
                 noise_seed=None,
                 initialize=True,
                 ):
        # noise settings are handled by this class, so they are not passed to the base class
        super().__init__(optimizer, 'lr', initialize=initialize)

        # NOTE(review): `verbose` may not be accepted by every torch release — confirm
        # against the torch version in use
        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer,
            patience=patience_t,
            factor=decay_rate,
            verbose=verbose,
            threshold=threshold,
            cooldown=cooldown_t,
            mode=mode,
            min_lr=lr_min
        )

        self.noise_range = noise_range_t
        self.noise_pct = noise_pct
        self.noise_type = noise_type
        self.noise_std = noise_std
        self.noise_seed = noise_seed if noise_seed is not None else 42
        self.warmup_t = warmup_t
        self.warmup_lr_init = warmup_lr_init
        if self.warmup_t:
            # per-group linear increment from warmup_lr_init up to the base value
            # (base_values presumably comes from the Scheduler base class — verify there)
            self.warmup_steps = [(v - warmup_lr_init) / self.warmup_t for v in self.base_values]
            super().update_groups(self.warmup_lr_init)
        else:
            self.warmup_steps = [1 for _ in self.base_values]
        # un-noised LRs saved by _apply_noise; None when no noise is currently applied
        self.restore_lr = None

    def state_dict(self):
        """Return the wrapped scheduler's ``best`` and ``last_epoch`` values only."""
        return {
            'best': self.lr_scheduler.best,
            'last_epoch': self.lr_scheduler.last_epoch,
        }

    def load_state_dict(self, state_dict):
        """Restore ``best`` (required) and ``last_epoch`` (if present) on the wrapped scheduler."""
        self.lr_scheduler.best = state_dict['best']
        if 'last_epoch' in state_dict:
            self.lr_scheduler.last_epoch = state_dict['last_epoch']

    # override the base class step fn completely
    def step(self, epoch, metric=None):
        """Advance the schedule: linear warmup first, then plateau decay plus optional noise."""
        if epoch <= self.warmup_t:
            lrs = [self.warmup_lr_init + epoch * s for s in self.warmup_steps]
            super().update_groups(lrs)
        else:
            if self.restore_lr is not None:
                # restore actual LR from before our last noise perturbation before stepping base
                for i, param_group in enumerate(self.optimizer.param_groups):
                    param_group['lr'] = self.restore_lr[i]
                self.restore_lr = None

            self.lr_scheduler.step(metric, epoch)  # step the base scheduler

            if self.noise_range is not None:
                if isinstance(self.noise_range, (list, tuple)):
                    # pair form: noise is active for start <= epoch < end
                    apply_noise = self.noise_range[0] <= epoch < self.noise_range[1]
                else:
                    # scalar form: noise is active from that epoch onwards
                    apply_noise = epoch >= self.noise_range
                if apply_noise:
                    self._apply_noise(epoch)

    def _apply_noise(self, epoch):
        """Perturb every param group's LR by a seeded relative noise and remember the old values."""
        g = torch.Generator()
        # seeding with noise_seed + epoch makes the noise reproducible per epoch
        g.manual_seed(self.noise_seed + epoch)
        if self.noise_type == 'normal':
            while True:
                # resample if noise out of percent limit, brute force but shouldn't spin much
                noise = torch.randn(1, generator=g).item()
                if abs(noise) < self.noise_pct:
                    break
        else:
            # uniform noise scaled into [-noise_pct, noise_pct)
            noise = 2 * (torch.rand(1, generator=g).item() - 0.5) * self.noise_pct

        # apply the noise on top of previous LR, cache the old value so we can restore for normal
        # stepping of base scheduler
        restore_lr = []
        for i, param_group in enumerate(self.optimizer.param_groups):
            old_lr = float(param_group['lr'])
            restore_lr.append(old_lr)
            new_lr = old_lr + old_lr * noise
            param_group['lr'] = new_lr
        self.restore_lr = restore_lr
class CosineLRScheduler(Scheduler):
    """
    Cosine decay with restarts.
    This is described in the paper https://arxiv.org/abs/1608.03983.

    Inspiration from
    https://github.com/allenai/allennlp/blob/master/allennlp/training/learning_rate_schedulers/cosine.py

    k-decay option based on `k-decay: A New Method For Learning Rate Schedule` - https://arxiv.org/abs/2004.05909

    Args:
        optimizer: Optimizer handed to the ``Scheduler`` base class.
        t_initial: Length of the first cycle; must be > 0.
        lr_min: Lower LR bound of every cycle; must be >= 0.
        cycle_mul: Factor applied to the cycle length after each restart.
        cycle_decay: Factor applied to the peak LR after each restart.
        cycle_limit: Number of cycles; once the cycle index reaches it the
            LR is ``lr_min``.
        warmup_t: Number of linear warmup steps (0 disables warmup).
        warmup_lr_init: LR at the start of warmup.
        warmup_prefix: If True, ``warmup_t`` is subtracted from ``t`` before
            the cosine position is computed.
        t_in_epochs: If True the schedule is driven by ``get_epoch_values``,
            otherwise by ``get_update_values``.
        noise_range_t, noise_pct, noise_std, noise_seed, initialize:
            Forwarded unchanged to the ``Scheduler`` base class.
        k_decay: Exponent ``k`` applied to both the in-cycle position and
            the cycle length inside the cosine.

    NOTE(review): ``self.base_values`` is read but not assigned here;
    presumably the ``Scheduler`` base class provides it - confirm.
    """

    def __init__(self,
                 optimizer: torch.optim.Optimizer,
                 t_initial: int,
                 lr_min: float = 0.,
                 cycle_mul: float = 1.,
                 cycle_decay: float = 1.,
                 cycle_limit: int = 1,
                 warmup_t=0,
                 warmup_lr_init=0,
                 warmup_prefix=False,
                 t_in_epochs=True,
                 noise_range_t=None,
                 noise_pct=0.67,
                 noise_std=1.0,
                 noise_seed=42,
                 k_decay=1.0,
                 initialize=True) -> None:
        super().__init__(
            optimizer, param_group_field="lr",
            noise_range_t=noise_range_t, noise_pct=noise_pct, noise_std=noise_std, noise_seed=noise_seed,
            initialize=initialize)

        assert t_initial > 0
        assert lr_min >= 0
        if t_initial == 1 and cycle_mul == 1 and cycle_decay == 1:
            # Every cycle is one step long and restarts at the same peak.
            _logger.warning("Cosine annealing scheduler will have no effect on the learning "
                            "rate since t_initial = cycle_mul = cycle_decay = 1.")
        self.t_initial = t_initial
        self.lr_min = lr_min
        self.cycle_mul = cycle_mul
        self.cycle_decay = cycle_decay
        self.cycle_limit = cycle_limit
        self.warmup_t = warmup_t
        self.warmup_lr_init = warmup_lr_init
        self.warmup_prefix = warmup_prefix
        self.t_in_epochs = t_in_epochs
        self.k_decay = k_decay
        if self.warmup_t:
            # Per-group increment that reaches the base value after warmup_t steps.
            self.warmup_steps = [(v - warmup_lr_init) / self.warmup_t for v in self.base_values]
            super().update_groups(self.warmup_lr_init)
        else:
            self.warmup_steps = [1 for _ in self.base_values]

    def _get_lr(self, t):
        """Return the list of per-group LRs for schedule position ``t``."""
        if t < self.warmup_t:
            return [self.warmup_lr_init + t * s for s in self.warmup_steps]

        if self.warmup_prefix:
            t = t - self.warmup_t

        if self.cycle_mul != 1:
            # Cycle lengths form a geometric series; invert its partial sum to
            # find the index of the cycle that contains t.
            log_arg = 1 - t / self.t_initial * (1 - self.cycle_mul)
            if log_arg <= 0:
                # Only reachable with cycle_mul < 1: the shrinking cycles sum
                # to a finite length and t lies at or beyond it. math.log would
                # raise ValueError here, so treat it as "all cycles finished".
                return [self.lr_min for _ in self.base_values]
            i = math.floor(math.log(log_arg, self.cycle_mul))
            t_i = self.cycle_mul ** i * self.t_initial
            t_curr = t - (1 - self.cycle_mul ** i) / (1 - self.cycle_mul) * self.t_initial
        else:
            i = t // self.t_initial
            t_i = self.t_initial
            t_curr = t - (self.t_initial * i)

        if i >= self.cycle_limit:
            return [self.lr_min for _ in self.base_values]

        gamma = self.cycle_decay ** i  # peak LR shrinks by cycle_decay per completed cycle
        k = self.k_decay
        cosine = 1 + math.cos(math.pi * t_curr ** k / t_i ** k)
        return [
            self.lr_min + 0.5 * (v * gamma - self.lr_min) * cosine
            for v in self.base_values
        ]

    def get_epoch_values(self, epoch: int):
        """Return LRs for ``epoch`` when epoch-driven, otherwise ``None``."""
        if self.t_in_epochs:
            return self._get_lr(epoch)
        else:
            return None

    def get_update_values(self, num_updates: int):
        """Return LRs for ``num_updates`` when update-driven, otherwise ``None``."""
        if not self.t_in_epochs:
            return self._get_lr(num_updates)
        else:
            return None

    def get_cycle_length(self, cycles=0):
        """Return the total length of ``cycles`` cycles.

        A falsy ``cycles`` falls back to ``cycle_limit``; at least one cycle
        is always counted.
        """
        cycles = max(1, cycles or self.cycle_limit)
        if self.cycle_mul == 1.0:
            return self.t_initial * cycles
        else:
            # Closed form of the geometric series t_initial * sum(cycle_mul ** j).
            return int(math.floor(-self.t_initial * (self.cycle_mul ** cycles - 1) / (1 - self.cycle_mul)))
def swish(x, inplace: bool = False):
    """Swish activation, ``x * sigmoid(x)`` - https://arxiv.org/abs/1710.05941

    With ``inplace=True`` the result is written into ``x`` and ``x`` is returned.
    """
    gate = x.sigmoid()
    if inplace:
        return x.mul_(gate)
    return x.mul(gate)


class Swish(nn.Module):
    """Module wrapper around :func:`swish`."""

    def __init__(self, inplace: bool = False):
        super().__init__()
        self.inplace = inplace

    def forward(self, x):
        return swish(x, self.inplace)


def mish(x, inplace: bool = False):
    """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681

    NOTE: there is no in-place variant; ``inplace`` is accepted only for
    interface consistency and is ignored.
    """
    softplus_tanh = F.softplus(x).tanh()
    return x.mul(softplus_tanh)


class Mish(nn.Module):
    """Module wrapper around :func:`mish` - https://arxiv.org/abs/1908.08681

    The ``inplace`` constructor argument is accepted and discarded.
    """

    def __init__(self, inplace: bool = False):
        super().__init__()

    def forward(self, x):
        return mish(x)


def sigmoid(x, inplace: bool = False):
    """Sigmoid of ``x``; with ``inplace=True`` it overwrites and returns ``x``."""
    if inplace:
        return x.sigmoid_()
    return x.sigmoid()


# PyTorch ships a Sigmoid module, but without the common `inplace` argument
class Sigmoid(nn.Module):
    """Sigmoid module exposing the shared ``inplace`` argument."""

    def __init__(self, inplace: bool = False):
        super().__init__()
        self.inplace = inplace

    def forward(self, x):
        return sigmoid(x, self.inplace)


def tanh(x, inplace: bool = False):
    """Tanh of ``x``; with ``inplace=True`` it overwrites and returns ``x``."""
    if inplace:
        return x.tanh_()
    return x.tanh()


# PyTorch ships a Tanh module, but without the common `inplace` argument
class Tanh(nn.Module):
    """Tanh module exposing the shared ``inplace`` argument."""

    def __init__(self, inplace: bool = False):
        super().__init__()
        self.inplace = inplace

    def forward(self, x):
        return tanh(x, self.inplace)


def hard_swish(x, inplace: bool = False):
    """Hard swish, ``x * relu6(x + 3) / 6``.

    With ``inplace=True`` the product is written into ``x``.
    """
    # The division happens in place on the temporary produced by relu6, never on x.
    scale = F.relu6(x + 3.).div_(6.)
    if inplace:
        return x.mul_(scale)
    return x.mul(scale)


class HardSwish(nn.Module):
    """Module wrapper around :func:`hard_swish`."""

    def __init__(self, inplace: bool = False):
        super().__init__()
        self.inplace = inplace

    def forward(self, x):
        return hard_swish(x, self.inplace)


def hard_sigmoid(x, inplace: bool = False):
    """Hard sigmoid, ``relu6(x + 3) / 6``.

    The in-place path spells relu6 as ``clamp_(0, 6)`` so that every step can
    mutate ``x`` directly.
    """
    if not inplace:
        return F.relu6(x + 3.) / 6.
    return x.add_(3.).clamp_(0., 6.).div_(6.)


class HardSigmoid(nn.Module):
    """Module wrapper around :func:`hard_sigmoid`."""

    def __init__(self, inplace: bool = False):
        super().__init__()
        self.inplace = inplace

    def forward(self, x):
        return hard_sigmoid(x, self.inplace)


def hard_mish(x, inplace: bool = False):
    """ Hard Mish, ``0.5 * x * clamp(x + 2, 0, 2)``

    Experimental, based on notes by Mish author Diganta Misra at
      https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md
    """
    if not inplace:
        return 0.5 * x * (x + 2).clamp(min=0, max=2)
    return x.mul_(0.5 * (x + 2).clamp(min=0, max=2))


class HardMish(nn.Module):
    """Module wrapper around :func:`hard_mish`."""

    def __init__(self, inplace: bool = False):
        super().__init__()
        self.inplace = inplace

    def forward(self, x):
        return hard_mish(x, self.inplace)


class PReLU(nn.PReLU):
    """``nn.PReLU`` that additionally accepts (and ignores) an ``inplace`` argument."""

    def __init__(self, num_parameters: int = 1, init: float = 0.25, inplace: bool = False) -> None:
        super().__init__(num_parameters=num_parameters, init=init)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        return F.prelu(input, self.weight)


def gelu(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:
    """GELU via ``F.gelu``; ``inplace`` is accepted for interface consistency and ignored."""
    return F.gelu(x)


class GELU(nn.Module):
    """Gaussian Error Linear Units module that accepts (and ignores) an ``inplace`` argument."""

    def __init__(self, inplace: bool = False):
        super().__init__()

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        return F.gelu(input)
class ChannelAttn(nn.Module):
    """ Original CBAM channel attention module, currently avg + max pool variant only.

    The input is pooled over dims 2 and 3 with both mean and max; each pooled
    tensor goes through the same ``fc1 -> act -> fc2`` 1x1-conv bottleneck, the
    two results are summed, gated, and multiplied onto the input.

    Args:
        channels: Number of input (and output) channels.
        rd_ratio: Reduction ratio used to derive the bottleneck width when
            ``rd_channels`` is falsy.
        rd_channels: Explicit bottleneck width; overrides ``rd_ratio``.
        rd_divisor: Divisor passed to ``make_divisible`` for the derived width.
        act_layer: Activation class, instantiated with ``inplace=True``.
        gate_layer: Gate spec passed to ``create_act_layer``.
        mlp_bias: Whether the two 1x1 convs use a bias.
    """
    def __init__(
            self, channels, rd_ratio=1./16, rd_channels=None, rd_divisor=1,
            act_layer=nn.ReLU, gate_layer='sigmoid', mlp_bias=False):
        super(ChannelAttn, self).__init__()
        if not rd_channels:
            rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.)
        self.fc1 = nn.Conv2d(channels, rd_channels, 1, bias=mlp_bias)
        self.act = act_layer(inplace=True)
        self.fc2 = nn.Conv2d(rd_channels, channels, 1, bias=mlp_bias)
        self.gate = create_act_layer(gate_layer)

    def forward(self, x):
        # The same bottleneck weights are shared by the avg and max branches.
        x_avg = self.fc2(self.act(self.fc1(x.mean((2, 3), keepdim=True))))
        x_max = self.fc2(self.act(self.fc1(x.amax((2, 3), keepdim=True))))
        return x * self.gate(x_avg + x_max)


class LightChannelAttn(ChannelAttn):
    """An experimental 'lightweight' variant that sums avg + max pool first.

    The mean- and max-pooled tensors are averaged before the bottleneck, so
    the bottleneck runs once instead of twice.
    """
    def __init__(
            self, channels, rd_ratio=1./16, rd_channels=None, rd_divisor=1,
            act_layer=nn.ReLU, gate_layer='sigmoid', mlp_bias=False):
        super(LightChannelAttn, self).__init__(
            channels, rd_ratio, rd_channels, rd_divisor, act_layer, gate_layer, mlp_bias)

    def forward(self, x):
        x_pool = 0.5 * x.mean((2, 3), keepdim=True) + 0.5 * x.amax((2, 3), keepdim=True)
        x_attn = self.fc2(self.act(self.fc1(x_pool)))
        # Use the configured gate (previously a hard-coded sigmoid, which
        # silently ignored `gate_layer`); same result for the default 'sigmoid'.
        return x * self.gate(x_attn)


class SpatialAttn(nn.Module):
    """ Original CBAM spatial attention module

    Mean and max over dim 1 are concatenated into a 2-channel map, reduced to
    one channel by a ``ConvBnAct`` (no activation), gated, and multiplied onto
    the input.
    """
    def __init__(self, kernel_size=7, gate_layer='sigmoid'):
        super(SpatialAttn, self).__init__()
        self.conv = ConvBnAct(2, 1, kernel_size, act_layer=None)
        self.gate = create_act_layer(gate_layer)

    def forward(self, x):
        x_attn = torch.cat([x.mean(dim=1, keepdim=True), x.amax(dim=1, keepdim=True)], dim=1)
        x_attn = self.conv(x_attn)
        return x * self.gate(x_attn)


class LightSpatialAttn(nn.Module):
    """An experimental 'lightweight' variant that sums avg_pool and max_pool results.

    The mean and max over dim 1 are averaged into a single-channel map before
    the conv, instead of being concatenated into two channels.
    """
    def __init__(self, kernel_size=7, gate_layer='sigmoid'):
        super(LightSpatialAttn, self).__init__()
        self.conv = ConvBnAct(1, 1, kernel_size, act_layer=None)
        self.gate = create_act_layer(gate_layer)

    def forward(self, x):
        x_attn = 0.5 * x.mean(dim=1, keepdim=True) + 0.5 * x.amax(dim=1, keepdim=True)
        x_attn = self.conv(x_attn)
        return x * self.gate(x_attn)


class CbamModule(nn.Module):
    """CBAM block: ``ChannelAttn`` followed by ``SpatialAttn``.

    ``gate_layer`` is applied to both sub-modules; the remaining arguments
    configure the channel attention, except ``spatial_kernel_size`` which sets
    the spatial conv kernel.
    """
    def __init__(
            self, channels, rd_ratio=1./16, rd_channels=None, rd_divisor=1,
            spatial_kernel_size=7, act_layer=nn.ReLU, gate_layer='sigmoid', mlp_bias=False):
        super(CbamModule, self).__init__()
        self.channel = ChannelAttn(
            channels, rd_ratio=rd_ratio, rd_channels=rd_channels,
            rd_divisor=rd_divisor, act_layer=act_layer, gate_layer=gate_layer, mlp_bias=mlp_bias)
        self.spatial = SpatialAttn(spatial_kernel_size, gate_layer=gate_layer)

    def forward(self, x):
        x = self.channel(x)
        x = self.spatial(x)
        return x


class LightCbamModule(nn.Module):
    """Lightweight CBAM block: ``LightChannelAttn`` followed by ``LightSpatialAttn``.

    Takes the same arguments as ``CbamModule``.
    """
    def __init__(
            self, channels, rd_ratio=1./16, rd_channels=None, rd_divisor=1,
            spatial_kernel_size=7, act_layer=nn.ReLU, gate_layer='sigmoid', mlp_bias=False):
        super(LightCbamModule, self).__init__()
        self.channel = LightChannelAttn(
            channels, rd_ratio=rd_ratio, rd_channels=rd_channels,
            rd_divisor=rd_divisor, act_layer=act_layer, gate_layer=gate_layer, mlp_bias=mlp_bias)
        # Forward gate_layer to the spatial branch too, matching CbamModule
        # (previously it always fell back to the default 'sigmoid').
        self.spatial = LightSpatialAttn(spatial_kernel_size, gate_layer=gate_layer)

    def forward(self, x):
        x = self.channel(x)
        x = self.spatial(x)
        return x