├── .gitignore ├── LICENSE ├── README.md ├── adversarial_attack.py ├── benchmark.py ├── convert.py ├── cvm ├── __init__.py ├── attacks │ ├── __init__.py │ ├── attack.py │ ├── fgsm.py │ └── pgd.py ├── data │ ├── __init__.py │ ├── constants.py │ ├── imagenet_1k.py │ ├── imagenet_1k_real_labels.py │ ├── loader.py │ └── samplers.py ├── loss │ ├── __init__.py │ └── soft_label_cross_entropy_loss.py ├── models │ ├── __init__.py │ ├── alexnet.py │ ├── convmixer.py │ ├── convnext.py │ ├── densenet.py │ ├── det │ │ ├── __init__.py │ │ └── yolov1.py │ ├── efficientnet.py │ ├── efficientnetv2.py │ ├── gan │ │ ├── __init__.py │ │ └── dcgan.py │ ├── ghostnet.py │ ├── googlenet.py │ ├── gssdnet.py │ ├── inception_v3.py │ ├── inception_v4.py │ ├── mlp_mixer.py │ ├── mnasnet.py │ ├── mobilenet.py │ ├── mobilenetv2.py │ ├── mobilenetv3.py │ ├── ops │ │ ├── __init__.py │ │ ├── blocks │ │ │ ├── __init__.py │ │ │ ├── adder.py │ │ │ ├── affine.py │ │ │ ├── aspp.py │ │ │ ├── bottleneck.py │ │ │ ├── cbam.py │ │ │ ├── channel.py │ │ │ ├── depthwise_separable_conv2d.py │ │ │ ├── drop.py │ │ │ ├── efficient_channel_attention.py │ │ │ ├── factory.py │ │ │ ├── gather_excite.py │ │ │ ├── gaussian_blur.py │ │ │ ├── global_context.py │ │ │ ├── inception.py │ │ │ ├── interpolate.py │ │ │ ├── inverted_residual_block.py │ │ │ ├── mlp.py │ │ │ ├── non_local.py │ │ │ ├── norm.py │ │ │ ├── selective_kernel.py │ │ │ ├── squeeze_excite.py │ │ │ ├── stage.py │ │ │ └── vanilla_conv2d.py │ │ └── functional.py │ ├── regnet.py │ ├── resmlp.py │ ├── resnet.py │ ├── rexnet.py │ ├── seg │ │ ├── __init__.py │ │ ├── deeplabv3.py │ │ ├── deeplabv3_plus.py │ │ ├── fcn.py │ │ ├── heads.py │ │ ├── segmentation_model.py │ │ └── unet.py │ ├── shufflenet.py │ ├── shufflenetv2.py │ ├── squeezenet.py │ ├── utils.py │ ├── vae │ │ ├── __init__.py │ │ ├── cvae.py │ │ └── vae.py │ ├── vggnet.py │ ├── vgnet.py │ ├── vision_transformer.py │ └── xception.py ├── scheduler │ ├── __init__.py │ ├── cosine_lr.py │ └── step_lr.py ├── utils │ ├── __init__.py │ ├── augment.py │ ├── coco.py │ ├── ema.py │ ├── factory.py │ ├── logger.py │ ├── metrics.py │ ├── seg_transforms.py │ └── utils.py └── version.py ├── flops.py ├── info.py ├── profiler.py ├── real_labels.json ├── requirements.txt ├── resize_imagenet.py ├── results.md ├── run.py ├── setup.py ├── tests ├── test_blocks.py └── test_models.py ├── train.py ├── train_gan.py ├── train_seg.py ├── train_vae.py └── validate.py /.gitignore: -------------------------------------------------------------------------------- 1 | logs/ 2 | __pycache__/ 3 | .vscode 4 | test.py 5 | build/ 6 | *.egg-info/ 7 | *.pth 8 | images/ 9 | out/ -------------------------------------------------------------------------------- /adversarial_attack.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import torch 4 | 5 | from tqdm import tqdm 6 | 7 | from cvm.utils import * 8 | from cvm.attacks import * 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation') 13 | parser.add_argument('--dataset', type=str, default='ImageNet', choices=list_datasets() + ['ImageNet'], 14 | help='path to the ImageNet dataset.') 15 | parser.add_argument('--data-dir', type=str, default='/datasets/ILSVRC2012', 16 | help='path to the ImageNet dataset.') 17 | parser.add_argument('--model', '-m', type=str, default='mobilenet_v1_x1_0', choices=list_models(), 18 | help='type of model to use. 
(default: mobilenet_v1_x1_0)') 19 | parser.add_argument('--num-classes', type=int, default=1000, metavar='N', 20 | help='number of label classes') 21 | parser.add_argument('--in-channels', type=int, default=3, metavar='N') 22 | parser.add_argument('--model-path', type=str, default=None) 23 | parser.add_argument('--model-weights', type=str, default='DEFAULT') 24 | parser.add_argument('--workers', '-j', type=int, default=8, metavar='N', 25 | help='number of data loading workers pre GPU. (default: 3)') 26 | parser.add_argument('--batch-size', type=int, default=256, metavar='N', 27 | help='mini-batch size, this is the total batch size of all GPUs. (default: 256)') 28 | parser.add_argument('--crop-size', type=int, default=224) 29 | parser.add_argument('--resize-size', type=int, default=232) 30 | parser.add_argument('--dali', action='store_true', help='use nvidia dali.') 31 | parser.add_argument('--dali-cpu', action='store_true', 32 | help='runs CPU based version of DALI pipeline. (default: false)') 33 | parser.add_argument('--method', type=str, default='PGD', choices=['FGSM', 'PGD']) 34 | parser.add_argument('--attack-eps', type=float, default=4/255, metavar='E') 35 | parser.add_argument('--attack-steps', type=int, default=2, metavar='N') 36 | parser.add_argument('--attack-alpha', type=float, default=2/255, metavar='A') 37 | parser.add_argument('--attack-target', type=int, default=-1, metavar='T') 38 | return parser.parse_args() 39 | 40 | 41 | if __name__ == '__main__': 42 | assert torch.cuda.is_available(), 'CUDA IS NOT AVAILABLE!!' 43 | torch.backends.cudnn.benchmark = True 44 | 45 | args = parse_args() 46 | init_distributed_mode(args) 47 | 48 | if args.local_rank == 0: 49 | print(json.dumps(vars(args), indent=4)) 50 | 51 | model = create_model( 52 | args.model, 53 | pretrained=True, 54 | thumbnail=(args.crop_size < 128), 55 | pth=args.model_path, 56 | weights=args.model_weights, 57 | distributed=args.distributed, 58 | local_rank=args.local_rank, 59 | in_channels=args.in_channels, 60 | num_classes=args.num_classes 61 | ) 62 | 63 | val_loader = create_loader( 64 | args.dataset, 65 | root=args.data_dir, 66 | is_training=False, 67 | batch_size=args.batch_size, 68 | val_resize_size=args.resize_size, 69 | val_crop_size=args.crop_size, 70 | crop_size=args.crop_size, 71 | workers=args.workers, 72 | dali=args.dali, 73 | dali_cpu=args.dali_cpu, 74 | distributed=args.distributed, 75 | local_rank=args.local_rank 76 | ) 77 | 78 | if args.local_rank == 0: 79 | if val_loader.type != "dali": 80 | print(f'Validation: \n{val_loader.dataset.transform}') 81 | 82 | attacker = None 83 | if args.method == 'FGSM': 84 | attacker = FGSM(model, args.attack_eps) 85 | elif args.method == 'PGD': 86 | attacker = PGD(model, args.attack_eps, args.attack_steps, args.attack_alpha) 87 | else: 88 | raise ValueError(f'Invalid attacker: {args.method}.') 89 | 90 | attacker.set_nomarlized(get_dataset_mean(args.dataset), get_dataset_std(args.dataset)) 91 | 92 | if args.local_rank == 0: 93 | print(f'Attacker: {attacker}') 94 | 95 | top1 = AverageMeter() 96 | top5 = AverageMeter() 97 | model.eval() 98 | for (images, target) in tqdm(val_loader, desc='validating', unit='batch'): 99 | 100 | if args.attack_target >= 0: 101 | target.fill_(args.attack_target) 102 | 103 | images = attacker.perturb(images, target, args.attack_target >= 0) 104 | 105 | with torch.inference_mode(): 106 | output = model(images) 107 | 108 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 109 | 110 | top1.update(acc1.item(), images.size(0)) 111 | 
top5.update(acc5.item(), images.size(0)) 112 | 113 | acc = f'\n -- top1={top1.avg:6.3f}, top5={top5.avg:6.3f}\n' 114 | if args.local_rank == 0: 115 | print(acc) 116 | -------------------------------------------------------------------------------- /benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import time 4 | from cvm.utils import create_model 5 | 6 | 7 | class InferenceBenchmarkRunner(): 8 | def __init__(self, model, input, device='cuda', amp=False) -> None: 9 | self.model = model 10 | self.input = input 11 | self.device = device 12 | self.amp = amp 13 | 14 | self.model = model.to(self.device) 15 | self.model.eval() 16 | self.input = input.to(self.device) 17 | 18 | def timestamp(self, sync=False): 19 | if sync and self.device == 'cuda': 20 | torch.cuda.synchronize(device=self.device) 21 | 22 | return time.perf_counter() 23 | 24 | def infer(self): 25 | start = self.timestamp() 26 | with torch.amp.autocast(device_type='cuda', enabled=self.amp): 27 | output = self.model(self.input) 28 | end = self.timestamp(True) 29 | return end - start 30 | 31 | 32 | if __name__ == '__main__': 33 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 34 | parser.add_argument('--model', '-m', type=str) 35 | parser.add_argument('--batch-size', type=int, default=16) 36 | parser.add_argument('--amp', action='store_true') 37 | parser.add_argument('--device', type=str, default='cuda') 38 | 39 | args = parser.parse_args() 40 | print(args) 41 | 42 | model = create_model(args.model) 43 | 44 | input = torch.randn(args.batch_size, 3, 224, 224) 45 | 46 | runner = InferenceBenchmarkRunner(model, input, args.device, args.amp) 47 | 48 | with torch.no_grad(): 49 | for _ in range(50): 50 | runner.infer() 51 | 52 | total_step = 0 53 | run_start = runner.timestamp() 54 | for i in range(50): 55 | delta_fwd = runner.infer() 56 | total_step += delta_fwd 57 | 58 | run_end = runner.timestamp(True) 59 | run_elapsed = run_end - run_start 60 | print(f'Inference benchmark: {round(50 / run_elapsed, 2):.2f} batches/s, {round(1000 * total_step / 50, 2)} ms') 61 | -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import argparse 3 | import shutil 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--input', '-i', type=str) 8 | parser.add_argument('--output', '-o', type=str) 9 | args = parser.parse_args() 10 | 11 | with open(args.input, 'rb') as f: 12 | sha_hash = hashlib.sha256(f.read()).hexdigest() 13 | 14 | final_filename = f'logs/{args.output}-{sha_hash[:8]}.pth' 15 | shutil.copy(args.input, final_filename) 16 | print(f'Saved: {final_filename}') 17 | -------------------------------------------------------------------------------- /cvm/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__ 2 | 3 | from cvm import models 4 | from cvm import utils 5 | from cvm import loss 6 | from cvm import scheduler 7 | from cvm import data 8 | -------------------------------------------------------------------------------- /cvm/attacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fgsm import FGSM 2 | from .pgd import PGD 3 | 4 | __all__ = ['FGSM', 'PGD'] 
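# --- Illustrative usage (not part of the original file; a minimal sketch) ---
# Shows how the attackers exported above are wired together, mirroring the flow
# of adversarial_attack.py. Assumes `model` is a pretrained classifier already
# in eval mode and `images`/`labels` are an ImageNet-normalized batch on the
# same device; the mean/std values below are the ImageNet statistics from
# cvm/data/constants.py.
#
#     from cvm.attacks import FGSM, PGD
#
#     attacker = PGD(model, epsilon=4 / 255, steps=2, alpha=2 / 255)
#     # or: attacker = FGSM(model, epsilon=4 / 255)
#
#     # Tell the attacker how the inputs were normalized so it can invert the
#     # normalization before perturbing and re-apply it afterwards
#     # (method name kept exactly as spelled in attack.py).
#     attacker.set_nomarlized((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
#
#     adv_images = attacker.perturb(images, labels, targeted=False)
#     logits = model(adv_images)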
-------------------------------------------------------------------------------- /cvm/attacks/attack.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import torch 3 | 4 | from typing import Callable 5 | 6 | 7 | class Attacker(abc.ABC): 8 | def __init__(self, model, epsilon: float = 0.03, mean=None, std=None): 9 | super().__init__() 10 | 11 | self.model = model 12 | self.model.eval() 13 | 14 | self.epsilon = epsilon 15 | 16 | self.mean = None 17 | self.std = None 18 | 19 | self.normalized = None # None, False, True 20 | 21 | def set_nomarlized(self, mean, std): 22 | self.mean = mean 23 | self.std = std 24 | 25 | self.normalized = True 26 | 27 | def normalize(self, x: torch.Tensor): 28 | mean = torch.as_tensor(self.mean, dtype=x.dtype, device=x.device) 29 | std = torch.as_tensor(self.std, dtype=x.dtype, device=x.device) 30 | 31 | if mean.ndim == 1: 32 | mean = mean.view(-1, 1, 1) 33 | if std.ndim == 1: 34 | std = std.view(-1, 1, 1) 35 | 36 | return (x - mean) / std 37 | 38 | def inverse_normalize(self, x: torch.Tensor): 39 | mean = torch.as_tensor(self.mean, dtype=x.dtype, device=x.device) 40 | std = torch.as_tensor(self.std, dtype=x.dtype, device=x.device) 41 | 42 | if mean.ndim == 1: 43 | mean = mean.view(-1, 1, 1) 44 | if std.ndim == 1: 45 | std = std.view(-1, 1, 1) 46 | 47 | return x * std + mean 48 | 49 | def prepare_inputs(self, x): 50 | if self.normalized is True: 51 | x = self.inverse_normalize(x) 52 | self.normalized = False 53 | 54 | x.requires_grad_(True) 55 | return x 56 | 57 | def unprepare_inputs(self, x): 58 | if self.normalized is False: 59 | x = self.normalize(x) 60 | self.normalized = True 61 | 62 | return x 63 | 64 | def forward(self, x): 65 | if self.normalized is False: 66 | x = self.normalize(x) 67 | 68 | return self.model(x) 69 | 70 | perturb: Callable 71 | -------------------------------------------------------------------------------- /cvm/attacks/fgsm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .attack import Attacker 4 | 5 | 6 | class FGSM(Attacker): 7 | r""" 8 | 'Explaining and Harnessing Adversarial Examples', https://arxiv.org/abs/1412.6572 9 | """ 10 | 11 | def __init__(self, model, epsilon: float = 6/255): 12 | super().__init__(model, epsilon=epsilon) 13 | 14 | def perturb(self, images: torch.Tensor, labels: torch.Tensor = None, targeted: bool = False): 15 | images_adv = images.clone().detach() 16 | 17 | images_adv = self.prepare_inputs(images_adv) 18 | 19 | loss = F.cross_entropy(self.forward(images_adv), labels) 20 | grad = torch.autograd.grad(loss, images_adv)[0] 21 | 22 | eta = self.epsilon * torch.sign(grad) 23 | 24 | if not targeted: 25 | images_adv = (images_adv + eta).detach() 26 | else: 27 | images_adv = (images_adv - eta).detach() 28 | 29 | images_adv = torch.clamp(images_adv, min=0, max=1.0) 30 | 31 | return self.unprepare_inputs(images_adv) 32 | 33 | def __repr__(self) -> str: 34 | return f'FGSM(eps={self.epsilon:>6.4f}({self.epsilon * 255.0:>.1f}/255.0), normalized={self.normalized}, mean={self.mean}, std={self.std})' 35 | -------------------------------------------------------------------------------- /cvm/attacks/pgd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .attack import Attacker 4 | 5 | 6 | class PGD(Attacker): 7 | r""" 8 | 'Towards Deep Learning Models Resistant to Adversarial 
Attacks', https://arxiv.org/abs/1706.06083 9 | """ 10 | 11 | def __init__(self, model, epsilon: float = 6/255, steps: int = 3, alpha: float = 2/255): 12 | super().__init__(model, epsilon=epsilon) 13 | 14 | self.steps = steps 15 | self.alpha = alpha 16 | 17 | def perturb(self, images: torch.Tensor, labels: torch.Tensor = None, targeted: bool = False): 18 | images_adv = images.detach().clone() 19 | 20 | images_adv = self.prepare_inputs(images_adv) 21 | images_nat = images_adv.clone().detach() 22 | 23 | for _ in range(self.steps): 24 | images_adv.requires_grad_(True) 25 | 26 | loss = F.cross_entropy(self.forward(images_adv), labels) 27 | grad = torch.autograd.grad(loss, images_adv)[0] 28 | 29 | eta = self.alpha * torch.sign(grad) 30 | 31 | if not targeted: 32 | images_adv = (images_adv + eta).detach() 33 | else: 34 | images_adv = (images_adv - eta).detach() 35 | 36 | images_adv = torch.clamp(images_adv, images_nat - self.epsilon, images_nat + self.epsilon) 37 | images_adv = torch.clamp(images_adv, min=0, max=1.0) 38 | 39 | return self.unprepare_inputs(images_adv) 40 | 41 | def __repr__(self) -> str: 42 | return f'PGD(eps={self.epsilon:>6.4f}({self.epsilon * 255.0:>.1f}/255.0), steps={self.steps}, alpha={self.alpha:>6.4f}({self.alpha * 255.0:>.1f}/255.0), normalized={self.normalized}, mean={self.mean}, std={self.std})' 43 | -------------------------------------------------------------------------------- /cvm/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .constants import * 2 | from .imagenet_1k import * 3 | from .imagenet_1k_real_labels import * 4 | from .loader import * -------------------------------------------------------------------------------- /cvm/data/constants.py: -------------------------------------------------------------------------------- 1 | IMAGE_MEAN = (0.485, 0.456, 0.406) 2 | IMAGE_STD = (0.229, 0.224, 0.225) 3 | 4 | CIFAR_MEAN = (0.491, 0.482, 0.446) 5 | CIFAR_STD = (0.247, 0.243, 0.261) 6 | 7 | MNIST_MEAN = (0.1307,) 8 | MNIST_STD = (0.3081,) 9 | 10 | VOC_MEAN = (0.485, 0.456, 0.406) 11 | VOC_STD = (0.229, 0.224, 0.225) 12 | 13 | 14 | CIFAR_IMAGE_SIZE = 32 15 | MNIST_IMAGE_SIZE = 28 16 | -------------------------------------------------------------------------------- /cvm/data/imagenet_1k_real_labels.py: -------------------------------------------------------------------------------- 1 | """ Real labels evaluator for ImageNet 2 | [1] Are we done with ImageNet?. 
arXiv:2006.07159 3 | """ 4 | import os 5 | import json 6 | import torch 7 | import numpy as np 8 | 9 | __all__ = ['ImageNet1KRealLabelsEvaluator'] 10 | 11 | 12 | class ImageNet1KRealLabelsEvaluator: 13 | 14 | def __init__(self, samples, labels_file='real_labels.json', topk=(1, 5)): 15 | with open(labels_file) as f: 16 | self.labels = { 17 | f'ILSVRC2012_val_{i + 1:08d}.JPEG': labels for i, labels in enumerate(json.load(f)) 18 | } 19 | 20 | assert len(samples) == len(self.labels) 21 | 22 | self.samples = samples 23 | self.topk = topk 24 | self.res = {k: [] for k in topk} 25 | self.index = 0 26 | 27 | def put(self, output: torch.Tensor): 28 | maxk = max(self.topk) 29 | _, pred = output.topk(maxk, 1, True, True) 30 | pred = pred.cpu().numpy() 31 | 32 | for topk_label in pred: 33 | filename = os.path.basename(self.samples[self.index][0]) 34 | 35 | if self.labels[filename]: 36 | for k in self.topk: 37 | self.res[k].append( 38 | any([p in self.labels[filename] for p in topk_label[:k]]) 39 | ) 40 | self.index += 1 41 | 42 | @property 43 | def accuracy(self): 44 | return {k: float(np.mean(self.res[k])) * 100 for k in self.topk} 45 | -------------------------------------------------------------------------------- /cvm/data/loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class DataIterator: 5 | def __init__( 6 | self, 7 | loader, 8 | type: str = 'dali' 9 | ): 10 | self.loader = loader 11 | self.type = type 12 | self._counter = 0 13 | self.itor = self 14 | 15 | def __iter__(self): 16 | self.itor = iter(self.loader) 17 | return self 18 | 19 | def __next__(self): 20 | batch = next(self.itor) 21 | 22 | if self.type == 'dali': 23 | input = batch[0]["data"] 24 | target = batch[0]["label"].squeeze(-1).long() 25 | else: 26 | input = batch[0].cuda(non_blocking=True) 27 | target = batch[1].cuda(non_blocking=True) 28 | 29 | return input, target 30 | 31 | @property 32 | def sampler(self): 33 | return self.loader.sampler if self.type == 'torch' else None 34 | 35 | @property 36 | def dataset(self): 37 | return self.loader.dataset if self.type == 'torch' else None 38 | 39 | def reset(self): 40 | self._counter += 1 41 | 42 | if self.type == 'dali': 43 | self.loader.reset() 44 | elif isinstance(self.sampler, torch.utils.data.distributed.DistributedSampler): 45 | self.loader.sampler.set_epoch(self._counter) 46 | 47 | def __len__(self): 48 | return len(self.loader) 49 | -------------------------------------------------------------------------------- /cvm/data/samplers.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.distributed as dist 5 | 6 | 7 | class RASampler(torch.utils.data.Sampler): 8 | """Sampler that restricts data loading to a subset of the dataset for distributed, 9 | with repeated augmentation. 10 | It ensures that different each augmented version of a sample will be visible to a 11 | different process (GPU). 12 | Heavily based on 'torch.utils.data.DistributedSampler'. 
13 | 14 | This is borrowed from the DeiT Repo: 15 | https://github.com/facebookresearch/deit/blob/main/samplers.py 16 | """ 17 | 18 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, seed=0, repetitions=3): 19 | if num_replicas is None: 20 | if not dist.is_available(): 21 | raise RuntimeError("Requires distributed package to be available!") 22 | num_replicas = dist.get_world_size() 23 | if rank is None: 24 | if not dist.is_available(): 25 | raise RuntimeError("Requires distributed package to be available!") 26 | rank = dist.get_rank() 27 | self.dataset = dataset 28 | self.num_replicas = num_replicas 29 | self.rank = rank 30 | self.epoch = 0 31 | self.num_samples = int(math.ceil(len(self.dataset) * float(repetitions) / self.num_replicas)) 32 | self.total_size = self.num_samples * self.num_replicas 33 | self.num_selected_samples = int(math.floor(len(self.dataset) // 256 * 256 / self.num_replicas)) 34 | self.shuffle = shuffle 35 | self.seed = seed 36 | self.repetitions = repetitions 37 | 38 | def __iter__(self): 39 | if self.shuffle: 40 | # Deterministically shuffle based on epoch 41 | g = torch.Generator() 42 | g.manual_seed(self.seed + self.epoch) 43 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 44 | else: 45 | indices = list(range(len(self.dataset))) 46 | 47 | # Add extra samples to make it evenly divisible 48 | indices = [ele for ele in indices for i in range(self.repetitions)] 49 | indices += indices[: (self.total_size - len(indices))] 50 | assert len(indices) == self.total_size 51 | 52 | # Subsample 53 | indices = indices[self.rank : self.total_size : self.num_replicas] 54 | assert len(indices) == self.num_samples 55 | 56 | return iter(indices[: self.num_selected_samples]) 57 | 58 | def __len__(self): 59 | return self.num_selected_samples 60 | 61 | def set_epoch(self, epoch): 62 | self.epoch = epoch 63 | -------------------------------------------------------------------------------- /cvm/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .soft_label_cross_entropy_loss import * -------------------------------------------------------------------------------- /cvm/loss/soft_label_cross_entropy_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | __all__ = ['SoftLabelCrossEntropyLoss'] 6 | 7 | 8 | class SoftLabelCrossEntropyLoss(nn.Module): 9 | def __init__(self): 10 | super().__init__() 11 | 12 | def forward(self, x: torch.Tensor, y: torch.Tensor): 13 | logprobs = F.log_softmax(x, dim=-1) 14 | loss = -(logprobs * y).sum(dim=-1) 15 | return loss.mean() 16 | -------------------------------------------------------------------------------- /cvm/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .alexnet import * 2 | from .vggnet import * 3 | from .resnet import * 4 | from .squeezenet import * 5 | from .googlenet import * 6 | from .inception_v3 import * 7 | from .inception_v4 import * 8 | from .xception import * 9 | from .densenet import * 10 | from .mobilenet import * 11 | from .mobilenetv2 import * 12 | from .mobilenetv3 import * 13 | from .ghostnet import * 14 | from .shufflenet import * 15 | from .shufflenetv2 import * 16 | from .mnasnet import * 17 | from .efficientnet import * 18 | from .efficientnetv2 import * 19 | from .mlp_mixer import * 20 | from .resmlp import * 21 | from .rexnet import * 22 | from 
.regnet import * 23 | from .vision_transformer import * 24 | from .convmixer import * 25 | from .convnext import * 26 | from .vgnet import * 27 | from .gssdnet import * 28 | 29 | from . import gan 30 | from . import vae 31 | from . import seg 32 | from . import det -------------------------------------------------------------------------------- /cvm/models/alexnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .utils import export, load_from_local_or_url 4 | from typing import Any 5 | 6 | 7 | @export 8 | class AlexNet(nn.Module): 9 | def __init__( 10 | self, 11 | in_channels: int = 3, 12 | num_classes: int = 1000, 13 | dropout_rate: float = 0.5, 14 | thumbnail: bool = False, 15 | **kwargs: Any 16 | ): 17 | super().__init__() 18 | 19 | FRONT_S = 1 if thumbnail else 4 20 | 21 | self.features = nn.Sequential( 22 | nn.Conv2d(in_channels, 64, kernel_size=11, 23 | stride=FRONT_S, padding=2), 24 | nn.ReLU(inplace=True), 25 | 26 | nn.MaxPool2d(kernel_size=3, stride=2), 27 | 28 | nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2), 29 | nn.ReLU(inplace=True), 30 | 31 | nn.MaxPool2d(kernel_size=3, stride=2), 32 | 33 | nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1), 34 | nn.ReLU(inplace=True), 35 | 36 | nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1), 37 | nn.ReLU(inplace=True), 38 | 39 | nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1), 40 | nn.ReLU(inplace=True), 41 | 42 | nn.MaxPool2d(kernel_size=3, stride=2) 43 | ) 44 | 45 | self.pool = nn.AdaptiveAvgPool2d((6, 6)) 46 | 47 | self.classifier = nn.Sequential( 48 | nn.Dropout(dropout_rate), 49 | nn.Linear(9216, 4096), 50 | nn.ReLU(inplace=True), 51 | nn.Dropout(dropout_rate), 52 | nn.Linear(4096, 4096), 53 | nn.ReLU(inplace=True), 54 | nn.Linear(4096, num_classes) 55 | ) 56 | 57 | def forward(self, x): 58 | x = self.features(x) 59 | x = self.pool(x) 60 | x = torch.flatten(x, 1) 61 | x = self.classifier(x) 62 | return x 63 | 64 | 65 | @export 66 | def alexnet(pretrained: bool = False, pth: str = None, progress: bool = False, **kwargs: Any): 67 | model = AlexNet(**kwargs) 68 | 69 | if pretrained: 70 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 71 | return model 72 | -------------------------------------------------------------------------------- /cvm/models/convmixer.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .ops import blocks 6 | from .utils import export, config, load_from_local_or_url 7 | from typing import Any 8 | 9 | 10 | class Residual(nn.Sequential): 11 | def __init__(self, *args): 12 | super().__init__(*args) 13 | 14 | def forward(self, x): 15 | return self[0](x) + x 16 | 17 | 18 | @export 19 | class ConvMixer(nn.Module): 20 | @blocks.normalizer(position='after') 21 | def __init__( 22 | self, 23 | in_channels: int = 3, 24 | num_classes: int = 1000, 25 | h=None, 26 | depth=None, 27 | kernel_size: int = 9, 28 | patch_size: int = 7, 29 | **kwargs: Any 30 | ): 31 | super().__init__() 32 | 33 | self.features = nn.Sequential( 34 | blocks.Conv2dBlock(in_channels, h, patch_size, stride=patch_size), 35 | 36 | *[nn.Sequential( 37 | Residual( 38 | blocks.Conv2dBlock(h, h, kernel_size, groups=h, padding='same') 39 | ), 40 | blocks.Conv2d1x1Block(h, h) 41 | ) for _ in range(depth)] 42 | ) 43 | 44 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 45 | self.classifier = nn.Linear(h, 
num_classes) 46 | 47 | def forward(self, x): 48 | x = self.features(x) 49 | x = self.pool(x) 50 | x = torch.flatten(x, 1) 51 | x = self.classifier(x) 52 | return x 53 | 54 | 55 | def _conv_mixer( 56 | h, 57 | depth, 58 | kernel_size: int = 9, 59 | patch_size: int = 7, 60 | pretrained: bool = False, 61 | pth: str = None, 62 | progress: bool = True, 63 | **kwargs: Any 64 | ): 65 | 66 | model = ConvMixer(h=h, depth=depth, kernel_size=kernel_size, 67 | patch_size=patch_size, **kwargs) 68 | 69 | if pretrained: 70 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 71 | return model 72 | 73 | 74 | @export 75 | @blocks.activation(nn.GELU) 76 | def conv_mixer_1536_20_k9_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 77 | return _conv_mixer(1536, 20, 9, 7, pretrained, pth, progress, **kwargs) 78 | 79 | 80 | @export 81 | @blocks.activation(nn.GELU) 82 | def conv_mixer_1536_20_k3_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 83 | return _conv_mixer(1536, 20, 3, 7, pretrained, pth, progress, **kwargs) 84 | 85 | 86 | @export 87 | @blocks.activation(nn.GELU) 88 | def conv_mixer_1024_20_k9_p14(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 89 | return _conv_mixer(1024, 20, 9, 14, pretrained, pth, progress, **kwargs) 90 | 91 | 92 | @export 93 | @blocks.activation(nn.GELU) 94 | def conv_mixer_1024_16_k9_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 95 | return _conv_mixer(1024, 16, 9, 7, pretrained, pth, progress, **kwargs) 96 | 97 | 98 | @export 99 | @blocks.activation(nn.GELU) 100 | def conv_mixer_1024_12_k8_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 101 | return _conv_mixer(1024, 12, 8, 7, pretrained, pth, progress, **kwargs) 102 | 103 | 104 | @export 105 | @blocks.activation(partial(nn.ReLU, inplace=True)) 106 | def conv_mixer_768_32_k7_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 107 | return _conv_mixer(768, 32, 7, 7, pretrained, pth, progress, **kwargs) 108 | 109 | 110 | @export 111 | @blocks.activation(partial(nn.ReLU, inplace=True)) 112 | def conv_mixer_768_32_k3_p14(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 113 | return _conv_mixer(768, 32, 3, 14, pretrained, pth, progress, **kwargs) 114 | 115 | 116 | @export 117 | @blocks.activation(nn.GELU) 118 | def conv_mixer_512_16_k8_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 119 | return _conv_mixer(512, 16, 8, 7, pretrained, pth, progress, **kwargs) 120 | 121 | 122 | @export 123 | @blocks.activation(nn.GELU) 124 | def conv_mixer_512_12_k8_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 125 | return _conv_mixer(512, 12, 8, 7, pretrained, pth, progress, **kwargs) 126 | -------------------------------------------------------------------------------- /cvm/models/convnext.py: -------------------------------------------------------------------------------- 1 | ''' 2 | paper: 3 | [ConvNeXt] A ConvNet for the 2020s(https://arxiv.org/abs/2201.03545) 4 | official code : 5 | https://github.com/facebookresearch/ConvNeXt/blob/dcb928723662a1289d31190d09d82378b57b810a/models/convnext.py 6 | ''' 7 | import torch 8 | import torch.nn as nn 9 | from .ops import blocks 10 | from .utils import export, config, load_from_local_or_url 11 | from typing import Any, OrderedDict, List 12 | 13 | 14 | class 
ConvNetBlock(nn.Module): 15 | def __init__( 16 | self, 17 | dim: int, 18 | kernel_size: int = 7, 19 | padding: int = 3, 20 | survival_prob: float = 0.0, 21 | layer_scale: float = 1e-6 22 | ): 23 | super().__init__() 24 | 25 | self.branch1 = nn.Sequential( 26 | blocks.DepthwiseConv2d(dim, dim, kernel_size, padding=padding, bias=True), 27 | blocks.Permute([0, 2, 3, 1]), 28 | nn.LayerNorm(dim, eps=1e-6), 29 | nn.Linear(dim, 4 * dim), 30 | nn.GELU(), 31 | nn.Linear(4 * dim, dim), 32 | blocks.Permute([0, 3, 1, 2]), 33 | blocks.Scale(dim, layer_scale), 34 | blocks.StochasticDepth(survival_prob) 35 | ) 36 | 37 | self.branch2 = nn.Identity() 38 | self.combine = blocks.Combine('ADD') 39 | 40 | def forward(self, x): 41 | return self.combine([self.branch1(x), self.branch2(x)]) 42 | 43 | 44 | class DownsamplingBlock(nn.Sequential): 45 | def __init__( 46 | self, 47 | inp: int, 48 | oup: int 49 | ): 50 | super().__init__( 51 | blocks.LayerNorm2d(inp, eps=1e-6), 52 | nn.Conv2d(inp, oup, kernel_size=2, stride=2) 53 | ) 54 | 55 | 56 | @export 57 | class ConvNeXt(nn.Module): 58 | def __init__( 59 | self, 60 | in_channels: int = 3, 61 | num_classes: int = 1000, 62 | layers: List[int] = [3, 3, 9, 3], 63 | dims: List[int] = [96, 192, 384, 768], 64 | drop_path_rate: float = 0.2, 65 | layer_scale: float = 1e-6, 66 | thumbnail: bool = False, 67 | **kwargs: Any 68 | ): 69 | super().__init__() 70 | 71 | FRONT_S = 1 if thumbnail else 4 72 | 73 | self.features = nn.Sequential(OrderedDict([ 74 | ('stem', blocks.Stage( 75 | nn.Conv2d(in_channels, dims[0], kernel_size=4, stride=FRONT_S), 76 | blocks.LayerNorm2d(dims[0], eps=1e-6) 77 | )) 78 | ])) 79 | 80 | survival_probs = [1 - x.item() for x in torch.linspace(0, drop_path_rate, sum(layers))] 81 | for i in range(len(layers)): 82 | stage = blocks.Stage([ 83 | ConvNetBlock(dims[i], survival_prob=survival_probs[sum(layers[:i]) + j], layer_scale=layer_scale) 84 | for j in range(layers[i])] 85 | ) 86 | if i < 3: 87 | stage.append(DownsamplingBlock(dims[i], dims[i+1])) 88 | 89 | self.features.add_module(f'stage{i + 1}', stage) 90 | 91 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 92 | self.classifier = nn.Sequential( 93 | blocks.LayerNorm2d(dims[-1], eps=1e-6), 94 | nn.Flatten(1), 95 | nn.Linear(dims[-1], num_classes) 96 | ) 97 | 98 | def forward(self, x): 99 | x = self.features(x) 100 | x = self.pool(x) 101 | x = self.classifier(x) 102 | return x 103 | 104 | 105 | @export 106 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.1.2-convnext-weights/torch-convnext_t-98aeea18.pth') 107 | def convnext_t(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 108 | model = ConvNeXt(layers=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs) 109 | if pretrained: 110 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 111 | return model 112 | 113 | 114 | @export 115 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.1.2-convnext-weights/torch-convnext_s-0ebda7c5.pth') 116 | def convnext_s(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 117 | model = ConvNeXt(layers=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs) 118 | if pretrained: 119 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 120 | return model 121 | 122 | 123 | @export 124 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.1.2-convnext-weights/torch-convnext_b-1e0fb038.pth') 125 | def convnext_b(pretrained: bool = False, in_22k=False, pth: str = None, 
progress: bool = True, **kwargs: Any): 126 | model = ConvNeXt(layers=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs) 127 | if pretrained: 128 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 129 | return model 130 | 131 | 132 | @export 133 | def convnext_l(pretrained: bool = False, in_22k=False, pth: str = None, progress: bool = True, **kwargs: Any): 134 | model = ConvNeXt(layers=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs) 135 | if pretrained: 136 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 137 | return model 138 | 139 | 140 | @export 141 | def convnext_xl(pretrained: bool = False, in_22k=False, pth: str = None, progress: bool = True, **kwargs: Any): 142 | model = ConvNeXt(layers=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs) 143 | if pretrained: 144 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 145 | return model 146 | -------------------------------------------------------------------------------- /cvm/models/densenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, load_from_local_or_url 6 | from typing import Any, OrderedDict, List 7 | 8 | 9 | class DenseLayer(nn.Sequential): 10 | '''BN-ReLU-Conv''' 11 | 12 | def __init__(self, inp, oup): 13 | super().__init__() 14 | 15 | super().__init__(OrderedDict([ 16 | ('norm1', nn.BatchNorm2d(inp)), 17 | ('relu1', nn.ReLU(inplace=True)), 18 | ('conv1', blocks.Conv2d1x1(inp, oup)), 19 | ('norm2', nn.BatchNorm2d(oup)), 20 | ('relu2', nn.ReLU(inplace=True)), 21 | ('conv2', blocks.Conv2d3x3(oup, 32)) 22 | ])) 23 | 24 | 25 | class TransitionLayer(nn.Sequential): 26 | '''BN-ReLU-Conv''' 27 | 28 | def __init__(self, inp, oup): 29 | super().__init__(OrderedDict([ 30 | ('norm', nn.BatchNorm2d(inp)), 31 | ('relu', nn.ReLU(inplace=True)), 32 | ('conv', blocks.Conv2d1x1(inp, oup)), 33 | ('pool', nn.AvgPool2d(kernel_size=2, stride=2, padding=0)) 34 | ])) 35 | 36 | 37 | class DenseBlock(nn.Module): 38 | def __init__(self, inp, oup, n): 39 | super().__init__() 40 | 41 | layers = [] 42 | 43 | for i in range(n): 44 | layers.append(DenseLayer(inp + 32 * i, oup)) 45 | 46 | self.features = nn.Sequential(*layers) 47 | 48 | def forward(self, x): 49 | outs = [x] 50 | for layer in self.features.children(): 51 | outs.append(layer(torch.cat(outs, dim=1))) 52 | return torch.cat(outs, dim=1) 53 | 54 | 55 | @export 56 | class DenseNet(nn.Module): 57 | def __init__( 58 | self, 59 | in_channels: int = 3, 60 | num_classes: int = 1000, 61 | layers: List[int] = [2, 2, 2, 2], 62 | channels: List[int] = [64, 128, 256, 512], 63 | thumbnail: bool = False, 64 | **kwargs: Any 65 | ): 66 | super().__init__() 67 | 68 | FRONT_S = 1 if thumbnail else 2 69 | 70 | maxpool = nn.Identity() 71 | if not thumbnail: 72 | maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 73 | 74 | self.features = nn.Sequential( 75 | blocks.Conv2dBlock(in_channels, channels[0], 7, FRONT_S, padding=3), 76 | maxpool, 77 | DenseBlock(channels[0], 128, layers[0]), 78 | TransitionLayer(channels[0] + 32 * layers[0], channels[1]), 79 | DenseBlock(channels[1], 128, layers[1]), 80 | TransitionLayer(channels[1] + 32 * layers[1], channels[2]), 81 | DenseBlock(channels[2], 128, layers[2]), 82 | TransitionLayer(channels[2] + 32 * layers[2], channels[3]), 83 | DenseBlock(channels[3], 128, layers[3]), 84 | 85 | nn.BatchNorm2d(channels[3] + 32 * layers[-1]), 86 | nn.ReLU(inplace=True) 87 
| ) 88 | 89 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 90 | self.classifier = nn.Linear(channels[3] + 32 * layers[-1], num_classes) 91 | 92 | def forward(self, x): 93 | x = self.features(x) 94 | x = self.pool(x) 95 | x = torch.flatten(x, 1) 96 | x = self.classifier(x) 97 | return x 98 | 99 | 100 | def _densenet( 101 | layers: List[int], 102 | channels: List[int], 103 | pretrained: bool = False, 104 | pth: str = None, 105 | progress: bool = True, 106 | **kwargs: Any 107 | ): 108 | model = DenseNet(layers=layers, channels=channels, **kwargs) 109 | 110 | if pretrained: 111 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 112 | return model 113 | 114 | 115 | @export 116 | def densenet121(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 117 | return _densenet([6, 12, 24, 16], [64, 128, 256, 512], pretrained, pth, progress, **kwargs) 118 | 119 | 120 | @export 121 | def densenet169(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 122 | return _densenet([6, 12, 32, 32], [64, 128, 256, 640], pretrained, pth, progress, **kwargs) 123 | 124 | 125 | @export 126 | def densenet201(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 127 | return _densenet([6, 12, 48, 32], [64, 128, 256, 896], pretrained, pth, progress, **kwargs) 128 | 129 | 130 | @export 131 | def densenet264(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 132 | return _densenet([6, 12, 64, 48], [64, 128, 256, 1408], pretrained, pth, progress, **kwargs) 133 | -------------------------------------------------------------------------------- /cvm/models/det/__init__.py: -------------------------------------------------------------------------------- 1 | from .yolov1 import * -------------------------------------------------------------------------------- /cvm/models/det/yolov1.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from ..ops import blocks 4 | from ..utils import export, get_out_channels, load_from_local_or_url 5 | import cvm.models as models 6 | from typing import Any, List 7 | 8 | 9 | @export 10 | class YOLOv1(nn.Module): 11 | def __init__( 12 | self, 13 | backbone: nn.Module, 14 | grid_size: List[int] = (7, 7), 15 | num_boxes_per_cell: int = 2, 16 | num_classes: int = 20 17 | ): 18 | super().__init__() 19 | 20 | self.backbone = backbone 21 | 22 | self.pool = nn.AdaptiveAvgPool2d((7, 7)) 23 | 24 | self.head = nn.Sequential( 25 | blocks.Conv2dBlock(get_out_channels(backbone), 512, 3), 26 | blocks.Conv2d1x1(512, num_classes + 5 * num_boxes_per_cell) 27 | ) 28 | 29 | def forward(self, x): 30 | x = self.backbone(x) 31 | x = self.pool(x) 32 | x = self.head(x) 33 | return x 34 | 35 | 36 | def create_yolov1( 37 | backbone: str = 'resnet50_v1', 38 | num_classes: int = 21, 39 | pretrained_backbone: bool = False, 40 | pretrained: bool = False, 41 | pth: str = None, 42 | progress: bool = True, 43 | **kwargs: Any 44 | ): 45 | if pretrained: 46 | pretrained_backbone = False 47 | 48 | backbone = models.__dict__[backbone]( 49 | pretrained=pretrained_backbone, 50 | **kwargs 51 | ).features 52 | 53 | model = YOLOv1(backbone, num_classes=num_classes) 54 | 55 | if pretrained: 56 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 57 | return model 58 | 59 | 60 | @export 61 | def yolov1_resnet18_v1( 62 | num_classes: int = 21, 63 | pretrained_backbone: bool = False, 64 | pretrained: bool = False, 65 | pth: str 
= None, 66 | progress: bool = True, 67 | **kwargs: Any 68 | ): 69 | return create_yolov1('resnet18_v1', num_classes, pretrained_backbone, pretrained, pth, progress, **kwargs) 70 | 71 | 72 | @export 73 | def yolov1_mobilenet_v3_large( 74 | num_classes: int = 21, 75 | pretrained_backbone: bool = False, 76 | pretrained: bool = False, 77 | pth: str = None, 78 | progress: bool = True, 79 | **kwargs: Any 80 | ): 81 | return create_yolov1('mobilenet_v3_large', num_classes, pretrained_backbone, pretrained, pth, progress, **kwargs) 82 | 83 | 84 | @export 85 | def yolov1_regnet_x_400mf( 86 | num_classes: int = 21, 87 | pretrained_backbone: bool = False, 88 | pretrained: bool = False, 89 | pth: str = None, 90 | progress: bool = True, 91 | **kwargs: Any 92 | ): 93 | return create_yolov1('regnet_x_400mf', num_classes, pretrained_backbone, pretrained, pth, progress, **kwargs) 94 | -------------------------------------------------------------------------------- /cvm/models/gan/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcgan import * -------------------------------------------------------------------------------- /cvm/models/gan/dcgan.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from ..utils import export, load_from_local_or_url 4 | from typing import Any 5 | 6 | 7 | @export 8 | class DCGAN(nn.Module): 9 | def __init__( 10 | self, 11 | hidden_dim: int = 100, 12 | in_channels: int = 3, 13 | **kwargs: Any 14 | ) -> None: 15 | super().__init__() 16 | 17 | base_width = 64 18 | 19 | self.generator = nn.Sequential( 20 | # input : (batch_size, hidden_dim, 1, 1) 21 | nn.ConvTranspose2d(hidden_dim, base_width * 8, kernel_size=4, stride=1, padding=0, bias=False), 22 | nn.BatchNorm2d(base_width * 8), 23 | nn.ReLU(True), 24 | # state size : (batch_size, ngf * 8, 4, 4) 25 | nn.ConvTranspose2d(base_width * 8, base_width * 4, kernel_size=4, stride=2, padding=1, bias=False), 26 | nn.BatchNorm2d(base_width * 4), 27 | nn.ReLU(True), 28 | # state size : (batch_size, ngf * 4, 8, 8) 29 | nn.ConvTranspose2d(base_width * 4, base_width * 2, kernel_size=4, stride=2, padding=1, bias=False), 30 | nn.BatchNorm2d(base_width * 2), 31 | nn.ReLU(True), 32 | # state size: (batch_size, ngf * 2, 16, 16) 33 | nn.ConvTranspose2d(base_width * 2, base_width, kernel_size=4, stride=2, padding=1, bias=False), 34 | nn.BatchNorm2d(base_width), 35 | nn.ReLU(True), 36 | # state size : (batch_size, ngf, 32, 32) 37 | nn.ConvTranspose2d(base_width, in_channels, kernel_size=4, stride=2, padding=1, bias=False), 38 | nn.Tanh() 39 | # state size : (batch_size, nc, 64, 64) 40 | ) 41 | 42 | self.discriminator = nn.Sequential( 43 | # input size : (batch_size, nc, 64, 64) 44 | nn.Conv2d(in_channels, base_width, kernel_size=4, stride=2, padding=1, bias=False), 45 | nn.LeakyReLU(0.2, inplace=True), 46 | # state size : (batch_size, base_width, 32, 32) 47 | nn.Conv2d(base_width, base_width * 2, 4, 2, 1, bias=False), 48 | nn.BatchNorm2d(base_width * 2), 49 | nn.LeakyReLU(0.2, inplace=True), 50 | #state size : (batch_size, base_width * 2, 16, 16) 51 | nn.Conv2d(base_width * 2, base_width * 4, 4, 2, 1, bias=False), 52 | nn.BatchNorm2d(base_width * 4), 53 | nn.LeakyReLU(0.2, inplace=True), 54 | # state size : (batch_size, base_width * 4, 8, 8) 55 | nn.Conv2d(base_width * 4, base_width * 8, 4, 2, 1, bias=False), 56 | nn.BatchNorm2d(base_width * 8), 57 | nn.LeakyReLU(0.2, inplace=True), 58 | # state size : (batch_size, base_width * 8, 4, 
4) 59 | nn.Conv2d(base_width * 8, 1, 4, 1, 0, bias=False), 60 | nn.Sigmoid(), 61 | # state size : (batch_size, 1, 1, 1) 62 | nn.Flatten() 63 | ) 64 | 65 | 66 | @export 67 | def dcgan( 68 | pretrained: bool = False, 69 | pth: str = None, 70 | progress: bool = True, 71 | **kwargs: Any 72 | ): 73 | model = DCGAN(**kwargs) 74 | 75 | if pretrained: 76 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 77 | return model 78 | -------------------------------------------------------------------------------- /cvm/models/googlenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, load_from_local_or_url 6 | from typing import Any, List, OrderedDict 7 | 8 | __all__ = ['inception_v1'] 9 | 10 | 11 | class InceptionBlock(blocks.ConcatBranches): 12 | def __init__( 13 | self, 14 | inp, 15 | planes_1x1: int, 16 | planes_3x3: List[int], 17 | planes_5x5: List[int], 18 | planes_pool: int 19 | ): 20 | super().__init__(OrderedDict([ 21 | ('branch-1x1', blocks.Conv2d1x1Block(inp, planes_1x1)), 22 | ('branch-3x3', nn.Sequential( 23 | blocks.Conv2d1x1Block(inp, planes_3x3[0]), 24 | blocks.Conv2dBlock(planes_3x3[0], planes_3x3[1]) 25 | )), 26 | ('branch-5x5', nn.Sequential( 27 | blocks.Conv2d1x1Block(inp, planes_5x5[0]), 28 | blocks.Conv2dBlock(planes_5x5[0], planes_5x5[1], kernel_size=5, padding=2) 29 | )), 30 | ('branch-pool', nn.Sequential( 31 | nn.MaxPool2d(3, stride=1, padding=1), 32 | blocks.Conv2d1x1Block(inp, planes_pool) 33 | )) 34 | ])) 35 | 36 | 37 | class InceptionAux(nn.Sequential): 38 | def __init__(self, inp, oup): 39 | super().__init__( 40 | nn.AdaptiveAvgPool2d((4, 4)), 41 | blocks.Conv2d1x1Block(inp, 128), 42 | nn.Flatten(1), 43 | nn.Linear(2048, 1024), 44 | nn.ReLU(inplace=True), 45 | nn.Dropout(0.7), 46 | nn.Linear(1024, oup) 47 | ) 48 | 49 | 50 | @export 51 | def googlenet(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 52 | model = GoogLeNet(**kwargs) 53 | 54 | if pretrained: 55 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 56 | return model 57 | 58 | 59 | inception_v1 = googlenet 60 | 61 | 62 | @export 63 | class GoogLeNet(nn.Module): 64 | def __init__( 65 | self, 66 | in_channels: int = 3, 67 | num_classes: int = 1000, 68 | thumbnail: bool = False, 69 | **kwargs: Any 70 | ): 71 | super().__init__() 72 | 73 | FRONT_S = 1 if thumbnail else 2 74 | 75 | self.stem = nn.Sequential( 76 | blocks.Conv2dBlock(in_channels, 64, 7, stride=FRONT_S, padding=3), 77 | nn.Identity() if thumbnail else nn.MaxPool2d(3, 2, ceil_mode=True) 78 | ) 79 | 80 | self.stage1 = nn.Sequential( 81 | blocks.Conv2d1x1Block(64, 64), 82 | blocks.Conv2dBlock(64, 192, 3, padding=1), 83 | nn.MaxPool2d(3, 2, ceil_mode=True) 84 | ) 85 | 86 | self.stage2 = nn.Sequential(OrderedDict([ 87 | ('inception_3a', InceptionBlock(192, 64, [96, 128], [16, 32], 32)), 88 | ('inception_3b', InceptionBlock(256, 128, [128, 192], [32, 96], 64)), 89 | ('max_pool', nn.MaxPool2d(3, 2, ceil_mode=True)) 90 | ])) 91 | 92 | self.stage3 = nn.Sequential(OrderedDict([ 93 | ('inception_4a', InceptionBlock(480, 192, [96, 208], [16, 48], 64)), 94 | ('inception_4b', InceptionBlock(512, 160, [112, 224], [24, 64], 64)), 95 | ('inception_4c', InceptionBlock(512, 128, [128, 256], [24, 64], 64)), 96 | ('inception_4d', InceptionBlock(512, 112, [144, 288], [32, 64], 64)), 97 | ('inception_4e', InceptionBlock(528, 256, [160, 320], [32, 128], 128)), 98 | 
('max_pool', nn.MaxPool2d(3, 2, ceil_mode=True)) 99 | ])) 100 | 101 | self.stage4 = nn.Sequential(OrderedDict([ 102 | ('inception_5a', InceptionBlock(832, 256, [160, 320], [32, 128], 128)), 103 | ('inception_5b', InceptionBlock(832, 384, [192, 384], [48, 128], 128)) 104 | ])) 105 | 106 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 107 | 108 | self.classifiar = nn.Sequential( 109 | nn.Dropout(0.4), 110 | nn.Linear(1024, num_classes) 111 | ) 112 | 113 | self.aux1 = InceptionAux(512, num_classes) 114 | self.aux2 = InceptionAux(528, num_classes) 115 | 116 | def forward(self, x): 117 | x = self.stem(x) 118 | 119 | x = self.stage1(x) 120 | x = self.stage2(x) 121 | 122 | x = self.stage3.inception_4a(x) 123 | aux1 = self.aux1(x) if self.training else None 124 | x = self.stage3.inception_4b(x) 125 | x = self.stage3.inception_4c(x) 126 | x = self.stage3.inception_4d(x) 127 | aux2 = self.aux2(x) if self.training else None 128 | x = self.stage3.inception_4e(x) 129 | 130 | x = self.stage3.max_pool(x) 131 | 132 | x = self.stage4(x) 133 | 134 | x = self.pool(x) 135 | x = torch.flatten(x, 1) 136 | x = self.classifiar(x) 137 | 138 | if self.training: 139 | return x, aux1, aux2 140 | else: 141 | return x 142 | -------------------------------------------------------------------------------- /cvm/models/inception_v3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .ops import blocks 4 | from .utils import export, load_from_local_or_url 5 | from typing import Any, List, OrderedDict 6 | 7 | 8 | # Figure 5 9 | class InceptionBlockV5(blocks.ConcatBranches): 10 | def __init__( 11 | self, 12 | inp, 13 | planes_1x1: int, 14 | planes_5x5: List[int], 15 | planes_3x3db: List[int], 16 | planes_pool: int 17 | ): 18 | super().__init__(OrderedDict([ 19 | ('branch-1x1', blocks.Conv2d1x1Block(inp, planes_1x1)), 20 | ('branch-5x5', nn.Sequential( 21 | blocks.Conv2d1x1Block(inp, planes_5x5[0]), 22 | blocks.Conv2dBlock(planes_5x5[0], planes_5x5[1], kernel_size=5, padding=2) 23 | )), 24 | ('branch-3x3db', nn.Sequential( 25 | blocks.Conv2d1x1Block(inp, planes_3x3db[0]), 26 | blocks.Conv2dBlock(planes_3x3db[0], planes_3x3db[1]), 27 | blocks.Conv2dBlock(planes_3x3db[1], planes_3x3db[1]) 28 | )), 29 | ('branch-pool', nn.Sequential( 30 | nn.AvgPool2d(3, stride=1, padding=1), 31 | blocks.Conv2d1x1Block(inp, planes_pool) 32 | )) 33 | ])) 34 | 35 | 36 | # Figure 6: blocks.InceptionB 37 | 38 | 39 | # Figure 7 40 | class InceptionBlockV7(blocks.ConcatBranches): 41 | def __init__( 42 | self, 43 | inp, 44 | planes_1x1: int, 45 | planes_3x3: List[int], 46 | planes_3x3db: List[int], 47 | planes_pool 48 | ) -> None: 49 | super().__init__(OrderedDict([ 50 | ('branch_1x1', blocks.Conv2d1x1Block(inp, planes_1x1)), 51 | ('branch-3x3', nn.Sequential( 52 | blocks.Conv2d1x1Block(inp, planes_3x3[0]), 53 | blocks.ConcatBranches(OrderedDict([ 54 | ('branch-3x3-1', blocks.Conv2dBlock( 55 | planes_3x3[0], planes_3x3[1], kernel_size=(1, 3), padding=(0, 1) 56 | )), 57 | ('branch-3x3-2', blocks.Conv2dBlock( 58 | planes_3x3[0], planes_3x3[1], kernel_size=(3, 1), padding=(1, 0) 59 | )) 60 | ])) 61 | )), 62 | ('branch-3x3db', nn.Sequential( 63 | blocks.Conv2d1x1Block(inp, planes_3x3db[0]), 64 | blocks.Conv2dBlock(planes_3x3db[0], planes_3x3db[1]), 65 | blocks.ConcatBranches(OrderedDict([ 66 | ('branch-3x3db-1', blocks.Conv2dBlock( 67 | planes_3x3db[1], planes_3x3db[1], kernel_size=(1, 3), padding=(0, 1) 68 | )), 69 | ('branch-3x3db-2', blocks.Conv2dBlock( 70 | planes_3x3db[1], 
planes_3x3db[1], kernel_size=(3, 1), padding=(1, 0) 71 | )) 72 | ])) 73 | )), 74 | ('branch-pool', nn.Sequential( 75 | nn.AvgPool2d(3, stride=1, padding=1), 76 | blocks.Conv2d1x1Block(inp, planes_pool) 77 | )) 78 | ])) 79 | 80 | 81 | class InceptionV3(nn.Module): 82 | r""" 83 | Paper: Rethinking the Inception Architecture for Computer Vision, https://arxiv.org/abs/1512.00567 84 | Code: https://github.com/keras-team/keras/blob/master/keras/applications/inception_v3.py 85 | """ 86 | 87 | def __init__( 88 | self, 89 | in_channels: int = 3, 90 | num_classes: int = 1000, 91 | dropout_rate: float = 0.2, 92 | thumbnail: bool = False, 93 | **kwargs: Any 94 | ) -> None: 95 | super().__init__() 96 | 97 | self.stem = blocks.Conv2dBlock(in_channels, 32, kernel_size=3, stride=2, padding=0) 98 | 99 | self.stage1 = blocks.Stage( 100 | blocks.Conv2dBlock(32, 32, kernel_size=3, padding=0), 101 | blocks.Conv2dBlock(32, 64, kernel_size=3, padding=1), 102 | nn.MaxPool2d(kernel_size=3, stride=2) 103 | ) 104 | 105 | self.stage2 = blocks.Stage( 106 | blocks.Conv2d1x1Block(64, 80), 107 | blocks.Conv2dBlock(80, 192, kernel_size=3, padding=0), 108 | nn.MaxPool2d(kernel_size=3, stride=2) 109 | ) 110 | 111 | self.stage3 = blocks.Stage( 112 | InceptionBlockV5(192, 64, [48, 64], [64, 96], 32), # mix 0: 35 x 35 x 256 113 | InceptionBlockV5(256, 64, [48, 64], [64, 96], 64), # mix 1: 35 x 35 x 288 114 | InceptionBlockV5(288, 64, [48, 64], [64, 96], 64), # mix 2: 35 x 35 x 288 115 | blocks.ReductionA(288, 384, [64, 96, 96]) # mix 3: 17 x 17 x 768 116 | ) 117 | 118 | self.stage4 = blocks.Stage( 119 | blocks.InceptionB(768, 192, [128, 128, 192], [128, 128, 192], 192), # mix 4: 17 x 17 x 768 120 | blocks.InceptionB(768, 192, [160, 160, 192], [160, 160, 192], 192), # mix 5: 17 x 17 x 768 121 | blocks.InceptionB(768, 192, [160, 160, 192], [160, 160, 192], 192), # mix 6: 17 x 17 x 768 122 | blocks.InceptionB(768, 192, [192, 192, 192], [192, 192, 192], 192), # mix 7: 17 x 17 x 768 123 | blocks.ReductionB(768, [192, 320], [192, 192]) # mix 8: 17 x 17 x 1280 124 | ) 125 | 126 | self.stage5 = blocks.Stage( 127 | InceptionBlockV7(1280, 320, [384, 384], [448, 384], 192), # mixed 9: 8 x 8 x 2048 128 | InceptionBlockV7(2048, 320, [384, 384], [448, 384], 192), # mixed 9: 8 x 8 x 2048 129 | ) 130 | 131 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 132 | self.classifer = nn.Sequential( 133 | nn.Dropout(dropout_rate, inplace=True), 134 | nn.Linear(2048, num_classes) 135 | ) 136 | 137 | def forward(self, x): 138 | x = self.stem(x) 139 | x = self.stage1(x) 140 | x = self.stage2(x) 141 | x = self.stage3(x) 142 | x = self.stage4(x) 143 | x = self.stage5(x) 144 | x = self.pool(x) 145 | x = torch.flatten(x, start_dim=1) 146 | x = self.classifer(x) 147 | return x 148 | 149 | 150 | @export 151 | def inception_v3(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 152 | model = InceptionV3(**kwargs) 153 | 154 | if pretrained: 155 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 156 | return model 157 | -------------------------------------------------------------------------------- /cvm/models/mlp_mixer.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch 3 | import torch.nn as nn 4 | from .ops import blocks 5 | from .utils import export, config, load_from_local_or_url 6 | from typing import Any 7 | 8 | 9 | class MixerBlock(nn.Module): 10 | def __init__( 11 | self, 12 | hidden_dim, 13 | sequence_len, 14 | ratio=(0.5, 
4.0), 15 | normalizer_fn: nn.Module = partial(nn.LayerNorm, eps=1e-6), 16 | dropout_rate: float = 0., 17 | drop_path_rate: float = 0. 18 | ): 19 | super().__init__() 20 | 21 | self.norm1 = normalizer_fn(hidden_dim) 22 | self.token_mixing = blocks.MlpBlock(sequence_len, int(hidden_dim * ratio[0]), dropout_rate=dropout_rate) 23 | self.drop1 = blocks.StochasticDepth(1. - drop_path_rate) 24 | 25 | self.norm2 = normalizer_fn(hidden_dim) 26 | self.channel_mixing = blocks.MlpBlock(hidden_dim, int(hidden_dim * ratio[1]), dropout_rate=dropout_rate) 27 | self.drop2 = blocks.StochasticDepth(1. - drop_path_rate) 28 | 29 | def forward(self, x): 30 | x = x + self.drop1(self.token_mixing(self.norm1(x).transpose(1, 2)).transpose(1, 2)) 31 | x = x + self.drop2(self.channel_mixing(self.norm2(x))) 32 | return x 33 | 34 | 35 | @export 36 | class Mixer(nn.Module): 37 | r''' 38 | See: https://github.com/google-research/vision_transformer/blob/main/vit_jax/models_mixer.py 39 | ''' 40 | 41 | def __init__( 42 | self, 43 | image_size: int = 224, 44 | in_channels: int = 3, 45 | num_classes: int = 1000, 46 | patch_size: int = 32, 47 | hidden_dim: int = 768, 48 | num_blocks: int = 12, 49 | dropout_rate: float = 0., 50 | drop_path_rate: float = 0., 51 | **kwargs: Any 52 | ): 53 | super().__init__() 54 | 55 | self.num_blocks = num_blocks 56 | self.num_patches = (image_size // patch_size) ** 2 57 | 58 | self.stem = nn.Conv2d(in_channels, hidden_dim, 59 | kernel_size=patch_size, stride=patch_size) 60 | self.mixer = nn.Sequential( 61 | *[ 62 | MixerBlock( 63 | hidden_dim, self.num_patches, dropout_rate=dropout_rate, drop_path_rate=drop_path_rate 64 | ) for _ in range(self.num_blocks) 65 | ] 66 | ) 67 | self.norm = nn.LayerNorm(hidden_dim) 68 | 69 | self.head = nn.Linear(hidden_dim, num_classes) 70 | 71 | def forward(self, x): 72 | x = self.stem(x) 73 | # n c h w -> n p c 74 | x = x.flatten(2).transpose(1, 2) 75 | x = self.mixer(x) 76 | x = self.norm(x) 77 | x = x.mean(dim=1) 78 | x = self.head(x) 79 | 80 | return x 81 | 82 | 83 | def _mixer( 84 | image_size: int = 224, 85 | patch_size: int = 32, 86 | hidden_dim: int = 768, 87 | num_blocks: int = 12, 88 | pretrained: bool = False, 89 | pth: str = None, 90 | progress: bool = True, 91 | **kwargs: Any 92 | ): 93 | model = Mixer(image_size, patch_size=patch_size, 94 | hidden_dim=hidden_dim, num_blocks=num_blocks, **kwargs) 95 | 96 | if pretrained: 97 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 98 | return model 99 | 100 | 101 | @export 102 | def mixer_s32_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 103 | return _mixer(224, 32, 512, 8, pretrained, pth, progress, **kwargs) 104 | 105 | 106 | @export 107 | def mixer_s16_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 108 | return _mixer(224, 16, 512, 8, pretrained, pth, progress, **kwargs) 109 | 110 | 111 | @export 112 | def mixer_b32_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 113 | return _mixer(224, 32, 768, 12, pretrained, pth, progress, **kwargs) 114 | 115 | 116 | @export 117 | def mixer_b16_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 118 | return _mixer(224, 16, 768, 12, pretrained, pth, progress, **kwargs) 119 | 120 | 121 | @export 122 | def mixer_l32_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 123 | return _mixer(224, 32, 1024, 24, pretrained, pth, progress, **kwargs) 124 | 125 | 126 | @export 
127 | def mixer_l16_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 128 | return _mixer(224, 16, 1024, 24, pretrained, pth, progress, **kwargs) 129 | 130 | 131 | @export 132 | def mixer_h14_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 133 | return _mixer(224, 14, 1280, 32, pretrained, pth, progress, **kwargs) 134 | -------------------------------------------------------------------------------- /cvm/models/mnasnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, config, load_from_local_or_url 6 | from typing import Any 7 | 8 | 9 | # Paper suggests 0.99 momentum 10 | _BN_MOMENTUM = 0.01 11 | 12 | 13 | @export 14 | class MnasNet(nn.Module): 15 | def __init__( 16 | self, 17 | in_channels: int = 3, 18 | num_classes: int = 1000, 19 | dropout_rate: float = 0.2, 20 | thumbnail: bool = False, 21 | **kwargs: Any 22 | ): 23 | super().__init__() 24 | 25 | FRONT_S = 1 if thumbnail else 2 26 | 27 | t = [1, 6, 3, 6, 6, 6, 6] 28 | c = [32, 16, 24, 40, 80, 112, 160, 320, 1280] 29 | n = [1, 2, 3, 4, 2, 3, 1] # repeats 30 | s = [1, FRONT_S, 2, 2, 1, 2, 1] 31 | k = [3, 3, 5, 3, 3, 5, 3] 32 | se = [0, 0, 0.25, 0, 0.25, 0.25, 0] 33 | 34 | features = [blocks.Conv2dBlock(in_channels, c[0], 3, stride=FRONT_S)] 35 | 36 | for i in range(len(t)): 37 | features.append( 38 | self.make_layers(c[i], t[i], c[i+1], n[i], s[i], k[i], se[i]) 39 | ) 40 | 41 | features.append(blocks.Conv2d1x1Block(c[-2], c[-1])) 42 | 43 | self.features = nn.Sequential(*features) 44 | 45 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 46 | self.classifier = nn.Sequential( 47 | nn.Dropout(dropout_rate, inplace=True), 48 | nn.Linear(c[-1], num_classes) 49 | ) 50 | 51 | @staticmethod 52 | def make_layers( 53 | inp: int, 54 | t: int, 55 | oup: int, 56 | n: int, 57 | stride: int, 58 | kernel_size: int = 3, 59 | rd_ratio: float = None 60 | ): 61 | layers = [blocks.InvertedResidualBlock(inp, oup, t, kernel_size, stride, rd_ratio=rd_ratio)] 62 | 63 | for _ in range(n - 1): 64 | layers.append(blocks.InvertedResidualBlock(oup, oup, t, kernel_size, rd_ratio=rd_ratio)) 65 | 66 | return blocks.Stage(layers) 67 | 68 | def forward(self, x): 69 | x = self.features(x) 70 | x = self.pool(x) 71 | x = torch.flatten(x, 1) 72 | x = self.classifier(x) 73 | return x 74 | 75 | 76 | @export 77 | def mnasnet_a1(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 78 | model = MnasNet(**kwargs) 79 | 80 | if pretrained: 81 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 82 | return model 83 | -------------------------------------------------------------------------------- /cvm/models/mobilenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, config, load_from_local_or_url 6 | from typing import Any, OrderedDict, Type, Union, List 7 | 8 | 9 | class MobileBlock(nn.Sequential): 10 | def __init__( 11 | self, 12 | inp, 13 | oup, 14 | kernel_size: int = 3, 15 | stride: int = 1, 16 | padding: int = None, 17 | dilation: int = 1, 18 | groups: int = 1 19 | ): 20 | super().__init__( 21 | blocks.DepthwiseBlock(inp, inp, kernel_size, stride, padding, dilation=dilation), 22 | blocks.PointwiseBlock(inp, oup, groups=groups) 23 | ) 24 | 25 | 26 | class 
DepthwiseSeparableBlock(nn.Sequential): 27 | def __init__( 28 | self, 29 | inp, 30 | oup, 31 | kernel_size: int = 3, 32 | stride: int = 1, 33 | padding: int = None, 34 | dilation: int = 1, 35 | groups: int = 1 36 | ): 37 | super().__init__( 38 | blocks.DepthwiseConv2d(inp, inp, kernel_size, stride, padding, dilation=dilation), 39 | blocks.PointwiseBlock(inp, oup, groups=groups) 40 | ) 41 | 42 | 43 | @export 44 | class MobileNet(nn.Module): 45 | '''https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py''' 46 | 47 | def __init__( 48 | self, 49 | in_channels: int = 3, 50 | num_classes: int = 1000, 51 | base_width: int = 32, 52 | block: Type[Union[MobileBlock, DepthwiseSeparableBlock]] = MobileBlock, 53 | depth_multiplier: float = 1.0, 54 | dropout_rate: float = 0.2, 55 | dilations: List[int] = None, 56 | thumbnail: bool = False, 57 | **kwargs: Any 58 | ): 59 | super().__init__() 60 | 61 | def depth(d): return max(int(d * depth_multiplier), 8) 62 | 63 | dilations = dilations or [1, 1, 1, 1] 64 | assert len(dilations) == 4, '' 65 | 66 | FRONT_S = 1 if thumbnail else 2 67 | 68 | layers = [2, 2, 6, 2] 69 | strides = [FRONT_S, 2, 2, 2] 70 | 71 | self.features = nn.Sequential(OrderedDict([ 72 | ('stem', blocks.Stage( 73 | blocks.Conv2dBlock(in_channels, depth(base_width), stride=FRONT_S), 74 | block(depth(base_width), depth(base_width) * 2) 75 | )) 76 | ])) 77 | 78 | for stage, stride in enumerate(strides): 79 | inp = depth(base_width * 2 ** (stage + 1)) 80 | oup = depth(base_width * 2 ** (stage + 2)) 81 | 82 | self.features.add_module(f'stage{stage+1}', blocks.Stage( 83 | [block( 84 | inp if i == 0 else oup, 85 | oup, 86 | stride=stride if (i == 0 and dilations[stage] == 1) else 1, 87 | dilation=max(dilations[stage] // (stride if i == 0 else 1), 1) 88 | ) for i in range(layers[stage])] 89 | )) 90 | 91 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 92 | self.classifier = nn.Sequential( 93 | nn.Dropout(dropout_rate, inplace=True), 94 | nn.Linear(oup, num_classes) 95 | ) 96 | 97 | def forward(self, x): 98 | x = self.features(x) 99 | x = self.pool(x) 100 | x = torch.flatten(x, 1) 101 | x = self.classifier(x) 102 | return x 103 | 104 | 105 | def _mobilenet_v1( 106 | depth_multiplier: float = 1.0, 107 | block: Type[Union[MobileBlock, DepthwiseSeparableBlock]] = MobileBlock, 108 | pretrained: bool = False, 109 | pth: str = None, 110 | progress: bool = True, 111 | **kwargs: Any 112 | ): 113 | model = MobileNet(depth_multiplier=depth_multiplier, block=block, **kwargs) 114 | 115 | if pretrained: 116 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 117 | return model 118 | 119 | 120 | @export 121 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v1_x1_0-e00006ef.pth') 122 | def mobilenet_v1_x1_0(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 123 | return _mobilenet_v1(1.0, MobileBlock, pretrained, pth, progress, **kwargs) 124 | 125 | 126 | @export 127 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v1_x0_75-43c1cb04.pth') 128 | def mobilenet_v1_x0_75(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 129 | return _mobilenet_v1(0.75, MobileBlock, pretrained, pth, progress, **kwargs) 130 | 131 | 132 | @export 133 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v1_x0_5-588ee141.pth') 134 | def mobilenet_v1_x0_5(pretrained: bool = False, pth: str = None, progress: bool = True, 
**kwargs: Any): 135 | return _mobilenet_v1(0.5, MobileBlock, pretrained, pth, progress, **kwargs) 136 | 137 | 138 | @export 139 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v1_x0_35-cbab38a6.pth') 140 | def mobilenet_v1_x0_35(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 141 | return _mobilenet_v1(0.35, MobileBlock, pretrained, pth, progress, **kwargs) 142 | 143 | 144 | @export 145 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v1_x1_0_wo_dwrelubn-2956d795.pth') 146 | @blocks.normalizer(position='after') 147 | def mobilenet_v1_x1_0_wo_dwrelubn(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs): 148 | return _mobilenet_v1(1.0, DepthwiseSeparableBlock, pretrained, pth, progress, **kwargs) 149 | -------------------------------------------------------------------------------- /cvm/models/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .ops import blocks 6 | from .utils import export, config, load_from_local_or_url 7 | from .ops.functional import make_divisible 8 | from typing import Any, OrderedDict, List 9 | 10 | 11 | @export 12 | class MobileNetV2(nn.Module): 13 | @blocks.activation(partial(nn.ReLU6, inplace=True)) 14 | def __init__( 15 | self, 16 | in_channels: int = 3, 17 | num_classes: int = 1000, 18 | multiplier: float = 1.0, 19 | dropout_rate: float = 0.2, 20 | dilations: List[int] = None, 21 | thumbnail: bool = False, 22 | **kwargs: Any 23 | ): 24 | super().__init__() 25 | 26 | dilations = [1] + (dilations or [1, 1, 1, 1]) 27 | assert len(dilations) == 5, '' 28 | 29 | self.block = blocks.InvertedResidualBlock 30 | 31 | FRONT_S = 1 if thumbnail else 2 32 | 33 | t = [1, 6, 6, 6, 6, 6, 6] 34 | c = [32, 16, 24, 32, 64, 96, 160, 320] 35 | n = [1, 2, 3, 4, 3, 3, 1] 36 | s = [1, FRONT_S, 2, 2, 1, 2, 1] 37 | stages = [0, 1, 1, 1, 0, 1, 0] 38 | 39 | if multiplier < 1.0: 40 | c = [make_divisible(x * multiplier, 8) for x in c] 41 | 42 | self.features = nn.Sequential(OrderedDict([ 43 | ('stem', blocks.Stage( 44 | blocks.Conv2dBlock(in_channels, c[0], 3, stride=FRONT_S) 45 | )) 46 | ])) 47 | 48 | for i in range(len(t)): 49 | layers = self.make_layers( 50 | c[i], 51 | t[i], 52 | c[i+1], 53 | n[i], 54 | s[i], 55 | dilations[len(self.features) + (stages[i] - 1)] 56 | ) 57 | 58 | if stages[i]: 59 | self.features.add_module(f'stage{len(self.features)}', blocks.Stage(layers)) 60 | else: 61 | self.features[-1].append(layers) 62 | 63 | self.features[-1].append(blocks.Conv2d1x1Block(c[-1], 1280)) 64 | 65 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 66 | self.classifier = nn.Sequential( 67 | nn.Dropout(dropout_rate, inplace=True), 68 | nn.Linear(1280, num_classes) 69 | ) 70 | 71 | def make_layers(self, inp: int, t: int, oup: int, n: int, stride: int, dilation: int): 72 | layers = [ 73 | self.block( 74 | inp, 75 | oup, 76 | t, 77 | stride=stride if dilation == 1 else 1, 78 | dilation=max(dilation // stride, 1) 79 | ) 80 | ] 81 | 82 | for _ in range(n - 1): 83 | layers.append(self.block(oup, oup, t, dilation=dilation)) 84 | 85 | return layers 86 | 87 | def forward(self, x): 88 | x = self.features(x) 89 | x = self.pool(x) 90 | x = torch.flatten(x, 1) 91 | x = self.classifier(x) 92 | 93 | return x 94 | 95 | 96 | def _mobilenet_v2( 97 | multiplier: float = 1.0, 98 | pretrained: bool = False, 99 | pth: str = None, 100 | progress: bool = 
True, 101 | **kwargs: Any 102 | ): 103 | model = MobileNetV2(multiplier=multiplier, **kwargs) 104 | 105 | if pretrained: 106 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 107 | return model 108 | 109 | 110 | @export 111 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v2_x1_0-bf342af4.pth') 112 | def mobilenet_v2_x1_0(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 113 | return _mobilenet_v2(1.0, pretrained, pth, progress, **kwargs) 114 | 115 | 116 | @export 117 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v2_x0_75-fdfaf351.pth') 118 | def mobilenet_v2_x0_75(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 119 | return _mobilenet_v2(0.75, pretrained, pth, progress, **kwargs) 120 | 121 | 122 | @export 123 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v2_x0_5-a9d4ed71.pth') 124 | def mobilenet_v2_x0_5(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 125 | return _mobilenet_v2(0.5, pretrained, pth, progress, **kwargs) 126 | 127 | 128 | @export 129 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v2_x0_35-9bce1f31.pth') 130 | def mobilenet_v2_x0_35(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 131 | return _mobilenet_v2(0.35, pretrained, pth, progress, **kwargs) 132 | -------------------------------------------------------------------------------- /cvm/models/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .blocks import * 2 | 3 | from . import functional -------------------------------------------------------------------------------- /cvm/models/ops/blocks/__init__.py: -------------------------------------------------------------------------------- 1 | from .factory import normalizer, activation, normalizer_fn, activation_fn, norm_activation, attention, attention_fn, Nil 2 | from .stage import Stage 3 | from .affine import Affine, Scale 4 | from .vanilla_conv2d import Conv2d1x1, Conv2d3x3, Conv2d1x1BN, Conv2d3x3BN, Conv2d1x1Block, Conv2dBlock 5 | from .bottleneck import ResBasicBlockV1, BottleneckV1, ResBasicBlockV2, BottleneckV2 6 | from .inception import InceptionA, InceptionB, InceptionC, ReductionA, ReductionB, ReductionC, InceptionResNetA, InceptionResNetB, InceptionResNetC 7 | from .channel import Combine, ChannelChunk, ChannelSplit, ChannelShuffle, ConcatBranches, Permute 8 | from .depthwise_separable_conv2d import DepthwiseConv2d, PointwiseConv2d, DepthwiseConv2dBN, PointwiseConv2dBN, DepthwiseBlock, PointwiseBlock 9 | from .inverted_residual_block import InvertedResidualBlock, FusedInvertedResidualBlock 10 | from .squeeze_excite import se, SEBlock 11 | from .mlp import MlpBlock 12 | from .drop import StochasticDepth 13 | from .gaussian_blur import GaussianBlur, GaussianBlurBN, GaussianBlurBlock 14 | from .aspp import ASPP, ASPPPooling 15 | from .adder import adder2d, adder, adder2d_function 16 | from .non_local import NonLocalBlock 17 | from .interpolate import Interpolate 18 | from .gather_excite import GatherExciteBlock 19 | from .selective_kernel import SelectiveKernelBlock 20 | from .cbam import CBAM 21 | from .efficient_channel_attention import EfficientChannelAttention 22 | from .norm import LayerNorm2d 23 | from .global_context import GlobalContextBlock 
-------------------------------------------------------------------------------- /cvm/models/ops/blocks/adder.py: -------------------------------------------------------------------------------- 1 | ''' 2 | refer to: https://github.com/huawei-noah/AdderNet/blob/master/adder.py 3 | 4 | Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of BSD 3-Clause License. 7 | This program is distributed in the hope that it will be useful, 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | BSD 3-Clause License for more details. 11 | ''' 12 | import torch 13 | import torch.nn as nn 14 | import numpy as np 15 | from torch.autograd import Function 16 | import math 17 | 18 | 19 | def adder2d_function(X, W, stride=1, padding=0): 20 | n_filters, d_filter, h_filter, w_filter = W.size() 21 | n_x, d_x, h_x, w_x = X.size() 22 | 23 | h_out = (h_x - h_filter + 2 * padding) / stride + 1 24 | w_out = (w_x - w_filter + 2 * padding) / stride + 1 25 | 26 | h_out, w_out = int(h_out), int(w_out) 27 | X_col = torch.nn.functional.unfold(X.view(1, -1, h_x, w_x), h_filter, dilation=1, 28 | padding=padding, stride=stride).view(n_x, -1, h_out*w_out) 29 | X_col = X_col.permute(1, 2, 0).contiguous().view(X_col.size(1), -1) 30 | W_col = W.view(n_filters, -1) 31 | 32 | out = adder.apply(W_col, X_col) 33 | 34 | out = out.view(n_filters, h_out, w_out, n_x) 35 | out = out.permute(3, 0, 1, 2).contiguous() 36 | 37 | return out 38 | 39 | 40 | class adder(Function): 41 | @staticmethod 42 | def forward(ctx, W_col, X_col): 43 | ctx.save_for_backward(W_col, X_col) 44 | output = -(W_col.unsqueeze(2)-X_col.unsqueeze(0)).abs().sum(1) 45 | return output 46 | 47 | @staticmethod 48 | def backward(ctx, grad_output): 49 | W_col, X_col = ctx.saved_tensors 50 | grad_W_col = ((X_col.unsqueeze(0)-W_col.unsqueeze(2))*grad_output.unsqueeze(1)).sum(2) 51 | grad_W_col = grad_W_col/grad_W_col.norm(p=2).clamp(min=1e-12)*math.sqrt(W_col.size(1)*W_col.size(0))/5 52 | grad_X_col = (-(X_col.unsqueeze(0)-W_col.unsqueeze(2)).clamp(-1, 1)*grad_output.unsqueeze(1)).sum(0) 53 | 54 | return grad_W_col, grad_X_col 55 | 56 | 57 | class adder2d(nn.Module): 58 | 59 | def __init__(self, input_channel, output_channel, kernel_size, stride=1, padding=0, bias=False): 60 | super(adder2d, self).__init__() 61 | self.stride = stride 62 | self.padding = padding 63 | self.input_channel = input_channel 64 | self.output_channel = output_channel 65 | self.kernel_size = kernel_size 66 | self.adder = torch.nn.Parameter(nn.init.normal_(torch.randn( 67 | output_channel, input_channel, kernel_size, kernel_size))) 68 | self.bias = bias 69 | if bias: 70 | self.b = torch.nn.Parameter(nn.init.uniform_(torch.zeros(output_channel))) 71 | 72 | def forward(self, x): 73 | output = adder2d_function(x, self.adder, self.stride, self.padding) 74 | if self.bias: 75 | output += self.b.unsqueeze(0).unsqueeze(2).unsqueeze(3) 76 | 77 | return output 78 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/affine.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class Scale(nn.Module): 6 | def __init__(self, dim, alpha: float = 1e-6): 7 | super().__init__() 8 | 9 | self.dim = dim 10 | 11 | self.alpha = nn.Parameter(torch.ones(dim, 1, 1).fill_(alpha)) 12 | 13 | def 
forward(self, x): 14 | return self.alpha * x 15 | 16 | def extra_repr(self): 17 | return f'{self.dim}' 18 | 19 | 20 | class Affine(nn.Module): 21 | def __init__(self, dim, alpha: float = 1.0, beta: float = 0.0): 22 | super().__init__() 23 | 24 | self.dim = dim 25 | 26 | self.alpha = nn.Parameter(torch.empty(dim, 1, 1).fill_(alpha)) 27 | self.beta = nn.Parameter(torch.empty(dim, 1, 1).fill_(beta)) 28 | 29 | def forward(self, x): 30 | return self.alpha * x + self.beta 31 | 32 | def extra_repr(self): 33 | return f'{self.dim}' 34 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/aspp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .vanilla_conv2d import Conv2d1x1, Conv2d1x1Block, Conv2dBlock 5 | from .channel import Combine 6 | from typing import List 7 | 8 | 9 | class ASPPPooling(nn.Sequential): 10 | def __init__(self, in_channels: int, out_channels: int): 11 | super().__init__( 12 | nn.AdaptiveAvgPool2d(1), 13 | Conv2d1x1Block(in_channels, out_channels) 14 | ) 15 | 16 | def forward(self, x): 17 | size = x.shape[-2:] 18 | for mod in self: 19 | x = mod(x) 20 | return F.interpolate(x, size=size, mode="bilinear", align_corners=False) 21 | 22 | 23 | class ASPP(nn.Module): 24 | """Atrous Spatial Pyramid Pooling""" 25 | 26 | def __init__( 27 | self, 28 | in_channels: int, 29 | out_channels: int = 256, 30 | rates: List[int] = [6, 12, 18] 31 | ): 32 | super().__init__() 33 | 34 | ms = [Conv2d1x1Block(in_channels, out_channels)] 35 | for rate in rates: 36 | ms.append(Conv2dBlock(in_channels, out_channels, padding=rate, dilation=rate)) 37 | 38 | ms.append(ASPPPooling(in_channels, out_channels)) 39 | self.ms = nn.ModuleList(ms) 40 | 41 | self.combine = Combine('CONCAT') 42 | self.conv1x1 = Conv2d1x1(out_channels * len(self.ms), out_channels) 43 | 44 | def forward(self, x): 45 | aspp = [] 46 | for module in self.ms: 47 | aspp.append(module(x)) 48 | 49 | x = self.combine(aspp) 50 | x = self.conv1x1(x) 51 | return x 52 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/cbam.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from .vanilla_conv2d import Conv2d1x1 4 | from .factory import normalizer_fn, activation_fn 5 | from ..functional import make_divisible 6 | 7 | 8 | class ChannelAttention(nn.Module): 9 | def __init__( 10 | self, 11 | in_channels, 12 | rd_ratio: float = 1/8, 13 | rd_divisor: int = 8, 14 | gate_fn: nn.Module = nn.Sigmoid 15 | ) -> None: 16 | super().__init__() 17 | 18 | rd_channels = make_divisible(in_channels * rd_ratio, rd_divisor) 19 | 20 | self.max_pool = nn.AdaptiveMaxPool2d((1, 1)) 21 | self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) 22 | 23 | self.mlp = nn.Sequential( 24 | Conv2d1x1(in_channels, rd_channels, bias=True), 25 | activation_fn(), 26 | Conv2d1x1(rd_channels, in_channels, bias=True) 27 | ) 28 | self.gate = gate_fn() 29 | 30 | def forward(self, x): 31 | return x * self.gate(self.mlp(self.max_pool(x)) + self.mlp(self.avg_pool(x))) 32 | 33 | 34 | class SpatialAttention(nn.Module): 35 | def __init__( 36 | self, 37 | kernel_size: int = 7, 38 | gate_fn: nn.Module = nn.Sigmoid 39 | ) -> None: 40 | super().__init__() 41 | 42 | self.conv = nn.Conv2d(2, 1, kernel_size, padding=(kernel_size - 1) // 2, bias=False) 43 | self.norm = normalizer_fn(1) 44 | self.gate = gate_fn() 45 
| 46 | def forward(self, x): 47 | s = torch.cat([torch.amax(x, dim=1, keepdim=True), torch.mean(x, dim=1, keepdim=True)], dim=1) 48 | return x * self.gate(self.norm(self.conv(s))) 49 | 50 | 51 | class CBAM(nn.Sequential): 52 | r""" 53 | Paper: CBAM: Convolutional Block Attention Module, https://arxiv.org/abs/1807.06521 54 | Code: https://github.com/Jongchan/attention-module 55 | """ 56 | 57 | def __init__( 58 | self, 59 | in_channels, 60 | rd_ratio, 61 | kernel_size: int = 7, 62 | gate_fn: nn.Module = nn.Sigmoid 63 | ) -> None: 64 | super().__init__( 65 | ChannelAttention(in_channels, rd_ratio, gate_fn=gate_fn), 66 | SpatialAttention(kernel_size=kernel_size, gate_fn=gate_fn) 67 | ) 68 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/channel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from ..functional import channel_shuffle 4 | from typing import List 5 | 6 | 7 | class ChannelChunk(nn.Module): 8 | def __init__(self, groups: int): 9 | super().__init__() 10 | 11 | self.groups = groups 12 | 13 | def forward(self, x: torch.Tensor): 14 | return torch.chunk(x, self.groups, dim=1) 15 | 16 | def extra_repr(self): 17 | return f'groups={self.groups}' 18 | 19 | 20 | class ChannelSplit(nn.Module): 21 | def __init__(self, sections): 22 | super().__init__() 23 | 24 | self.sections = sections 25 | 26 | def forward(self, x: torch.Tensor): 27 | return torch.split(x, self.sections, dim=1) 28 | 29 | def extra_repr(self): 30 | return f'sections={self.sections}' 31 | 32 | 33 | class ChannelShuffle(nn.Module): 34 | def __init__(self, groups: int): 35 | super().__init__() 36 | 37 | self.groups = groups 38 | 39 | def forward(self, x): 40 | return channel_shuffle(x, self.groups) 41 | 42 | def extra_repr(self): 43 | return 'groups={}'.format(self.groups) 44 | 45 | 46 | class Combine(nn.Module): 47 | def __init__(self, method: str = 'ADD', *args, **kwargs): 48 | super().__init__() 49 | assert method in ['ADD', 'CONCAT'], '' 50 | 51 | self.method = method 52 | self._combine = self._add if self.method == 'ADD' else self._cat 53 | 54 | @staticmethod 55 | def _add(x): 56 | return x[0] + x[1] 57 | 58 | @staticmethod 59 | def _cat(x): 60 | return torch.cat(x, dim=1) 61 | 62 | def forward(self, x): 63 | return self._combine(x) 64 | 65 | def extra_repr(self): 66 | return f'method=\'{self.method}\'' 67 | 68 | 69 | class ConcatBranches(nn.Sequential): 70 | def forward(self, x): 71 | res = [] 72 | for module in self: 73 | res.append(module(x)) 74 | return torch.cat(res, dim=1) 75 | 76 | 77 | class Permute(nn.Module): 78 | def __init__(self, dims: List[int]): 79 | super().__init__() 80 | self.dims = dims 81 | 82 | def forward(self, x): 83 | return x.permute(*self.dims) 84 | 85 | def extra_repr(self): 86 | return ', '.join([str(dim) for dim in self.dims]) 87 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/depthwise_separable_conv2d.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from . 
import factory 3 | 4 | 5 | class DepthwiseConv2d(nn.Conv2d): 6 | def __init__( 7 | self, 8 | inp, 9 | oup, 10 | kernel_size: int = 3, 11 | stride: int = 1, 12 | padding: int = None, 13 | dilation: int = 1, 14 | bias: bool = False, 15 | ): 16 | if padding is None: 17 | padding = ((kernel_size - 1) * (dilation - 1) + kernel_size) // 2 18 | 19 | super().__init__( 20 | inp, oup, kernel_size, stride=stride, 21 | padding=padding, dilation=dilation, bias=bias, groups=inp 22 | ) 23 | 24 | 25 | class PointwiseConv2d(nn.Conv2d): 26 | def __init__( 27 | self, 28 | inp, 29 | oup, 30 | stride: int = 1, 31 | bias: bool = False, 32 | groups: int = 1 33 | ): 34 | super().__init__(inp, oup, 1, stride=stride, padding=0, bias=bias, groups=groups) 35 | 36 | 37 | class DepthwiseConv2dBN(nn.Sequential): 38 | def __init__( 39 | self, 40 | inp, 41 | oup, 42 | kernel_size: int = 3, 43 | stride: int = 1, 44 | padding: int = None, 45 | dilation: int = 1, 46 | normalizer_fn: nn.Module = None 47 | ): 48 | normalizer_fn = normalizer_fn or factory._NORMALIZER 49 | 50 | super().__init__( 51 | DepthwiseConv2d(inp, oup, kernel_size, stride=stride, padding=padding, dilation=dilation) 52 | ) 53 | 54 | if normalizer_fn: 55 | self.add_module(str(self.__len__()), normalizer_fn(oup)) 56 | 57 | 58 | class PointwiseConv2dBN(nn.Sequential): 59 | def __init__( 60 | self, 61 | inp, 62 | oup, 63 | stride: int = 1, 64 | normalizer_fn: nn.Module = None 65 | ): 66 | normalizer_fn = normalizer_fn or factory._NORMALIZER 67 | 68 | super().__init__( 69 | PointwiseConv2d(inp, oup, stride=stride) 70 | ) 71 | 72 | if normalizer_fn: 73 | self.add_module(str(self.__len__()), normalizer_fn(oup)) 74 | 75 | 76 | class DepthwiseBlock(nn.Sequential): 77 | def __init__( 78 | self, 79 | inp, 80 | oup, 81 | kernel_size: int = 3, 82 | stride: int = 1, 83 | padding: int = None, 84 | dilation: int = 1, 85 | normalizer_fn: nn.Module = None, 86 | activation_fn: nn.Module = None, 87 | norm_position: str = None 88 | ): 89 | super().__init__( 90 | DepthwiseConv2d(inp, oup, kernel_size, stride, padding=padding, dilation=dilation), 91 | *factory.norm_activation(oup, normalizer_fn, activation_fn, norm_position) 92 | ) 93 | 94 | 95 | class PointwiseBlock(nn.Sequential): 96 | def __init__( 97 | self, 98 | inp, 99 | oup, 100 | stride: int = 1, 101 | groups: int = 1, 102 | normalizer_fn: nn.Module = None, 103 | activation_fn: nn.Module = None, 104 | norm_position: str = None, 105 | ): 106 | super().__init__( 107 | PointwiseConv2d(inp, oup, stride=stride, groups=groups), 108 | *factory.norm_activation(oup, normalizer_fn, activation_fn, norm_position) 109 | ) 110 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/drop.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class StochasticDepth(nn.Module): 6 | r"""Stochastic Depth: Drop paths per sample (when applied in main path of residual blocks) 7 | 8 | Paper: 9 | Deep Networks with Stochastic Depth, https://arxiv.org/abs/1603.09382 10 | """ 11 | 12 | def __init__(self, survival_prob: float): 13 | super().__init__() 14 | 15 | self.p = survival_prob 16 | 17 | def forward(self, x): 18 | if self.p == 1. 
or not self.training: 19 | return x 20 | 21 | # work with diff dim tensors, not just 2D ConvNets 22 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) 23 | 24 | probs = self.p + torch.rand(shape, dtype=x.dtype, device=x.device) 25 | # We therefore need to re-calibrate the outputs of any given function f 26 | # by the expected number of times it participates in training, p. 27 | return (x / self.p) * probs.floor_() 28 | 29 | def extra_repr(self): 30 | return f'survival_prob={self.p}' 31 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/efficient_channel_attention.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class EfficientChannelAttention(nn.Module): 7 | r""" 8 | Paper: ECA-Net: Efficient Channel Attention for Deep Convolutional Neural Networks, https://arxiv.org/abs/1910.03151 9 | """ 10 | def __init__( 11 | self, 12 | in_channels, 13 | gamma=2, 14 | beta=2 15 | ) -> None: 16 | super().__init__() 17 | 18 | t = int(abs((math.log(in_channels, 2) + beta) / gamma)) 19 | k = max(t if t % 2 else t + 1, 3) 20 | 21 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 22 | self.conv = nn.Conv1d(1, 1, kernel_size=k, padding=(k - 1) // 2) 23 | self.gate = nn.Sigmoid() 24 | 25 | def forward(self, x: torch.Tensor): 26 | y = self.pool(x) 27 | y = self.conv(y.view(y.shape[0], 1, -1)) 28 | y = y.view(y.shape[0], -1, 1, 1) 29 | y = self.gate(y) 30 | 31 | return x * y.expand_as(x) 32 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/factory.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from contextlib import contextmanager 3 | from functools import partial 4 | import torch.nn as nn 5 | from .squeeze_excite import SEBlock 6 | 7 | _NORM_POSIITON: str = 'before' 8 | _NORMALIZER: nn.Module = nn.BatchNorm2d 9 | _ACTIVATION: nn.Module = partial(nn.ReLU, inplace=True) 10 | _ATTENTION: nn.Module = SEBlock 11 | 12 | 13 | class Nil: 14 | ... 
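# Added note (sketch, not in the original source): `Nil` is a sentinel default so the
# `normalizer()` context manager below can tell "keep the current normalizer" (fn=Nil)
# apart from "disable normalization explicitly" (fn=None). These context managers also
# work as decorators, as in `@blocks.normalizer(position='after')` used by
# mobilenet_v1_x1_0_wo_dwrelubn above; a hypothetical override might look like:
#
#     with blocks.normalizer(partial(nn.GroupNorm, 8), position='after'):
#         model = SomeModel()   # blocks created here get GroupNorm placed after the activation
#
# `partial(nn.GroupNorm, 8)` and `SomeModel` are only illustrative placeholders.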
15 | 16 | 17 | @contextmanager 18 | def normalizer( 19 | # _NORMALIZER can be None, Nil: _NORMALIZER->_NORMALIZER, None: _NORMALIZER->None 20 | fn: nn.Module = Nil, 21 | position: str = None 22 | ): 23 | 24 | global _NORMALIZER, _NORM_POSIITON 25 | 26 | fn = _NORMALIZER if fn == Nil else fn 27 | position = position or _NORM_POSIITON 28 | 29 | _pre_normalizer = _NORMALIZER 30 | _pre_position = _NORM_POSIITON 31 | 32 | _NORMALIZER = fn 33 | _NORM_POSIITON = position 34 | 35 | yield 36 | 37 | _NORMALIZER = _pre_normalizer 38 | _NORM_POSIITON = _pre_position 39 | 40 | 41 | @contextmanager 42 | def activation(fn: nn.Module): 43 | global _ACTIVATION 44 | 45 | _pre_activation = _ACTIVATION 46 | _ACTIVATION = fn 47 | yield 48 | _ACTIVATION = _pre_activation 49 | 50 | 51 | @contextmanager 52 | def attention(fn: nn.Module): 53 | global _ATTENTION 54 | 55 | _pre_attn = _ATTENTION 56 | _ATTENTION = fn 57 | yield 58 | _ATTENTION = _pre_attn 59 | 60 | 61 | def normalizer_fn(channels): 62 | return _NORMALIZER(channels) 63 | 64 | 65 | def activation_fn(): 66 | return _ACTIVATION() 67 | 68 | 69 | def attention_fn(channels, **kwargs): 70 | return _ATTENTION(channels, **kwargs) 71 | 72 | 73 | def norm_activation( 74 | channels, 75 | normalizer_fn: nn.Module = None, 76 | activation_fn: nn.Module = None, 77 | norm_position: str = None 78 | ) -> List[nn.Module]: 79 | norm_position = norm_position or _NORM_POSIITON 80 | assert norm_position in ['before', 'after', 'none'], '' 81 | 82 | normalizer_fn = normalizer_fn or _NORMALIZER 83 | activation_fn = activation_fn or _ACTIVATION 84 | 85 | if normalizer_fn == None and activation_fn == None: 86 | return [] 87 | 88 | if normalizer_fn == None: 89 | return [activation_fn()] 90 | 91 | if activation_fn == None: 92 | return [normalizer_fn(channels)] 93 | 94 | if norm_position == 'after': 95 | return [activation_fn(), normalizer_fn(channels)] 96 | 97 | return [normalizer_fn(channels), activation_fn()] 98 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/gather_excite.py: -------------------------------------------------------------------------------- 1 | import math 2 | from functools import partial 3 | from contextlib import contextmanager 4 | from torch import nn 5 | from .depthwise_separable_conv2d import DepthwiseBlock, DepthwiseConv2dBN 6 | from .interpolate import Interpolate 7 | 8 | _GE_INNER_NONLINEAR: nn.Module = partial(nn.ReLU, inplace=True) 9 | _GE_GATING_FN: nn.Module = nn.Sigmoid 10 | 11 | 12 | @contextmanager 13 | def ge( 14 | inner_nonlinear: nn.Module = _GE_INNER_NONLINEAR, 15 | gating_fn: nn.Module = _GE_GATING_FN 16 | ): 17 | global _GE_INNER_NONLINEAR 18 | global _GE_GATING_FN 19 | 20 | _pre_inner_fn = _GE_INNER_NONLINEAR 21 | _pre_fn = _GE_GATING_FN 22 | 23 | _GE_INNER_NONLINEAR = inner_nonlinear 24 | _GE_GATING_FN = gating_fn 25 | 26 | yield 27 | 28 | _GE_INNER_NONLINEAR = _pre_inner_fn 29 | _GE_GATING_FN = _pre_fn 30 | 31 | 32 | class GatherExciteBlock(nn.Module): 33 | r"""Gather-Excite Block 34 | Paper: Gather-Excite: Exploiting Feature Context in Convolutional Neural Networks, https://arxiv.org/abs/1810.12348 35 | Code: https://github.com/hujie-frank/GENet 36 | """ 37 | 38 | def __init__( 39 | self, 40 | channels, 41 | extent_ratio: int = 0, 42 | param_free: bool = True, 43 | kernel_size: int = 3, 44 | inner_activation_fn: nn.Module = None, 45 | gating_fn: nn.Module = None 46 | ): 47 | super().__init__() 48 | 49 | inner_activation_fn = inner_activation_fn or _GE_INNER_NONLINEAR 50 | gating_fn 
= gating_fn or _GE_GATING_FN 51 | 52 | self.gather = nn.Sequential() 53 | 54 | if param_free is True: 55 | if extent_ratio == 0: 56 | self.gather = nn.AdaptiveAvgPool2d((1, 1)) 57 | else: 58 | self.gather = nn.AvgPool2d((15, 15), stride=extent_ratio) 59 | else: 60 | if extent_ratio == 0: 61 | self.gather.append(DepthwiseConv2dBN(channels, channels, kernel_size=kernel_size, padding=0)) 62 | else: 63 | for i in range(int(math.log2(extent_ratio))): 64 | if i != (int(math.log2(extent_ratio)) - 1): 65 | self.gather.append(DepthwiseBlock(channels, channels, kernel_size=kernel_size, 66 | stride=2, activation_fn=inner_activation_fn)) 67 | else: 68 | self.gather.append(DepthwiseConv2dBN(channels, channels, kernel_size=kernel_size, stride=2)) 69 | 70 | self.excite = Interpolate() 71 | self.gate = gating_fn() 72 | 73 | def _forward(self, x): 74 | size = x.shape[-2:] 75 | 76 | # gather 77 | x = self.gather(x) 78 | 79 | if x.shape[-1] != 1: 80 | x = self.excite(x, size) 81 | 82 | x = self.gate(x) 83 | 84 | return x 85 | 86 | def forward(self, x): 87 | return x * self._forward(x) 88 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/gaussian_blur.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from . import factory 5 | from ..functional import get_gaussian_kernels2d 6 | from typing import Tuple 7 | 8 | 9 | class GaussianBlur(nn.Module): 10 | def __init__( 11 | self, 12 | channels: int, 13 | kernel_size: int = 3, 14 | sigma_range: Tuple[float, float] = (1.0, 1.0), 15 | normalize: bool = True, 16 | stride: int = 1, 17 | padding: int = None, 18 | dilation: int = 1 19 | ): 20 | super().__init__() 21 | 22 | padding = padding or ((kernel_size - 1) * (dilation - 1) + kernel_size) // 2 23 | 24 | self.channels = channels 25 | self.kernel_size = (kernel_size, kernel_size) 26 | self.padding = (padding, padding) 27 | self.stride = (stride, stride) 28 | self.dilation = (dilation, dilation) 29 | self.padding_mode = 'zeros' 30 | self.sigma_range = sigma_range 31 | self.normalize = normalize 32 | 33 | self.register_buffer( 34 | 'weight', 35 | get_gaussian_kernels2d( 36 | kernel_size, 37 | torch.linspace(self.sigma_range[0], self.sigma_range[1], self.channels).view(-1, 1, 1, 1), 38 | self.normalize 39 | ) 40 | ) 41 | 42 | def forward(self, x): 43 | return F.conv2d(x, self.weight, None, self.stride, self.padding, self.dilation, self.channels) 44 | 45 | @property 46 | def out_channels(self): 47 | return self.channels 48 | 49 | def extra_repr(self): 50 | s = ('{channels}, kernel_size={kernel_size}' 51 | ', sigma_range={sigma_range}, normalize={normalize}, stride={stride}') 52 | if self.padding != (0,) * len(self.padding): 53 | s += ', padding={padding}' 54 | if self.dilation != (1,) * len(self.dilation): 55 | s += ', dilation={dilation}' 56 | if self.padding_mode != 'zeros': 57 | s += ', padding_mode={padding_mode}' 58 | return s.format(**self.__dict__) 59 | 60 | 61 | class GaussianBlurBN(nn.Sequential): 62 | def __init__( 63 | self, 64 | channels, 65 | kernel_size: int = 3, 66 | sigma_range: Tuple[float, float] = (1.0, 1.0), 67 | normalize: bool = True, 68 | stride: int = 1, 69 | padding: int = None, 70 | dilation: int = 1, 71 | normalizer_fn: nn.Module = None 72 | ): 73 | normalizer_fn = normalizer_fn or factory._NORMALIZER 74 | 75 | super().__init__( 76 | GaussianBlur(channels, kernel_size, sigma_range, normalize, 77 | stride=stride, 
padding=padding, dilation=dilation), 78 | normalizer_fn(channels) 79 | ) 80 | 81 | 82 | class GaussianBlurBlock(nn.Sequential): 83 | def __init__( 84 | self, 85 | channels, 86 | kernel_size: int = 3, 87 | sigma_range: Tuple[float, float] = (1.0, 1.0), 88 | normalize: bool = True, 89 | stride: int = 1, 90 | padding: int = None, 91 | dilation: int = 1, 92 | normalizer_fn: nn.Module = None, 93 | activation_fn: nn.Module = None, 94 | norm_position: str = None 95 | ): 96 | super().__init__( 97 | GaussianBlur(channels, kernel_size, sigma_range, normalize, 98 | stride=stride, padding=padding, dilation=dilation), 99 | *factory.norm_activation(channels, normalizer_fn, activation_fn, norm_position) 100 | ) 101 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/global_context.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from .vanilla_conv2d import Conv2d1x1 4 | from .norm import LayerNorm2d 5 | from ..functional import make_divisible 6 | 7 | 8 | class GlobalContextBlock(nn.Module): 9 | r""" 10 | Paper: GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond, https://arxiv.org/abs/1904.11492 11 | """ 12 | 13 | def __init__( 14 | self, 15 | in_channels, 16 | rd_ratio: float = 1/8, 17 | rd_divisor: int = 8, 18 | ) -> None: 19 | super().__init__() 20 | 21 | channels = make_divisible(in_channels * rd_ratio, rd_divisor) 22 | 23 | self.conv1x1 = Conv2d1x1(in_channels, 1, bias=True) 24 | self.softmax = nn.Softmax(dim=1) 25 | 26 | self.transform = nn.Sequential( 27 | Conv2d1x1(in_channels, channels), 28 | LayerNorm2d(channels), 29 | nn.ReLU(inplace=True), 30 | Conv2d1x1(channels, in_channels) 31 | ) 32 | 33 | def forward(self, x): 34 | # context modeling 35 | c = torch.einsum( 36 | "ncx, nxo -> nco", 37 | x.view(x.shape[0], x.shape[1], -1), 38 | self.softmax(self.conv1x1(x).view(x.shape[0], -1, 1)) 39 | ) 40 | c = x * c.unsqueeze(-1) 41 | 42 | # transform 43 | return x + self.transform(c) 44 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/interpolate.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch.nn.functional as F 3 | from typing import Optional, List 4 | 5 | 6 | class Interpolate(nn.Module): 7 | def __init__(self, mode='nearest') -> None: 8 | super().__init__() 9 | 10 | self.mode = mode 11 | 12 | def forward(self, x, size: Optional[int] = None, scale_factor: Optional[List[float]] = None): 13 | return F.interpolate(x, size=size, scale_factor=scale_factor, mode=self.mode) 14 | 15 | def extra_repr(self) -> str: 16 | return 'mode=' + self.mode 17 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/inverted_residual_block.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from . 
import factory 3 | from .vanilla_conv2d import Conv2d1x1Block, Conv2d1x1BN, Conv2dBlock 4 | from .depthwise_separable_conv2d import DepthwiseBlock, DepthwiseConv2dBN 5 | from .squeeze_excite import SEBlock 6 | from .channel import Combine 7 | from .drop import StochasticDepth 8 | 9 | 10 | class InvertedResidualBlock(nn.Module): 11 | def __init__( 12 | self, 13 | inp, 14 | oup, 15 | t, 16 | kernel_size: int = 3, 17 | stride: int = 1, 18 | padding: int = None, 19 | dilation: int = 1, 20 | rd_ratio: float = None, 21 | se_ind: bool = False, 22 | survival_prob: float = None, 23 | normalizer_fn: nn.Module = None, 24 | activation_fn: nn.Module = None, 25 | dw_se_act: nn.Module = None 26 | ): 27 | super().__init__() 28 | 29 | self.inp = inp 30 | self.planes = int(self.inp * t) 31 | self.oup = oup 32 | self.stride = stride 33 | self.apply_residual = (self.stride == 1) and (self.inp == self.oup) 34 | self.rd_ratio = rd_ratio if se_ind or rd_ratio is None else (rd_ratio / t) 35 | self.has_attn = (self.rd_ratio is not None) and (self.rd_ratio > 0) and (self.rd_ratio <= 1) 36 | 37 | normalizer_fn = normalizer_fn or factory._NORMALIZER 38 | activation_fn = activation_fn or factory._ACTIVATION 39 | 40 | layers = [] 41 | if t != 1: 42 | layers.append(Conv2d1x1Block(inp, self.planes, normalizer_fn=normalizer_fn, activation_fn=activation_fn)) 43 | 44 | if dw_se_act is None: 45 | layers.append(DepthwiseBlock(self.planes, self.planes, kernel_size, stride=self.stride, 46 | padding=padding, dilation=dilation, normalizer_fn=normalizer_fn, activation_fn=activation_fn)) 47 | else: 48 | layers.append(DepthwiseConv2dBN(self.planes, self.planes, kernel_size, stride=self.stride, padding=padding, 49 | dilation=dilation, normalizer_fn=normalizer_fn)) 50 | 51 | if self.has_attn: 52 | layers.append(SEBlock(self.planes, rd_ratio=self.rd_ratio)) 53 | 54 | if dw_se_act: 55 | layers.append(dw_se_act()) 56 | 57 | layers.append(Conv2d1x1BN(self.planes, oup, normalizer_fn=normalizer_fn)) 58 | 59 | if self.apply_residual and survival_prob: 60 | layers.append(StochasticDepth(survival_prob)) 61 | 62 | self.branch1 = nn.Sequential(*layers) 63 | self.branch2 = nn.Identity() if self.apply_residual else None 64 | self.combine = Combine('ADD') if self.apply_residual else None 65 | 66 | def forward(self, x): 67 | if self.apply_residual: 68 | return self.combine([self.branch2(x), self.branch1(x)]) 69 | else: 70 | return self.branch1(x) 71 | 72 | 73 | class FusedInvertedResidualBlock(nn.Module): 74 | def __init__( 75 | self, 76 | inp, 77 | oup, 78 | t, 79 | kernel_size: int = 3, 80 | stride: int = 1, 81 | padding: int = None, 82 | rd_ratio: float = None, 83 | se_ind: bool = False, 84 | survival_prob: float = None, 85 | normalizer_fn: nn.Module = None, 86 | activation_fn: nn.Module = None 87 | ): 88 | super().__init__() 89 | 90 | self.inp = inp 91 | self.planes = int(self.inp * t) 92 | self.oup = oup 93 | self.stride = stride 94 | self.padding = padding if padding is not None else (kernel_size // 2) 95 | self.apply_residual = (self.stride == 1) and (self.inp == self.oup) 96 | self.rd_ratio = rd_ratio if se_ind or rd_ratio is None else (rd_ratio / t) 97 | self.has_attn = (self.rd_ratio is not None) and (self.rd_ratio > 0) and (self.rd_ratio <= 1) 98 | 99 | normalizer_fn = normalizer_fn or factory._NORMALIZER 100 | activation_fn = activation_fn or factory._ACTIVATION 101 | 102 | layers = [ 103 | Conv2dBlock(inp, self.planes, kernel_size, stride=self.stride, padding=self.padding, 104 | normalizer_fn=normalizer_fn, activation_fn=activation_fn) 
105 | ] 106 | 107 | if self.has_attn: 108 | layers.append(SEBlock(self.planes, rd_ratio=self.rd_ratio)) 109 | 110 | layers.append(Conv2d1x1BN( 111 | self.planes, oup, normalizer_fn=normalizer_fn)) 112 | 113 | if self.apply_residual and survival_prob: 114 | layers.append(StochasticDepth(survival_prob)) 115 | 116 | self.branch1 = nn.Sequential(*layers) 117 | self.branch2 = nn.Identity() if self.apply_residual else None 118 | self.combine = Combine('ADD') if self.apply_residual else None 119 | 120 | def forward(self, x): 121 | if self.apply_residual: 122 | return self.combine([self.branch2(x), self.branch1(x)]) 123 | else: 124 | return self.branch1(x) 125 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/mlp.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from typing import OrderedDict 3 | 4 | 5 | class MlpBlock(nn.Sequential): 6 | def __init__( 7 | self, 8 | in_features, 9 | hidden_features=None, 10 | out_features=None, 11 | activation_fn: nn.Module = nn.GELU, 12 | dropout_rate: float = 0. 13 | ): 14 | hidden_features = hidden_features or in_features 15 | out_features = out_features or in_features 16 | 17 | super().__init__( 18 | nn.Linear(in_features, hidden_features), 19 | activation_fn(), 20 | nn.Dropout(dropout_rate), 21 | nn.Linear(hidden_features, out_features), 22 | nn.Dropout(dropout_rate) 23 | ) 24 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/non_local.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from .vanilla_conv2d import Conv2d1x1 4 | from ..functional import make_divisible 5 | 6 | 7 | class NonLocalBlock(nn.Module): 8 | r"""Non-Local Block for image classification 9 | Paper: Non-local Neural Networks, https://arxiv.org/abs/1711.07971 10 | Code: https://github.com/facebookresearch/video-nonlocal-net 11 | """ 12 | 13 | def __init__( 14 | self, 15 | in_channels, 16 | rd_ratio, 17 | rd_divisor: int = 8, 18 | use_scale: bool = True, 19 | use_norm: bool = True 20 | ): 21 | super().__init__() 22 | 23 | channels = make_divisible(in_channels * rd_ratio, rd_divisor) 24 | 25 | self.ratio = rd_ratio 26 | self.scale = channels ** -0.5 if use_scale else 1.0 27 | self.use_scale = use_scale 28 | 29 | # theta, phi, g 30 | self.W = Conv2d1x1(in_channels, channels * 3, bias=True) 31 | 32 | # z 33 | self.Z = nn.Sequential( 34 | Conv2d1x1(channels, in_channels, bias=not use_norm), 35 | nn.BatchNorm2d(in_channels) if use_norm else nn.Identity() 36 | ) 37 | 38 | self.reset_parameters() 39 | 40 | def reset_parameters(self): 41 | for name, m in self.named_modules(): 42 | if isinstance(m, nn.Conv2d): 43 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 44 | if len(list(m.parameters())) > 1: 45 | nn.init.constant_(m.bias, 0.0) 46 | elif isinstance(m, nn.BatchNorm2d): 47 | nn.init.constant_(m.weight, 0.0) 48 | nn.init.constant_(m.bias, 0.0) 49 | elif isinstance(m, nn.GroupNorm): 50 | nn.init.constant_(m.weight, 0.0) 51 | nn.init.constant_(m.bias, 0.0) 52 | 53 | def forward(self, x: torch.Tensor) -> torch.Tensor: 54 | N, _, H, W = x.size() 55 | 56 | # self-attention: y = softmax((Q(x) @ K(x)) / N) @ V(x). 
@{ 57 | t, p, g = torch.chunk(torch.flatten(self.W(x), start_dim=2), 3, dim=1) # Q, K, V 58 | 59 | s = torch.einsum('ncq, nck -> nqk', t, p) 60 | s = torch.softmax(s * self.scale, dim=2) 61 | s = torch.einsum('nqv, ncv -> ncq', s, g) 62 | # @} 63 | 64 | z = self.Z(s.contiguous().view(N, -1, H, W)) 65 | 66 | return z + x 67 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/norm.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class LayerNorm2d(nn.LayerNorm): 6 | """ LayerNorm for channels of '2D' spatial BCHW tensors """ 7 | 8 | def forward(self, x): 9 | x = x.permute(0, 2, 3, 1) 10 | x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 11 | x = x.permute(0, 3, 1, 2) 12 | return x 13 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/selective_kernel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from .vanilla_conv2d import Conv2d1x1, Conv2d1x1Block 4 | from .depthwise_separable_conv2d import DepthwiseBlock 5 | from .channel import Combine 6 | from ..functional import make_divisible 7 | 8 | 9 | class SelectiveKernelBlock(nn.Module): 10 | r""" 11 | Paper: Selective Kernel Networks, https://arxiv.org/abs/1903.06586 12 | """ 13 | 14 | def __init__( 15 | self, 16 | in_channels, 17 | rd_ratio: float = 1/8, 18 | rd_divisor: int = 8, 19 | ) -> None: 20 | super().__init__() 21 | 22 | self.in_channels = in_channels 23 | 24 | rd_channels = max(make_divisible(in_channels * rd_ratio, rd_divisor), 32) 25 | 26 | self.conv3x3 = DepthwiseBlock(in_channels, in_channels, kernel_size=3, dilation=1) 27 | self.conv5x5 = DepthwiseBlock(in_channels, in_channels, kernel_size=3, dilation=2) 28 | 29 | self.fuse = Combine('ADD') 30 | 31 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 32 | 33 | self.reduce = Conv2d1x1Block(in_channels, rd_channels) 34 | 35 | self.qk = Conv2d1x1(rd_channels, in_channels * 2, bias=True) 36 | self.softmax = nn.Softmax(dim=1) 37 | 38 | def forward(self, x): 39 | u3 = self.conv3x3(x) 40 | u5 = self.conv5x5(x) 41 | 42 | u = self.fuse([u3, u5]) 43 | 44 | s = self.pool(u) 45 | 46 | z = self.reduce(s) 47 | 48 | ab = self.softmax(self.qk(z).view(-1, 2, self.in_channels, 1, 1)) 49 | 50 | v = torch.sum(torch.stack([u3, u5], dim=1) * ab, dim=1) 51 | 52 | return v 53 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/squeeze_excite.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from contextlib import contextmanager 3 | from torch import nn 4 | from . 
import factory 5 | from .vanilla_conv2d import Conv2d1x1 6 | from ..functional import make_divisible 7 | from typing import OrderedDict 8 | 9 | _SE_INNER_NONLINEAR: nn.Module = partial(nn.ReLU, inplace=True) 10 | _SE_GATING_FN: nn.Module = nn.Sigmoid 11 | _SE_DIVISOR: int = 8 12 | _SE_USE_NORM: bool = False 13 | 14 | 15 | @contextmanager 16 | def se( 17 | inner_nonlinear: nn.Module = _SE_INNER_NONLINEAR, 18 | gating_fn: nn.Module = _SE_GATING_FN, 19 | divisor: int = _SE_DIVISOR, 20 | use_norm: bool = _SE_USE_NORM 21 | ): 22 | global _SE_INNER_NONLINEAR 23 | global _SE_GATING_FN 24 | global _SE_DIVISOR 25 | global _SE_USE_NORM 26 | 27 | _pre_inner_fn = _SE_INNER_NONLINEAR 28 | _pre_fn = _SE_GATING_FN 29 | _pre_divisor = _SE_DIVISOR 30 | _pre_use_norm = _SE_USE_NORM 31 | _SE_INNER_NONLINEAR = inner_nonlinear 32 | _SE_GATING_FN = gating_fn 33 | _SE_DIVISOR = divisor 34 | _SE_USE_NORM = use_norm 35 | yield 36 | _SE_INNER_NONLINEAR = _pre_inner_fn 37 | _SE_GATING_FN = _pre_fn 38 | _SE_DIVISOR = _pre_divisor 39 | _SE_USE_NORM = _pre_use_norm 40 | 41 | 42 | class SEBlock(nn.Sequential): 43 | """Squeeze-and-Excitation Block 44 | """ 45 | 46 | def __init__( 47 | self, 48 | channels, 49 | rd_ratio, 50 | inner_activation_fn: nn.Module = None, 51 | gating_fn: nn.Module = None 52 | ): 53 | squeezed_channels = make_divisible(int(channels * rd_ratio), _SE_DIVISOR) 54 | inner_activation_fn = inner_activation_fn or _SE_INNER_NONLINEAR 55 | gating_fn = gating_fn or _SE_GATING_FN 56 | 57 | layers = OrderedDict([]) 58 | 59 | layers['pool'] = nn.AdaptiveAvgPool2d((1, 1)) 60 | layers['reduce'] = Conv2d1x1(channels, squeezed_channels, bias=True) 61 | if _SE_USE_NORM: 62 | layers['norm'] = factory.normalizer_fn(squeezed_channels) 63 | layers['act'] = inner_activation_fn() 64 | layers['expand'] = Conv2d1x1(squeezed_channels, channels, bias=True) 65 | layers['gate'] = gating_fn() 66 | 67 | super().__init__(layers) 68 | 69 | def _forward(self, input): 70 | for module in self: 71 | input = module(input) 72 | return input 73 | 74 | def forward(self, x): 75 | return x * self._forward(x) 76 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/stage.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from typing import Union, List 3 | 4 | 5 | class Stage(nn.Sequential): 6 | def __init__(self, *args): 7 | if len(args) == 1 and isinstance(args[0], list): 8 | args = args[0] 9 | super().__init__(*args) 10 | 11 | def append(self, m: Union[nn.Module, List[nn.Module]]): 12 | if isinstance(m, nn.Module): 13 | self.add_module(str(len(self)), m) 14 | elif isinstance(m, list): 15 | [self.append(i) for i in m] 16 | else: 17 | ValueError('') 18 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/vanilla_conv2d.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from . 
import factory 3 | 4 | 5 | class Conv2d3x3(nn.Conv2d): 6 | def __init__( 7 | self, 8 | in_channels: int, 9 | out_channels: int, 10 | stride: int = 1, 11 | padding: int = None, 12 | dilation: int = 1, 13 | bias: bool = False, 14 | groups: int = 1 15 | ): 16 | padding = padding if padding is not None else dilation 17 | super().__init__( 18 | in_channels, out_channels, 3, stride=stride, 19 | padding=padding, dilation=dilation, bias=bias, groups=groups 20 | ) 21 | 22 | 23 | class Conv2d1x1(nn.Conv2d): 24 | def __init__( 25 | self, 26 | in_channels: int, 27 | out_channels: int, 28 | stride: int = 1, 29 | padding: int = 0, 30 | bias: bool = False, 31 | groups: int = 1 32 | ): 33 | super().__init__( 34 | in_channels, out_channels, 1, stride=stride, 35 | padding=padding, bias=bias, groups=groups 36 | ) 37 | 38 | 39 | class Conv2d3x3BN(nn.Sequential): 40 | def __init__( 41 | self, 42 | in_channels: int, 43 | out_channels: int, 44 | stride: int = 1, 45 | padding: int = None, 46 | dilation: int = 1, 47 | bias: bool = False, 48 | groups: int = 1, 49 | normalizer_fn: nn.Module = None 50 | ): 51 | normalizer_fn = normalizer_fn or factory._NORMALIZER 52 | padding = padding if padding is not None else dilation 53 | 54 | super().__init__( 55 | Conv2d3x3(in_channels, out_channels, stride=stride, 56 | padding=padding, dilation=dilation, bias=bias, groups=groups) 57 | ) 58 | if normalizer_fn: 59 | self.add_module(str(self.__len__()), normalizer_fn(out_channels)) 60 | 61 | 62 | class Conv2d1x1BN(nn.Sequential): 63 | def __init__( 64 | self, 65 | in_channels: int, 66 | out_channels: int, 67 | stride: int = 1, 68 | padding: int = 0, 69 | bias: bool = False, 70 | groups: int = 1, 71 | normalizer_fn: nn.Module = None 72 | ): 73 | normalizer_fn = normalizer_fn or factory._NORMALIZER 74 | 75 | super().__init__( 76 | Conv2d1x1(in_channels, out_channels, stride=stride, 77 | padding=padding, bias=bias, groups=groups) 78 | ) 79 | if normalizer_fn: 80 | self.add_module(str(self.__len__()), normalizer_fn(out_channels)) 81 | 82 | 83 | class Conv2d1x1Block(nn.Sequential): 84 | def __init__( 85 | self, 86 | in_channels: int, 87 | out_channels: int, 88 | stride: int = 1, 89 | padding: int = 0, 90 | bias: bool = False, 91 | groups: int = 1, 92 | normalizer_fn: nn.Module = None, 93 | activation_fn: nn.Module = None, 94 | norm_position: str = None 95 | ): 96 | super().__init__( 97 | Conv2d1x1(in_channels, out_channels, stride=stride, 98 | padding=padding, bias=bias, groups=groups), 99 | *factory.norm_activation(out_channels, normalizer_fn, activation_fn, norm_position) 100 | ) 101 | 102 | 103 | class Conv2dBlock(nn.Sequential): 104 | def __init__( 105 | self, 106 | in_channels, 107 | out_channels, 108 | kernel_size: int = 3, 109 | stride: int = 1, 110 | padding: int = None, 111 | dilation: int = 1, 112 | bias: bool = False, 113 | groups: int = 1, 114 | normalizer_fn: nn.Module = None, 115 | activation_fn: nn.Module = None, 116 | norm_position: str = None, 117 | ): 118 | if padding is None: 119 | padding = ((kernel_size - 1) * (dilation - 1) + kernel_size) // 2 120 | 121 | super().__init__( 122 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, 123 | bias=bias, stride=stride, padding=padding, dilation=dilation, groups=groups), 124 | *factory.norm_activation(out_channels, normalizer_fn, activation_fn, norm_position) 125 | ) 126 | -------------------------------------------------------------------------------- /cvm/models/ops/functional.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from torch import fft 3 | 4 | __all__ = ['channel_shuffle', 'make_divisible', 5 | 'get_gaussian_kernel1d', 'get_gaussian_kernel2d', 6 | 'get_gaussian_bandpass_kernel2d', 'get_gaussian_kernels2d', 7 | 'get_distance_grid', 'spectral_filter'] 8 | 9 | 10 | def channel_shuffle(x, groups): 11 | batchsize, num_channels, height, width = x.data.size() 12 | channels_per_group = num_channels // groups 13 | 14 | # reshape 15 | x = x.view(batchsize, groups, 16 | channels_per_group, height, width) 17 | x = torch.transpose(x, 1, 2).contiguous() 18 | 19 | # flatten 20 | x = x.view(batchsize, -1, height, width) 21 | return x 22 | 23 | 24 | def make_divisible(value, divisor, min_value=None): 25 | if min_value is None: 26 | min_value = divisor 27 | 28 | new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) 29 | 30 | # Make sure that round down does not go down by more than 10%. 31 | if new_value < 0.9 * value: 32 | new_value += divisor 33 | 34 | return new_value 35 | 36 | 37 | def get_gaussian_kernel1d(kernel_size, sigma: float, normalize: bool = True): 38 | ksize_half = (kernel_size - 1) * 0.5 39 | 40 | x = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 41 | pdf = torch.exp(-0.5 * (x / sigma).pow(2)) 42 | return pdf / pdf.sum() if normalize else pdf 43 | 44 | 45 | def get_gaussian_kernel2d(kernel_size, sigma: float, normalize: bool = True): 46 | ksize_half = (kernel_size - 1) * 0.5 47 | 48 | xs = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 49 | ys = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 50 | 51 | x, y = torch.meshgrid(xs, ys, indexing='xy') 52 | 53 | pdf = torch.exp(-0.5 * ((x * x + y * y) / (sigma * sigma))) 54 | 55 | return pdf / pdf.sum() if normalize else pdf 56 | 57 | 58 | def get_gaussian_bandpass_kernel2d(kernel_size, sigma: float, W: float): 59 | ksize_half = (kernel_size - 1) * 0.5 60 | 61 | xs = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 62 | ys = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 63 | 64 | x, y = torch.meshgrid(xs, ys, indexing='xy') 65 | 66 | d2 = x * x + y * y 67 | d = torch.sqrt(d2) 68 | 69 | return torch.exp(-((d2 - sigma * sigma) / (d * W)).pow(2)) 70 | 71 | 72 | def get_gaussian_kernels2d(kernel_size, sigma: torch.Tensor, normalize: bool = True): 73 | ksize_half = (kernel_size - 1) * 0.5 74 | 75 | xs = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 76 | ys = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 77 | 78 | x, y = torch.meshgrid(xs, ys, indexing='xy') 79 | 80 | pdf = torch.exp(-0.5 * ((x * x + y * y).repeat(sigma.shape) / torch.pow(sigma, 2))) 81 | 82 | return pdf / pdf.sum([-2, -1], keepdim=True) if normalize else pdf 83 | 84 | 85 | def get_distance_grid(size): 86 | size_half = (size - 1) * 0.5 87 | 88 | xs = torch.linspace(-size_half, size_half, steps=size) 89 | ys = torch.linspace(-size_half, size_half, steps=size) 90 | 91 | x, y = torch.meshgrid(xs, ys, indexing='xy') 92 | 93 | return torch.sqrt(x * x + y * y) 94 | 95 | 96 | def spectral_filter(x, callback): 97 | fre_x = fft.fftshift(fft.fft2(x)) 98 | 99 | fre_x = callback(fre_x) 100 | 101 | return fft.ifft2(fft.ifftshift(fre_x)).real 102 | -------------------------------------------------------------------------------- /cvm/models/resmlp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | 
from .utils import export, config, load_from_local_or_url 6 | from typing import Any 7 | 8 | 9 | class Affine(nn.Module): 10 | def __init__(self, dim): 11 | super().__init__() 12 | 13 | self.alpha = nn.Parameter(torch.ones(1, 1, dim)) 14 | self.beta = nn.Parameter(torch.zeros(1, 1, dim)) 15 | 16 | def forward(self, x): 17 | return self.alpha * x + self.beta 18 | 19 | 20 | class ResMlpBlock(nn.Module): 21 | def __init__( 22 | self, 23 | hidden_dim, 24 | sequence_len, 25 | layerscale_init: float = 1e-4, 26 | dropout_rate: float = 0., 27 | drop_path_rate: float = 0. 28 | ): 29 | super().__init__() 30 | 31 | self.affine_1 = Affine(hidden_dim) 32 | self.linear_patches = nn.Linear(sequence_len, sequence_len) 33 | self.layerscale_1 = nn.Parameter(layerscale_init * torch.ones(hidden_dim)) 34 | self.drop1 = blocks.StochasticDepth(1.0 - drop_path_rate) 35 | 36 | self.affine_2 = Affine(hidden_dim) 37 | self.mlp_channels = blocks.MlpBlock(hidden_dim, hidden_dim * 4, dropout_rate=dropout_rate) 38 | self.layerscale_2 = nn.Parameter(layerscale_init * torch.ones(hidden_dim)) 39 | self.drop2 = blocks.StochasticDepth(1.0 - drop_path_rate) 40 | 41 | def forward(self, x): 42 | x = x + self.drop1(self.layerscale_1 * self.linear_patches(self.affine_1(x).transpose(1, 2)).transpose(1, 2)) 43 | x = x + self.drop2(self.layerscale_2 * self.mlp_channels(self.affine_2(x))) 44 | return x 45 | 46 | 47 | @export 48 | class ResMLP(nn.Module): 49 | def __init__( 50 | self, 51 | image_size: int = 224, 52 | in_channels: int = 3, 53 | num_classes: int = 1000, 54 | patch_size: int = 32, 55 | hidden_dim: int = 768, 56 | depth: int = 12, 57 | dropout_rate: float = 0., 58 | drop_path_rate: float = 0., 59 | **kwargs: Any 60 | ): 61 | super().__init__() 62 | 63 | num_patches = (image_size // patch_size) ** 2 64 | 65 | self.stem = nn.Conv2d(in_channels, hidden_dim, 66 | kernel_size=patch_size, stride=patch_size) 67 | 68 | self.blocks = nn.Sequential( 69 | *[ResMlpBlock( 70 | hidden_dim, 71 | num_patches, 72 | dropout_rate=dropout_rate, 73 | drop_path_rate=drop_path_rate 74 | ) for _ in range(depth)] 75 | ) 76 | 77 | self.affine = Affine(hidden_dim) 78 | self.classifier = nn.Linear(hidden_dim, num_classes) 79 | 80 | def forward(self, x): 81 | x = self.stem(x) 82 | x = x.flatten(2).transpose(1, 2) 83 | x = self.blocks(x) 84 | x = self.affine(x) 85 | x = x.mean(dim=1) 86 | x = self.classifier(x) 87 | return x 88 | 89 | 90 | def _resmlp( 91 | image_size: int = 224, 92 | patch_size: int = 16, 93 | hidden_dim: int = 768, 94 | depth: int = 12, 95 | pretrained: bool = False, 96 | pth: str = None, 97 | progress: bool = True, 98 | **kwargs: Any 99 | ): 100 | model = ResMLP(image_size, patch_size=patch_size, 101 | hidden_dim=hidden_dim, depth=depth, **kwargs) 102 | 103 | if pretrained: 104 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 105 | return model 106 | 107 | 108 | @export 109 | def resmlp_s12_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 110 | return _resmlp(224, 16, 384, 12, pretrained, pth, progress, **kwargs) 111 | 112 | 113 | @export 114 | def resmlp_s24_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 115 | return _resmlp(224, 16, 384, 24, pretrained, pth, progress, **kwargs) 116 | 117 | 118 | @export 119 | def resmlp_b24_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 120 | return _resmlp(224, 16, 768, 24, pretrained, pth, progress, **kwargs) 121 | 
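122 | 123 | if __name__ == '__main__': 124 |     # Minimal smoke-test sketch: build the smallest factory model defined above and 125 |     # check the classification head's output shape on a dummy 224x224 batch (the 126 |     # resolution the factories in this file assume). 127 |     model = resmlp_s12_224() 128 |     logits = model(torch.randn(2, 3, 224, 224)) 129 |     assert logits.shape == (2, 1000)  # (batch, num_classes) 130 | 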
-------------------------------------------------------------------------------- /cvm/models/rexnet.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .ops import blocks 6 | from .utils import export, config, load_from_local_or_url 7 | from typing import Any 8 | 9 | 10 | class InvertedResidualBlock(blocks.InvertedResidualBlock): 11 | def __init__( 12 | self, 13 | inp, 14 | oup, 15 | t, kernel_size: int = 3, 16 | stride: int = 1, 17 | padding: int = 1, 18 | rd_ratio: float = None, 19 | se_ind: bool = True, 20 | dw_se_act: nn.Module = nn.ReLU6 21 | ): 22 | super().__init__(inp, oup, t, kernel_size=kernel_size, stride=stride, 23 | padding=padding, rd_ratio=rd_ratio, se_ind=se_ind, dw_se_act=dw_se_act) 24 | 25 | self.apply_residual = (stride == 1) and (inp <= oup) 26 | self.branch2 = nn.Identity() if self.apply_residual else None 27 | self.combine = blocks.Combine('ADD') if self.apply_residual else None 28 | 29 | def forward(self, x): 30 | out = self.branch1(x) 31 | if self.apply_residual: 32 | out[:, 0:self.inp] += self.branch2(x) 33 | return out 34 | 35 | 36 | @export 37 | class ReXNet(nn.Module): 38 | 39 | @blocks.activation(partial(nn.SiLU, inplace=True)) 40 | @blocks.se(divisor=1, use_norm=True) 41 | def __init__( 42 | self, 43 | in_channels: int = 3, 44 | num_classes: int = 1000, 45 | width_multiplier: float = 1.0, 46 | dropout_rate: float = 0.2, 47 | thumbnail: bool = False, 48 | **kwargs: Any 49 | ): 50 | super().__init__() 51 | 52 | FRONT_S = 1 if thumbnail else 2 53 | 54 | n = [2, 2, 3, 3, 5] # repeats 55 | s = [FRONT_S, 2, 2, 1, 2] 56 | ratios = [0, 1/12, 1/12, 1/12, 1/12] 57 | 58 | self.depth = (sum(n[:]) + 1) * 3 59 | increase = 180 / (self.depth // 3 * 1.0) 60 | 61 | def multiplier(x): return int(round(x * width_multiplier)) 62 | 63 | features = [ 64 | blocks.Conv2dBlock(in_channels, multiplier(32), 3, FRONT_S), 65 | InvertedResidualBlock(multiplier(32), multiplier(16), 1) 66 | ] 67 | 68 | inplanes, planes = 16, 16 + increase 69 | for i, layers in enumerate(n): 70 | features.append(InvertedResidualBlock(multiplier(inplanes), 71 | multiplier(planes), 6, stride=s[i], rd_ratio=ratios[i])) 72 | inplanes, planes = planes, planes + increase 73 | for _ in range(layers - 1): 74 | features.append(InvertedResidualBlock(multiplier(inplanes), multiplier(planes), 6, rd_ratio=ratios[i])) 75 | inplanes, planes = planes, planes + increase 76 | 77 | features.append(blocks.Conv2d1x1Block(multiplier(inplanes), multiplier(1280))) 78 | 79 | self.features = nn.Sequential(*features) 80 | 81 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 82 | self.classifier = nn.Sequential( 83 | nn.Dropout(dropout_rate), 84 | nn.Linear(multiplier(1280), num_classes) 85 | ) 86 | 87 | def forward(self, x): 88 | x = self.features(x) 89 | x = self.pool(x) 90 | x = torch.flatten(x, 1) 91 | x = self.classifier(x) 92 | return x 93 | 94 | 95 | def _rexnet( 96 | width_multiplier: float = 1.0, 97 | pretrained: bool = False, 98 | pth: str = None, 99 | progress: bool = True, 100 | **kwargs: Any 101 | ): 102 | model = ReXNet(width_multiplier=width_multiplier, **kwargs) 103 | 104 | if pretrained: 105 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 106 | return model 107 | 108 | 109 | @export 110 | def rexnet_x0_9(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 111 | return _rexnet(0.9, pretrained, pth, progress, **kwargs) 112 | 113 | 114 | @export 115 | def 
rexnet_x1_0(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 116 | return _rexnet(1.0, pretrained, pth, progress, **kwargs) 117 | 118 | 119 | @export 120 | def rexnet_x1_3(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 121 | return _rexnet(1.3, pretrained, pth, progress, **kwargs) 122 | 123 | 124 | @export 125 | def rexnet_x1_5(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 126 | return _rexnet(1.5, pretrained, pth, progress, **kwargs) 127 | 128 | 129 | @export 130 | def rexnet_x2_0(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 131 | return _rexnet(2.0, pretrained, pth, progress, **kwargs) 132 | 133 | 134 | class PlainBlock(nn.Sequential): 135 | def __init__(self, inplanes, planes, stride: int = 1): 136 | super().__init__( 137 | blocks.DepthwiseConv2d(inplanes, inplanes, stride=stride), 138 | nn.BatchNorm2d(inplanes), 139 | nn.ReLU(inplace=True), 140 | blocks.PointwiseBlock(inplanes, planes), 141 | nn.BatchNorm2d(planes), 142 | nn.SiLU(inplace=True) 143 | ) 144 | 145 | 146 | @export 147 | class ReXNetPlain(nn.Module): 148 | def __init__( 149 | self, 150 | in_channels: int = 3, 151 | num_classes: int = 1000, 152 | dropout_rate: float = 0.2, 153 | thumbnail: bool = False, 154 | **kwargs: Any 155 | ): 156 | super().__init__() 157 | 158 | FRONT_S = 1 if thumbnail else 2 159 | 160 | self.features = nn.Sequential( 161 | blocks.Conv2dBlock(in_channels, 32, stride=FRONT_S, activation_fn=partial(nn.SiLU, inplace=True)), 162 | PlainBlock(32, 96, stride=FRONT_S), 163 | PlainBlock(96, 144), 164 | PlainBlock(144, 192, stride=2), 165 | PlainBlock(192, 240), 166 | PlainBlock(240, 288, stride=2), 167 | PlainBlock(288, 336), 168 | PlainBlock(336, 384), 169 | PlainBlock(384, 432), 170 | PlainBlock(432, 480), 171 | PlainBlock(480, 528), 172 | PlainBlock(528, 576, stride=2), 173 | PlainBlock(576, 624), 174 | PlainBlock(624, 1024), 175 | blocks.Conv2d1x1Block(1024, 1280) 176 | ) 177 | 178 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 179 | self.classifier = nn.Sequential( 180 | nn.Dropout(dropout_rate), 181 | nn.Linear(1280, num_classes) 182 | ) 183 | 184 | def forward(self, x): 185 | x = self.features(x) 186 | x = self.pool(x) 187 | x = torch.flatten(x, 1) 188 | x = self.classifier(x) 189 | return x 190 | 191 | 192 | @export 193 | def rexnet_plain(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 194 | model = ReXNetPlain(**kwargs) 195 | 196 | if pretrained: 197 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 198 | return model 199 | -------------------------------------------------------------------------------- /cvm/models/seg/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn import * 2 | from .deeplabv3 import * 3 | from .unet import * 4 | from .deeplabv3_plus import * -------------------------------------------------------------------------------- /cvm/models/seg/deeplabv3.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import torch.nn as nn 3 | from cvm import models 4 | from ..ops import blocks 5 | from ..utils import export, get_out_channels, load_from_local_or_url 6 | from .heads import FCNHead, ClsHead 7 | from .segmentation_model import SegmentationModel 8 | 9 | 10 | class DeepLabHead(nn.Sequential): 11 | def __init__( 12 | self, 13 | in_channels: int, 14 | out_channels: int = 256, 15 | 
num_classes: int = 32, 16 | ): 17 | super().__init__( 18 | blocks.ASPP(in_channels, out_channels, [12, 24, 36]), 19 | blocks.Conv2dBlock(out_channels, out_channels), 20 | blocks.Conv2d1x1(out_channels, num_classes) 21 | ) 22 | 23 | 24 | @export 25 | class DeepLabV3(SegmentationModel): 26 | ... 27 | 28 | 29 | @export 30 | def create_deeplabv3( 31 | backbone: str = 'resnet50_v1', 32 | num_classes: int = 21, 33 | aux_loss: bool = False, 34 | cls_loss: bool = False, 35 | dropout_rate: float = 0.1, 36 | pretrained_backbone: bool = False, 37 | pretrained: bool = False, 38 | pth: str = None, 39 | progress: bool = True, 40 | **kwargs: Any 41 | ): 42 | if pretrained: 43 | pretrained_backbone = False 44 | 45 | backbone = models.__dict__[backbone]( 46 | pretrained=pretrained_backbone, 47 | dilations=[1, 1, 2, 4], 48 | **kwargs 49 | ).features 50 | 51 | aux_head = FCNHead(get_out_channels(backbone.stage3), None, num_classes, dropout_rate) if aux_loss else None 52 | cls_head = ClsHead(get_out_channels(backbone.stage4), num_classes) if cls_loss else None 53 | decode_head = DeepLabHead(get_out_channels(backbone.stage4), num_classes=num_classes) 54 | 55 | model = DeepLabV3(backbone, [3, 4] if aux_loss else [4], decode_head, aux_head, cls_head) 56 | 57 | if pretrained: 58 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 59 | return model 60 | 61 | 62 | @export 63 | def deeplabv3_resnet50_v1(*args, **kwargs: Any): 64 | return create_deeplabv3('resnet50_v1', *args, **kwargs) 65 | 66 | 67 | @export 68 | def deeplabv3_mobilenet_v3_small(*args, **kwargs: Any): 69 | return create_deeplabv3('mobilenet_v3_small', *args, **kwargs) 70 | 71 | 72 | @export 73 | def deeplabv3_mobilenet_v3_large(*args, **kwargs: Any): 74 | return create_deeplabv3('mobilenet_v3_large', *args, **kwargs) 75 | 76 | 77 | @export 78 | def deeplabv3_regnet_x_400mf(*args, **kwargs: Any): 79 | return create_deeplabv3('regnet_x_400mf', *args, **kwargs) 80 | 81 | 82 | @export 83 | def deeplabv3_mobilenet_v1_x1_0(*args, **kwargs: Any): 84 | return create_deeplabv3('mobilenet_v1_x1_0', *args, **kwargs) 85 | 86 | 87 | @export 88 | def deeplabv3_sd_mobilenet_v1_x1_0(*args, **kwargs: Any): 89 | return create_deeplabv3('sd_mobilenet_v1_x1_0', *args, **kwargs) 90 | 91 | 92 | @export 93 | def deeplabv3_mobilenet_v2_x1_0(*args, **kwargs: Any): 94 | return create_deeplabv3('mobilenet_v2_x1_0', *args, **kwargs) 95 | 96 | 97 | @export 98 | def deeplabv3_sd_mobilenet_v2_x1_0(*args, **kwargs: Any): 99 | return create_deeplabv3('sd_mobilenet_v2_x1_0', *args, **kwargs) 100 | 101 | 102 | @export 103 | def deeplabv3_shufflenet_v2_x2_0(*args, **kwargs: Any): 104 | return create_deeplabv3('shufflenet_v2_x2_0', *args, **kwargs) 105 | 106 | 107 | @export 108 | def deeplabv3_sd_shufflenet_v2_x2_0(*args, **kwargs: Any): 109 | return create_deeplabv3('sd_shufflenet_v2_x2_0', *args, **kwargs) 110 | 111 | 112 | @export 113 | def deeplabv3_efficientnet_b0(*args, **kwargs: Any): 114 | return create_deeplabv3('efficientnet_b0', *args, **kwargs) 115 | 116 | 117 | @export 118 | def deeplabv3_sd_efficientnet_b0(*args, **kwargs: Any): 119 | return create_deeplabv3('sd_efficientnet_b0', *args, **kwargs) 120 | -------------------------------------------------------------------------------- /cvm/models/seg/deeplabv3_plus.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import torch 3 | import torch.nn as nn 4 | from cvm import models 5 | from ..ops import blocks 6 | from ..utils import export, 
get_out_channels, load_from_local_or_url 7 | from torch.nn import functional as F 8 | from .heads import FCNHead, ClsHead 9 | from .segmentation_model import SegmentationModel 10 | 11 | 12 | class DeepLabPlusHead(nn.Module): 13 |     def __init__( 14 |         self, 15 |         aspp_in_channels: int, 16 |         features_channels: int, 17 |         out_channels: int = 256, 18 |         num_classes: int = 32, 19 |     ): 20 |         super().__init__() 21 | 22 |         self.aspp = blocks.ASPP(aspp_in_channels, out_channels, [12, 24, 36]) 23 |         self.cat = blocks.Combine('CONCAT') 24 | 25 |         self.conv3x3 = blocks.Conv2d3x3(out_channels + features_channels, num_classes) 26 | 27 |     def forward(self, x, low_level_features): 28 |         size = low_level_features.shape[-2:] 29 |         aspp_features = self.aspp(x) 30 |         aspp_features = F.interpolate(aspp_features, size=size, mode="bilinear", align_corners=False) 31 |         features = self.cat([aspp_features, low_level_features]) 32 |         features = self.conv3x3(features) 33 | 34 |         return features 35 | 36 | 37 | @export 38 | class DeepLabV3Plus(SegmentationModel): 39 |     def forward(self, x): 40 |         size = x.shape[-2:] 41 | 42 |         stages = self.backbone(x) 43 | 44 |         out = self.decode_head(stages[f'stage{self.out_stages[-1]}'], stages[f'stage{self.out_stages[0]}']) 45 |         out = self.interpolate(out, size=size) 46 | 47 |         res = {'out': out} 48 | 49 |         if self.aux_head: 50 |             aux = self.aux_head(stages[f'stage{self.out_stages[-2]}']) 51 |             aux = self.interpolate(aux, size=size) 52 |             res['aux'] = aux 53 | 54 |         if self.cls_head: 55 |             cls = self.cls_head(stages[f'stage{self.out_stages[-1]}']) 56 |             cls = cls.reshape(cls.shape[0], cls.shape[1], 1, 1) 57 |             res['out'] = out * torch.sigmoid(cls) 58 | 59 |         return res 60 | 61 | 62 | @export 63 | def create_deeplabv3_plus( 64 |     backbone: str = 'resnet50_v1', 65 |     num_classes: int = 21, 66 |     aux_loss: bool = False, 67 |     cls_loss: bool = False, 68 |     dropout_rate: float = 0.1, 69 |     pretrained_backbone: bool = False, 70 |     pretrained: bool = False, 71 |     pth: str = None, 72 |     progress: bool = True, 73 |     **kwargs: Any 74 | ): 75 |     if pretrained: 76 |         pretrained_backbone = False 77 | 78 |     backbone = models.__dict__[backbone]( 79 |         pretrained=pretrained_backbone, 80 |         dilations=[1, 1, 2, 4], 81 |         **kwargs 82 |     ).features 83 | 84 |     aux_head = FCNHead(get_out_channels(backbone.stage3), None, num_classes, dropout_rate) if aux_loss else None 85 |     cls_head = ClsHead(get_out_channels(backbone.stage4), num_classes) if cls_loss else None 86 |     decode_head = DeepLabPlusHead(get_out_channels(backbone.stage4), 87 |                                   get_out_channels(backbone.stage2), num_classes=num_classes) 88 | 89 |     model = DeepLabV3Plus(backbone, [2, 3, 4] if aux_loss else [2, 4], decode_head, aux_head, cls_head) 90 | 91 |     if pretrained: 92 |         load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 93 |     return model 94 | 95 | 96 | @export 97 | def deeplabv3_plus_resnet50_v1(*args, **kwargs: Any): 98 |     return create_deeplabv3_plus('resnet50_v1', *args, **kwargs) 99 | 100 | 101 | @export 102 | def deeplabv3_plus_mobilenet_v3_small(*args, **kwargs: Any): 103 |     return create_deeplabv3_plus('mobilenet_v3_small', *args, **kwargs) 104 | 105 | 106 | @export 107 | def deeplabv3_plus_mobilenet_v3_large(*args, **kwargs: Any): 108 |     return create_deeplabv3_plus('mobilenet_v3_large', *args, **kwargs) 109 | 110 | 111 | @export 112 | def deeplabv3_plus_regnet_x_400mf(*args, **kwargs: Any): 113 |     return create_deeplabv3_plus('regnet_x_400mf', *args, **kwargs) 114 | 115 | 116 | @export 117 | def deeplabv3_plus_mobilenet_v1_x1_0(*args, **kwargs: Any): 118 |     return 
create_deeplabv3_plus('mobilenet_v1_x1_0', *args, **kwargs) 119 | 120 | 121 | @export 122 | def deeplabv3_plus_sd_mobilenet_v1_x1_0(*args, **kwargs: Any): 123 | return create_deeplabv3_plus('sd_mobilenet_v1_x1_0', *args, **kwargs) 124 | 125 | 126 | @export 127 | def deeplabv3_plus_mobilenet_v2_x1_0(*args, **kwargs: Any): 128 | return create_deeplabv3_plus('mobilenet_v2_x1_0', *args, **kwargs) 129 | 130 | 131 | @export 132 | def deeplabv3_plus_sd_mobilenet_v2_x1_0(*args, **kwargs: Any): 133 | return create_deeplabv3_plus('sd_mobilenet_v2_x1_0', *args, **kwargs) 134 | 135 | 136 | @export 137 | def deeplabv3_plus_shufflenet_v2_x2_0(*args, **kwargs: Any): 138 | return create_deeplabv3_plus('shufflenet_v2_x2_0', *args, **kwargs) 139 | 140 | 141 | @export 142 | def deeplabv3_plus_sd_shufflenet_v2_x2_0(*args, **kwargs: Any): 143 | return create_deeplabv3_plus('sd_shufflenet_v2_x2_0', *args, **kwargs) 144 | 145 | 146 | @export 147 | def deeplabv3_plus_efficientnet_b0(*args, **kwargs: Any): 148 | return create_deeplabv3_plus('efficientnet_b0', *args, **kwargs) 149 | 150 | 151 | @export 152 | def deeplabv3_plus_sd_efficientnet_b0(*args, **kwargs: Any): 153 | return create_deeplabv3_plus('sd_efficientnet_b0', *args, **kwargs) 154 | -------------------------------------------------------------------------------- /cvm/models/seg/fcn.py: -------------------------------------------------------------------------------- 1 | from cvm import models 2 | from ..utils import export, get_out_channels, load_from_local_or_url 3 | from .heads import ClsHead, FCNHead 4 | from typing import Any 5 | from .segmentation_model import SegmentationModel 6 | 7 | 8 | @export 9 | class FCN(SegmentationModel): 10 | ... 11 | 12 | 13 | @export 14 | def create_fcn( 15 | backbone: str = 'resnet50_v1', 16 | num_classes: int = 21, 17 | aux_loss: bool = False, 18 | cls_loss: bool = False, 19 | dropout_rate: float = 0.1, 20 | pretrained_backbone: bool = False, 21 | pretrained: bool = False, 22 | pth: str = None, 23 | progress: bool = True, 24 | **kwargs: Any 25 | ): 26 | if pretrained: 27 | pretrained_backbone = False 28 | 29 | backbone = models.__dict__[backbone]( 30 | pretrained=pretrained_backbone, 31 | dilations=[1, 1, 2, 4], 32 | **kwargs 33 | ).features 34 | 35 | aux_head = FCNHead(get_out_channels(backbone.stage3), None, num_classes, dropout_rate) if aux_loss else None 36 | cls_head = ClsHead(get_out_channels(backbone.stage4), num_classes) if cls_loss else None 37 | decode_head = FCNHead(get_out_channels(backbone.stage4), None, num_classes, dropout_rate) 38 | 39 | model = FCN(backbone, [3, 4] if aux_loss else [4], decode_head, aux_head, cls_head) 40 | 41 | if pretrained: 42 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 43 | return model 44 | 45 | 46 | @export 47 | def fcn_resnet50_v1(*args, **kwargs: Any): 48 | return create_fcn('resnet50_v1', *args, **kwargs) 49 | 50 | 51 | @export 52 | def fcn_mobilenet_v3_small(*args, **kwargs: Any): 53 | return create_fcn('mobilenet_v3_small', *args, **kwargs) 54 | 55 | 56 | @export 57 | def fcn_mobilenet_v3_large(*args, **kwargs: Any): 58 | return create_fcn('mobilenet_v3_large', *args, **kwargs) 59 | 60 | 61 | @export 62 | def fcn_regnet_x_400mf(*args, **kwargs: Any): 63 | return create_fcn('regnet_x_400mf', *args, **kwargs) 64 | 65 | 66 | @export 67 | def fcn_mobilenet_v1_x1_0(*args, **kwargs: Any): 68 | return create_fcn('mobilenet_v1_x1_0', *args, **kwargs) 69 | 70 | 71 | @export 72 | def fcn_sd_mobilenet_v1_x1_0(*args, **kwargs: Any): 73 | return 
create_fcn('sd_mobilenet_v1_x1_0', *args, **kwargs) 74 | 75 | 76 | @export 77 | def fcn_mobilenet_v2_x1_0(*args, **kwargs: Any): 78 | return create_fcn('mobilenet_v2_x1_0', *args, **kwargs) 79 | 80 | 81 | @export 82 | def fcn_sd_mobilenet_v2_x1_0(*args, **kwargs: Any): 83 | return create_fcn('sd_mobilenet_v2_x1_0', *args, **kwargs) 84 | 85 | 86 | @export 87 | def fcn_shufflenet_v2_x2_0(*args, **kwargs: Any): 88 | return create_fcn('shufflenet_v2_x2_0', *args, **kwargs) 89 | 90 | 91 | @export 92 | def fcn_sd_shufflenet_v2_x2_0(*args, **kwargs: Any): 93 | return create_fcn('sd_shufflenet_v2_x2_0', *args, **kwargs) 94 | 95 | 96 | @export 97 | def fcn_efficientnet_b0(*args, **kwargs: Any): 98 | return create_fcn('efficientnet_b0', *args, **kwargs) 99 | 100 | 101 | @export 102 | def fcn_sd_efficientnet_b0(*args, **kwargs: Any): 103 | return create_fcn('sd_efficientnet_b0', *args, **kwargs) 104 | -------------------------------------------------------------------------------- /cvm/models/seg/heads.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class FCNHead(nn.Sequential): 5 | def __init__( 6 | self, 7 | in_channels: int = 2048, 8 | channels: int = None, 9 | num_classes: int = 32, 10 | dropout_rate: float = 0.1, 11 | ): 12 | channels = channels or int(in_channels / 4.0) 13 | super().__init__( 14 | nn.Conv2d(in_channels, channels, 3, padding=1, bias=False), 15 | nn.BatchNorm2d(channels), 16 | nn.ReLU(), 17 | nn.Dropout(dropout_rate), 18 | nn.Conv2d(channels, num_classes, 1) 19 | ) 20 | 21 | 22 | class ClsHead(nn.Sequential): 23 | def __init__( 24 | self, 25 | in_channels, 26 | num_classes: int 27 | ): 28 | super().__init__( 29 | nn.AdaptiveAvgPool2d((1, 1)), 30 | nn.Flatten(1), 31 | nn.Linear(in_channels, num_classes) 32 | ) 33 | -------------------------------------------------------------------------------- /cvm/models/seg/segmentation_model.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | from typing import List, Optional 6 | from ..ops.blocks.stage import Stage 7 | 8 | from torchvision.models.feature_extraction import create_feature_extractor 9 | 10 | __all__ = ['SegmentationModel'] 11 | 12 | 13 | class SegmentationModel(nn.Module): 14 | def __init__( 15 | self, 16 | backbone: nn.Module, 17 | out_stages: List[int], 18 | decode_head: nn.Module = None, 19 | aux_head: Optional[nn.Module] = None, 20 | cls_head: Optional[nn.Module] = None 21 | ): 22 | super().__init__() 23 | 24 | if out_stages is None: 25 | out_stages = [4] 26 | 27 | self.backbone = create_feature_extractor( 28 | backbone, 29 | return_nodes=[f'stage{i}' for i in out_stages], 30 | tracer_kwargs={'leaf_modules': [Stage]} 31 | ) 32 | self.out_stages = out_stages 33 | self.decode_head = decode_head 34 | self.aux_head = aux_head 35 | self.cls_head = cls_head 36 | self.interpolate = partial(F.interpolate, mode='bilinear', align_corners=False) 37 | 38 | def forward(self, x): 39 | size = x.shape[-2:] 40 | 41 | stages = self.backbone(x) 42 | 43 | out = self.decode_head(stages[f'stage{self.out_stages[-1]}']) 44 | out = self.interpolate(out, size=size) 45 | 46 | res = {'out': out} 47 | 48 | if self.aux_head: 49 | aux = self.aux_head(stages[f'stage{self.out_stages[-2]}']) 50 | aux = self.interpolate(aux, size=size) 51 | res['aux'] = aux 52 | 53 | if self.cls_head: 54 | cls = 
self.cls_head(stages[f'stage{self.out_stages[-1]}']) 55 |             cls = cls.reshape(cls.shape[0], cls.shape[1], 1, 1) 56 |             res['out'] = out * torch.sigmoid(cls) 57 | 58 |         return res 59 | -------------------------------------------------------------------------------- /cvm/models/seg/unet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from ..ops import blocks 4 | from ..utils import export, load_from_local_or_url 5 | from typing import Any, List 6 | 7 | 8 | @export 9 | class UNet(nn.Module): 10 |     def __init__( 11 |         self, 12 |         in_channels: int = 3, 13 |         num_classes: int = 2, 14 |         filters: List[int] = [64, 128, 256, 512, 1024], 15 |         **kwargs: Any 16 |     ): 17 |         super().__init__() 18 | 19 |         for i in range(4): 20 |             self.add_module(f'encode_conv{i+1}', nn.Sequential( 21 |                 blocks.Conv2dBlock(filters[i - 1] if i else in_channels, filters[i]), 22 |                 blocks.Conv2dBlock(filters[i], filters[i]) 23 |             )) 24 |             self.add_module(f'down{i+1}', nn.MaxPool2d(2, 2)) 25 | 26 |         self.u = nn.Sequential( 27 |             blocks.Conv2dBlock(filters[3], filters[4]), 28 |             blocks.Conv2dBlock(filters[4], filters[4]) 29 |         ) 30 | 31 |         filters = filters[::-1]  # reversed copy: avoid mutating the shared default list 32 |         for i in range(4): 33 |             self.add_module(f'up{i+1}', nn.ConvTranspose2d(filters[i], filters[i + 1], 4, stride=2, padding=1)) 34 |             self.add_module(f'decode_conv{i+1}', nn.Sequential( 35 |                 blocks.Combine('CONCAT'), 36 |                 blocks.Conv2dBlock(filters[i], filters[i+1]), 37 |                 blocks.Conv2dBlock(filters[i + 1], filters[i + 1]) 38 |             )) 39 | 40 |         self.output = blocks.Conv2d1x1(filters[-1], num_classes, bias=True) 41 | 42 |     def forward(self, x): 43 |         e1 = self.encode_conv1(x) 44 |         e2 = self.encode_conv2(self.down1(e1)) 45 |         e3 = self.encode_conv3(self.down2(e2)) 46 |         e4 = self.encode_conv4(self.down3(e3)) 47 | 48 |         u = self.u(self.down4(e4)) 49 | 50 |         d1 = self.decode_conv1([e4, self.up1(u)]) 51 |         d2 = self.decode_conv2([e3, self.up2(d1)]) 52 |         d3 = self.decode_conv3([e2, self.up3(d2)]) 53 |         d4 = self.decode_conv4([e1, self.up4(d3)]) 54 | 55 |         return self.output(d4) 56 | 57 | 58 | @export 59 | def unet( 60 |     pretrained: bool = False, 61 |     pth: str = None, 62 |     progress: bool = True, 63 |     **kwargs: Any 64 | ): 65 |     model = UNet(**kwargs) 66 | 67 |     if pretrained: 68 |         load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 69 |     return model 70 | -------------------------------------------------------------------------------- /cvm/models/shufflenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, config, load_from_local_or_url 6 | from typing import Any, OrderedDict, List 7 | 8 | 9 | class ShuffleAddBlock(nn.Module): 10 |     def __init__(self, channels, g: int = 2): 11 |         super().__init__() 12 | 13 |         self.branch1 = nn.Sequential(OrderedDict([ 14 |             ('gconv1', blocks.Conv2d1x1Block(channels, channels, groups=g)), 15 |             ('shuffle', blocks.ChannelShuffle(groups=g)), 16 |             ('dwconv', blocks.DepthwiseConv2dBN(channels, channels, 3)), 17 |             ('gconv2', blocks.Conv2d1x1BN(channels, channels, groups=g)) 18 |         ])) 19 | 20 |         self.branch2 = nn.Identity() 21 |         self.combine = blocks.Combine('ADD') 22 |         self.relu = nn.ReLU(inplace=True) 23 | 24 |     def forward(self, x): 25 |         x = self.combine([self.branch1(x), self.branch2(x)]) 26 |         x = self.relu(x) 27 |         return x 28 | 29 | 30 | class ShuffleCatBlock(nn.Module): 31 |     def __init__( 32 |         self, 33 |         inp, 34 |         oup, 35 |         g: int = 2, 36 |         stride: int = 2, 37 |         apply_first: bool = 
True 38 |     ): 39 |         super().__init__() 40 | 41 |         g_1st = g if apply_first else 1 42 | 43 |         self.branch1 = nn.Sequential(OrderedDict([ 44 |             ('gconv1', blocks.Conv2d1x1Block(inp, oup, groups=g_1st)), 45 |             ('shuffle', blocks.ChannelShuffle(groups=g)), 46 |             ('dwconv', blocks.DepthwiseConv2dBN(oup, oup, stride=stride)), 47 |             ('gconv2', blocks.Conv2d1x1BN(oup, oup, groups=g)) 48 |         ])) 49 | 50 |         self.branch2 = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1) 51 |         self.combine = blocks.Combine('CONCAT') 52 |         self.relu = nn.ReLU(inplace=True) 53 | 54 |     def forward(self, x): 55 |         x = self.combine([self.branch1(x), self.branch2(x)]) 56 |         x = self.relu(x) 57 |         return x 58 | 59 | 60 | @export 61 | class ShuffleNet(nn.Module): 62 |     def __init__( 63 |         self, 64 |         in_channels: int = 3, 65 |         num_classes: int = 1000, 66 |         repeats: List[int] = [4, 8, 4], 67 |         channels: List[int] = [], 68 |         g: int = 3, 69 |         thumbnail: bool = False, 70 |         **kwargs: Any 71 |     ): 72 |         super().__init__() 73 | 74 |         FRONT_S = 1 if thumbnail else 2 75 | 76 |         self.features = nn.Sequential(OrderedDict([ 77 |             ('stem', blocks.Conv2dBlock(in_channels, channels[0], 3, FRONT_S)), 78 |             ('stage1', nn.MaxPool2d(kernel_size=3, stride=2, padding=1) if not thumbnail else nn.Identity()), 79 |             ('stage2', self.make_layers(repeats[0], channels[0], channels[1], g)), 80 |             ('stage3', self.make_layers(repeats[1], channels[1], channels[2], g)), 81 |             ('stage4', self.make_layers(repeats[2], channels[2], channels[3], g)) 82 |         ])) 83 | 84 |         self.pool = nn.AdaptiveAvgPool2d((1, 1)) 85 |         self.classifier = nn.Linear(channels[3], num_classes) 86 | 87 |     @staticmethod 88 |     def make_layers(repeat, inp, oup, g): 89 |         layers = [ShuffleCatBlock(inp, oup - inp, stride=2, g=g)] 90 |         for _ in range(repeat - 1): 91 |             layers.append(ShuffleAddBlock(oup, g=g)) 92 | 93 |         return blocks.Stage(layers) 94 | 95 |     def forward(self, x): 96 |         x = self.features(x) 97 |         x = self.pool(x) 98 |         x = torch.flatten(x, 1) 99 |         x = self.classifier(x) 100 |         return x 101 | 102 | 103 | def _shufflenet( 104 |     repeats: List[int], 105 |     channels: List[int], 106 |     g: int, 107 |     pretrained: bool = False, 108 |     pth: str = None, 109 |     progress: bool = True, 110 |     **kwargs: Any 111 | ): 112 |     model = ShuffleNet(repeats=repeats, channels=channels, g=g, **kwargs) 113 | 114 |     if pretrained: 115 |         load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 116 |     return model 117 | 118 | 119 | @export 120 | def shufflenet_g1(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 121 |     return _shufflenet([4, 8, 4], [24, 144, 288, 576], 1, pretrained, pth, progress, **kwargs) 122 | 123 | 124 | @export 125 | def shufflenet_g2(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 126 |     return _shufflenet([4, 8, 4], [24, 200, 400, 800], 2, pretrained, pth, progress, **kwargs) 127 | 128 | 129 | @export 130 | def shufflenet_g3(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 131 |     return _shufflenet([4, 8, 4], [24, 240, 480, 960], 3, pretrained, pth, progress, **kwargs) 132 | 133 | 134 | @export 135 | def shufflenet_g4(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 136 |     return _shufflenet([4, 8, 4], [24, 272, 544, 1088], 4, pretrained, pth, progress, **kwargs) 137 | 138 | 139 | @export 140 | def shufflenet_g8(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 141 |     return _shufflenet([4, 8, 4], [24, 384, 768, 1536], 8, pretrained, pth, progress, 
**kwargs) 142 | -------------------------------------------------------------------------------- /cvm/models/shufflenetv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, config, load_from_local_or_url 6 | from typing import Any, OrderedDict, List 7 | 8 | 9 | class ShuffleBlockV2(nn.Module): 10 | def __init__( 11 | self, 12 | inp, 13 | oup, 14 | stride: int = 1, 15 | dilation: int = 1 16 | ): 17 | super().__init__() 18 | 19 | self.inp = inp 20 | self.oup = oup // 2 21 | self.stride = stride if dilation == 1 else 1 22 | self.dilation = max(1, dilation // stride) 23 | self.split = None 24 | 25 | if stride == 1: 26 | self.inp = inp // 2 27 | self.split = blocks.ChannelChunk(2) 28 | 29 | self.branch1 = nn.Identity() 30 | if stride != 1: 31 | self.branch1 = nn.Sequential(OrderedDict([ 32 | ('dwconv', blocks.DepthwiseConv2dBN(self.inp, self.inp, stride=self.stride, dilation=self.dilation)), 33 | ('1x1', blocks.Conv2d1x1Block(self.inp, self.oup)) 34 | ])) 35 | 36 | self.branch2 = nn.Sequential(OrderedDict([ 37 | ('1x1-1', blocks.Conv2d1x1Block(self.inp, self.oup)), 38 | ('dwconv', blocks.DepthwiseConv2dBN(self.oup, self.oup, stride=self.stride, dilation=self.dilation)), 39 | ('1x1-2', blocks.Conv2d1x1Block(self.oup, self.oup)) 40 | ])) 41 | 42 | self.combine = blocks.Combine('CONCAT') 43 | self.shuffle = blocks.ChannelShuffle(groups=2) 44 | 45 | def forward(self, x): 46 | if isinstance(self.branch1, nn.Identity): 47 | x1, x2 = self.split(x) 48 | x2 = self.branch2(x2) 49 | else: 50 | x1 = self.branch1(x) 51 | x2 = self.branch2(x) 52 | 53 | out = self.combine([x1, x2]) 54 | out = self.shuffle(out) 55 | return out 56 | 57 | 58 | @export 59 | class ShuffleNetV2(nn.Module): 60 | def __init__( 61 | self, 62 | in_channels: int = 3, 63 | num_classes: int = 1000, 64 | repeats: List[int] = [4, 8, 4], 65 | channels: List[int] = [24, 48, 96, 192, 1024], 66 | dropout_rate: float = 0.0, 67 | dilations: List[int] = None, 68 | thumbnail: bool = False, 69 | **kwargs: Any 70 | ): 71 | super().__init__() 72 | 73 | self.block = ShuffleBlockV2 74 | dilations = dilations or [1, 1, 1, 1] 75 | assert len(dilations) == 4, '' 76 | 77 | FRONT_S = 1 if thumbnail else 2 78 | 79 | self.features = nn.Sequential(OrderedDict([ 80 | ('stem', blocks.Conv2dBlock(in_channels, channels[0], 3, FRONT_S)), 81 | ('stage1', nn.MaxPool2d(3, stride=2, padding=1) if not thumbnail else nn.Identity()), 82 | ('stage2', self.make_layers(repeats[0], channels[0], channels[1], dilations[1])), 83 | ('stage3', self.make_layers(repeats[1], channels[1], channels[2], dilations[2])), 84 | ('stage4', self.make_layers(repeats[2], channels[2], channels[3], dilations[3])), 85 | ])) 86 | 87 | self.features[-1].append( 88 | blocks.Conv2d1x1Block(channels[3], channels[4]) 89 | ) 90 | 91 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 92 | self.classifier = nn.Sequential( 93 | nn.Dropout(dropout_rate, inplace=True), 94 | nn.Linear(channels[4], num_classes) 95 | ) 96 | 97 | self.features[-1].out_channels = channels[-1] 98 | self.features[-2].out_channels = channels[-3] 99 | self.features[-3].out_channels = channels[-4] 100 | 101 | def make_layers(self, repeat, inp, oup, dilation): 102 | layers = [self.block(inp, oup, stride=2, dilation=dilation)] 103 | 104 | for _ in range(repeat - 1): 105 | layers.append(self.block(oup, oup, dilation=dilation)) 106 | 107 | return blocks.Stage(layers) 108 | 109 | def forward(self, x): 110 | x = 
self.features(x) 111 | x = self.pool(x) 112 | x = torch.flatten(x, 1) 113 | x = self.classifier(x) 114 | return x 115 | 116 | 117 | def _shufflenet_v2( 118 | repeats: List[int], 119 | channels: List[int], 120 | pretrained: bool = False, 121 | pth: str = None, 122 | progress: bool = True, 123 | **kwargs: Any 124 | ): 125 | model = ShuffleNetV2(repeats=repeats, channels=channels, **kwargs) 126 | 127 | if pretrained: 128 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 129 | return model 130 | 131 | 132 | @export 133 | def shufflenet_v2_x0_5(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 134 | return _shufflenet_v2([4, 8, 4], [24, 48, 96, 192, 1024], pretrained, pth, progress, **kwargs) 135 | 136 | 137 | @export 138 | def shufflenet_v2_x1_0(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 139 | return _shufflenet_v2([4, 8, 4], [24, 116, 232, 464, 1024], pretrained, pth, progress, **kwargs) 140 | 141 | 142 | @export 143 | def shufflenet_v2_x1_5(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 144 | return _shufflenet_v2([4, 8, 4], [24, 176, 352, 704, 1024], pretrained, pth, progress, **kwargs) 145 | 146 | 147 | @export 148 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1-shufflenets-weights/shufflenet_v2_x2_0-35a176a6.pth') 149 | def shufflenet_v2_x2_0(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 150 | return _shufflenet_v2([4, 8, 4], [24, 244, 488, 976, 2048], pretrained, pth, progress, **kwargs) 151 | -------------------------------------------------------------------------------- /cvm/models/squeezenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, load_from_local_or_url 6 | from typing import Any 7 | 8 | 9 | class FireBlock(nn.Module): 10 | def __init__(self, inp, oup): 11 | super().__init__() 12 | 13 | planes = oup // 8 14 | 15 | self.squeeze = blocks.Conv2d1x1(inp, planes) 16 | self.relu1 = nn.ReLU(inplace=True) 17 | self.expand1x1 = blocks.Conv2d1x1(planes, oup // 2, bias=True) 18 | self.expand3x3 = blocks.Conv2d3x3(planes, oup // 2, bias=True) 19 | self.combine = blocks.Combine('CONCAT') 20 | self.relu2 = nn.ReLU(inplace=True) 21 | 22 | def forward(self, x): 23 | x = self.squeeze(x) 24 | x = self.relu1(x) 25 | x = self.combine([self.expand1x1(x), self.expand3x3(x)]) 26 | x = self.relu2(x) 27 | return x 28 | 29 | 30 | @export 31 | class SqueezeNet(nn.Module): 32 | def __init__( 33 | self, 34 | in_channels: int = 3, 35 | num_classes: int = 1000, 36 | dropout_rate: float = 0.5, 37 | thumbnail: bool = False, 38 | **kwargs: Any 39 | ): 40 | super().__init__() 41 | 42 | FRONT_S = 1 if thumbnail else 2 43 | maxpool = nn.Identity() if thumbnail else nn.MaxPool2d(3, 2, ceil_mode=True) 44 | 45 | self.features = nn.Sequential( 46 | nn.Conv2d(in_channels, 96, 7, stride=FRONT_S), 47 | maxpool, 48 | 49 | FireBlock(96, 128), 50 | FireBlock(128, 128), 51 | FireBlock(128, 256), 52 | 53 | nn.MaxPool2d(3, stride=2, ceil_mode=True), 54 | 55 | FireBlock(256, 256), 56 | FireBlock(256, 384), 57 | FireBlock(384, 384), 58 | FireBlock(384, 512), 59 | 60 | nn.MaxPool2d(3, stride=2, ceil_mode=True), 61 | 62 | FireBlock(512, 512) 63 | ) 64 | 65 | self.classifier = nn.Sequential( 66 | nn.Dropout(dropout_rate), 67 | blocks.Conv2d1x1(512, num_classes, bias=True), 68 | 
nn.ReLU(inplace=True), 69 | nn.AdaptiveAvgPool2d((1, 1)) 70 | ) 71 | 72 | def forward(self, x): 73 | x = self.features(x) 74 | x = self.classifier(x) 75 | return x 76 | 77 | 78 | @export 79 | def squeezenet( 80 | pretrained: bool = False, 81 | pth: str = None, 82 | progress: bool = True, 83 | **kwargs: Any 84 | ): 85 | model = SqueezeNet(**kwargs) 86 | 87 | if pretrained: 88 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 89 | return model 90 | -------------------------------------------------------------------------------- /cvm/models/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import functools 4 | import torch 5 | import torch.nn as nn 6 | from .ops.blocks import Stage 7 | 8 | __all__ = ['export', 'config', 'load_from_local_or_url', 'get_out_channels'] 9 | 10 | 11 | def export(obj): 12 | if hasattr(sys.modules[obj.__module__], '__all__'): 13 | assert obj.__name__ not in sys.modules[ 14 | obj.__module__].__all__, f'Duplicate name: {obj.__name__}' 15 | 16 | sys.modules[obj.__module__].__all__.append(obj.__name__) 17 | else: 18 | sys.modules[obj.__module__].__all__ = [obj.__name__] 19 | return obj 20 | 21 | 22 | def config(url='', **settings): 23 | def decorator(func): 24 | @functools.wraps(func) 25 | def wrapper(*args, **kwargs): 26 | kwargs['url'] = url 27 | # kwargs['arch'] = func.__name__ 28 | return func(*args, **{**settings, **kwargs}) 29 | return wrapper 30 | 31 | return decorator 32 | 33 | 34 | def load_from_local_or_url(model, pth=None, url=None, progress=True): 35 | assert pth is not None or url is not None, 'The "pth" and "url" can not both be None.' 36 | 37 | if pth is not None: 38 | state_dict = torch.load(os.path.expanduser(pth)) 39 | else: 40 | state_dict = torch.hub.load_state_dict_from_url(url, progress=progress) 41 | 42 | model.load_state_dict(state_dict) 43 | 44 | 45 | def get_out_channels(module: nn.Module): 46 | # block has out_channels 47 | if isinstance(module, Stage) and hasattr(module, 'out_channels'): 48 | return module.out_channels 49 | 50 | # or get channels of the last Conv2d 51 | out_channels = 0 52 | for m in module.modules(): 53 | if isinstance(m, nn.Conv2d): 54 | out_channels = m.out_channels 55 | 56 | return out_channels 57 | -------------------------------------------------------------------------------- /cvm/models/vae/__init__.py: -------------------------------------------------------------------------------- 1 | from .vae import * 2 | from .cvae import * -------------------------------------------------------------------------------- /cvm/models/vae/cvae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..utils import export, load_from_local_or_url 5 | from typing import Any 6 | 7 | 8 | @export 9 | class ConditionalVAE(nn.Module): 10 | """ 11 | Paper: [Learning Structured Output Representation using Deep Conditional Generative Models](https://papers.nips.cc/paper/2015/hash/8d55a249e6baa5c06772297520da2051-Abstract.html) 12 | """ 13 | def __init__( 14 | self, 15 | image_size, 16 | nz: int = 100, 17 | **kwargs: Any 18 | ): 19 | super().__init__() 20 | 21 | self.image_size = image_size 22 | self.nz = nz 23 | 24 | self.embeds_en = nn.Embedding(10, 200) 25 | 26 | self.embeds_de = nn.Embedding(10, 10) 27 | 28 | # Q(z|X) 29 | self.encoder = nn.Sequential( 30 | nn.Linear(self.image_size ** 2 + 200, 512), 31 | nn.LeakyReLU(0.2, inplace=True), 32 | nn.Linear(512, 512), 
33 | nn.LeakyReLU(0.2, inplace=True), 34 | nn.Linear(512, 256), 35 | nn.LeakyReLU(0.2, inplace=True), 36 | nn.Linear(256, self.nz * 2) 37 | ) 38 | 39 | # P(X|z) 40 | self.decoder = nn.Sequential( 41 | nn.Linear(self.nz + 10, 256), 42 | nn.LeakyReLU(0.2, inplace=True), 43 | nn.Linear(256, 512), 44 | nn.LeakyReLU(0.2, inplace=True), 45 | nn.Linear(512, 512), 46 | nn.LeakyReLU(0.2, inplace=True), 47 | nn.Linear(512, self.image_size ** 2), 48 | nn.Sigmoid(), 49 | nn.Unflatten(1, (1, image_size, image_size)) 50 | ) 51 | 52 | def sample_z(self, mu, logvar, c): 53 | eps = torch.randn_like(logvar) 54 | 55 | return torch.cat([mu + eps * torch.exp(0.5 * logvar), c], dim=1) 56 | 57 | def forward(self, x, c): 58 | x = torch.flatten(x, 1) 59 | 60 | x = torch.cat([x, self.embeds_en(c)], dim=1) 61 | 62 | mu, logvar = torch.chunk(self.encoder(x), 2, dim=1) 63 | 64 | z = self.sample_z(mu, logvar, self.embeds_de(c)) 65 | 66 | x = self.decoder(z) 67 | return x, mu, logvar 68 | 69 | 70 | @export 71 | def cvae( 72 | pretrained: bool = False, 73 | pth: str = None, 74 | progress: bool = True, 75 | **kwargs: Any 76 | ): 77 | model = ConditionalVAE(**kwargs) 78 | 79 | if pretrained: 80 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 81 | return model 82 | -------------------------------------------------------------------------------- /cvm/models/vae/vae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..utils import export, load_from_local_or_url 5 | from typing import Any 6 | 7 | 8 | @export 9 | class VAE(nn.Module): 10 | def __init__( 11 | self, 12 | image_size, 13 | nz: int = 100, 14 | **kwargs: Any 15 | ): 16 | super().__init__() 17 | 18 | self.image_size = image_size 19 | self.nz = nz 20 | 21 | # Q(z|X) 22 | self.encoder = nn.Sequential( 23 | nn.Flatten(1), 24 | nn.Linear(self.image_size ** 2, 512), 25 | nn.LeakyReLU(0.2, inplace=True), 26 | nn.Linear(512, 512), 27 | nn.LeakyReLU(0.2, inplace=True), 28 | nn.Linear(512, 256), 29 | nn.LeakyReLU(0.2, inplace=True), 30 | nn.Linear(256, self.nz * 2) 31 | ) 32 | 33 | # P(X|z) 34 | self.decoder = nn.Sequential( 35 | nn.Linear(self.nz, 256), 36 | nn.LeakyReLU(0.2, inplace=True), 37 | nn.Linear(256, 512), 38 | nn.LeakyReLU(0.2, inplace=True), 39 | nn.Linear(512, 512), 40 | nn.LeakyReLU(0.2, inplace=True), 41 | nn.Linear(512, self.image_size ** 2), 42 | nn.Sigmoid(), 43 | nn.Unflatten(1, (1, image_size, image_size)) 44 | ) 45 | 46 | def sample_z(self, mu, logvar): 47 | eps = torch.randn_like(logvar) 48 | 49 | return mu + eps * torch.exp(0.5 * logvar) 50 | 51 | def forward(self, x): 52 | mu, logvar = torch.chunk(self.encoder(x), 2, dim=1) 53 | 54 | z = self.sample_z(mu, logvar) 55 | 56 | x = self.decoder(z) 57 | return x, mu, logvar 58 | 59 | 60 | @export 61 | def vae( 62 | pretrained: bool = False, 63 | pth: str = None, 64 | progress: bool = True, 65 | **kwargs: Any 66 | ): 67 | model = VAE(**kwargs) 68 | 69 | if pretrained: 70 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 71 | return model 72 | -------------------------------------------------------------------------------- /cvm/models/vggnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, load_from_local_or_url 6 | from typing import Any, List, OrderedDict 7 | 8 | 9 | @export 10 | class VGGNet(nn.Module): 11 | def __init__( 12 | self, 
13 | in_channels: int = 3, 14 | num_classes: int = 1000, 15 | layers: List[int] = [1, 1, 2, 2, 2], 16 | dropout_rate: float = 0.5, 17 | thumbnail: bool = False, 18 | **kwargs: Any 19 | ): 20 | super().__init__() 21 | 22 | maxpool1 = nn.Identity() if thumbnail else nn.MaxPool2d(2, stride=2) 23 | maxpool2 = nn.Identity() if thumbnail else nn.MaxPool2d(2, stride=2) 24 | 25 | self.features = nn.Sequential(OrderedDict([ 26 | ('stem', blocks.Stage( 27 | *self.make_layers(in_channels, 64, layers[0]), 28 | maxpool1 29 | )), 30 | ('stage1', blocks.Stage( 31 | *self.make_layers(64, 128, layers[1]), 32 | maxpool2 33 | )), 34 | ('stage2', blocks.Stage( 35 | *self.make_layers(128, 256, layers[2]), 36 | nn.MaxPool2d(kernel_size=2, stride=2) 37 | )), 38 | ('stage3', blocks.Stage( 39 | *self.make_layers(256, 512, layers[3]), 40 | nn.MaxPool2d(kernel_size=2, stride=2) 41 | )), 42 | ('stage4', blocks.Stage( 43 | *self.make_layers(512, 512, layers[4]), 44 | nn.MaxPool2d(kernel_size=2, stride=2) 45 | )) 46 | ])) 47 | 48 | self.pool = nn.AdaptiveAvgPool2d((7, 7)) 49 | 50 | self.classifier = nn.Sequential( 51 | nn.Linear(512 * 7 * 7, 4096), 52 | nn.ReLU(inplace=True), 53 | nn.Dropout(dropout_rate), 54 | nn.Linear(4096, 4096), 55 | nn.ReLU(inplace=True), 56 | nn.Dropout(dropout_rate), 57 | nn.Linear(4096, num_classes) 58 | ) 59 | 60 | self.reset_parameters() 61 | 62 | def reset_parameters(self) -> None: 63 | for m in self.modules(): 64 | if isinstance(m, nn.Conv2d): 65 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 66 | if m.bias is not None: 67 | nn.init.constant_(m.bias, 0) 68 | elif isinstance(m, nn.BatchNorm2d): 69 | nn.init.constant_(m.weight, 1) 70 | nn.init.constant_(m.bias, 0) 71 | elif isinstance(m, nn.Linear): 72 | nn.init.normal_(m.weight, 0, 0.01) 73 | nn.init.constant_(m.bias, 0) 74 | 75 | def forward(self, x): 76 | x = self.features(x) 77 | x = self.pool(x) 78 | x = torch.flatten(x, 1) 79 | x = self.classifier(x) 80 | return x 81 | 82 | @staticmethod 83 | def make_layers(inp, oup, n): 84 | layers = [blocks.Conv2dBlock(inp, oup, bias=True)] 85 | 86 | for _ in range(n - 1): 87 | layers.append(blocks.Conv2dBlock(oup, oup, bias=True)) 88 | 89 | return layers 90 | 91 | 92 | def _vgg( 93 | layers: List[int], 94 | pretrained: bool = False, 95 | pth: str = None, 96 | progress: bool = True, 97 | **kwargs: Any 98 | ): 99 | model = VGGNet(layers=layers, **kwargs) 100 | 101 | if pretrained: 102 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 103 | return model 104 | 105 | 106 | @export 107 | @blocks.normalizer(None) 108 | def vgg11(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 109 | return _vgg([1, 1, 2, 2, 2], pretrained, pth, progress, **kwargs) 110 | 111 | 112 | @export 113 | @blocks.normalizer(None) 114 | def vgg13(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 115 | return _vgg([2, 2, 2, 2, 2], pretrained, pth, progress, **kwargs) 116 | 117 | 118 | @export 119 | @blocks.normalizer(None) 120 | def vgg16(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 121 | return _vgg([2, 2, 3, 3, 3], pretrained, pth, progress, **kwargs) 122 | 123 | 124 | @export 125 | @blocks.normalizer(None) 126 | def vgg19(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 127 | return _vgg([2, 2, 4, 4, 4], pretrained, pth, progress, **kwargs) 128 | 129 | 130 | @export 131 | def vgg11_bn(pretrained: bool = False, pth: str = None, progress: bool = True, 
**kwargs: Any): 132 | return _vgg([1, 1, 2, 2, 2], pretrained, pth, progress, **kwargs) 133 | 134 | 135 | @export 136 | def vgg13_bn(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 137 | return _vgg([2, 2, 2, 2, 2], pretrained, pth, progress, **kwargs) 138 | 139 | 140 | @export 141 | def vgg16_bn(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 142 | return _vgg([2, 2, 3, 3, 3], pretrained, pth, progress, **kwargs) 143 | 144 | 145 | @export 146 | def vgg19_bn(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 147 | return _vgg([2, 2, 4, 4, 4], pretrained, pth, progress, **kwargs) 148 | -------------------------------------------------------------------------------- /cvm/models/vision_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, config, load_from_local_or_url 6 | from typing import Any 7 | from functools import partial 8 | 9 | 10 | class MultiheadSelfAttention(nn.MultiheadAttention): 11 | def forward(self, x): 12 | x, _ = super().forward(x, x, x, need_weights=False) 13 | return x 14 | 15 | 16 | class EncoderBlock(nn.Module): 17 | def __init__( 18 | self, 19 | embed_dim, 20 | num_heads: int = 8, 21 | qkv_bias: bool = False, 22 | mlp_ratio: float = 4.0, 23 | dropout_rate: float = 0., 24 | attn_dropout_rate: float = 0., 25 | drop_path_rate: float = 0., 26 | normalizer_fn: nn.Module = partial(nn.LayerNorm, eps=1e-6), 27 | ): 28 | super().__init__() 29 | 30 | self.msa = nn.Sequential( 31 | normalizer_fn(embed_dim), 32 | MultiheadSelfAttention(embed_dim, num_heads, dropout=attn_dropout_rate, bias=qkv_bias, batch_first=True), 33 | nn.Dropout(dropout_rate), 34 | blocks.StochasticDepth(1 - drop_path_rate) 35 | ) 36 | 37 | self.mlp = nn.Sequential( 38 | normalizer_fn(embed_dim), 39 | blocks.MlpBlock(embed_dim, int(embed_dim * mlp_ratio), dropout_rate=dropout_rate), 40 | blocks.StochasticDepth(1 - drop_path_rate) 41 | ) 42 | 43 | def forward(self, x): 44 | x = x + self.msa(x) 45 | x = x + self.mlp(x) 46 | return x 47 | 48 | 49 | @export 50 | class VisionTransformer(nn.Module): 51 | r""" 52 | Paper: An Image is Worth 16x16 Words. 
Transformers for Image Recognition at Scale, https://arxiv.org/abs/2010.11929 53 |     """ 54 |     def __init__( 55 |         self, 56 |         image_size: int = 224, 57 |         in_channels: int = 3, 58 |         num_classes: int = 1000, 59 |         patch_size: int = 16, 60 |         hidden_dim: int = 768, 61 |         num_blocks: int = 12, 62 |         num_heads: int = 12, 63 |         mlp_ratio: float = 4., 64 |         qkv_bias: bool = True, 65 |         dropout_rate: float = 0., 66 |         attn_dropout_rate: float = 0., 67 |         drop_path_rate: float = 0., 68 |         classifier: str = 'token', 69 |         normalizer_fn: nn.Module = partial(nn.LayerNorm, eps=1e-6), 70 |         **kwargs: Any 71 |     ): 72 |         super().__init__() 73 | 74 |         self.num_patches = (image_size // patch_size) ** 2 75 |         self.classifier = classifier 76 | 77 |         self.class_token = nn.Parameter(torch.zeros(1, 1, hidden_dim)) 78 |         self.positions = nn.Parameter(torch.normal(mean=0.0, std=0.02, size=[1, self.num_patches + 1, hidden_dim])) 79 | 80 |         self.embedding = nn.Conv2d(in_channels, hidden_dim, patch_size, stride=patch_size) 81 | 82 |         self.drop = nn.Dropout(dropout_rate) 83 | 84 |         # encoder 85 |         self.encoder = nn.Sequential(*[ 86 |             EncoderBlock( 87 |                 hidden_dim, num_heads, qkv_bias=qkv_bias, mlp_ratio=mlp_ratio, 88 |                 dropout_rate=dropout_rate, attn_dropout_rate=attn_dropout_rate, 89 |                 drop_path_rate=drop_path_rate, normalizer_fn=normalizer_fn 90 |             ) for _ in range(num_blocks) 91 |         ]) 92 | 93 |         self.norm = normalizer_fn(hidden_dim) 94 | 95 |         self.head = nn.Linear(hidden_dim, num_classes) 96 | 97 |     def forward(self, x): 98 |         # NCHW -> (N, hidden_dim, NP_H, NP_W) 99 |         x = self.embedding(x) 100 |         # (N, hidden_dim, NP_H, NP_W) -> (N, hidden_dim, NP) 101 |         x = torch.flatten(x, start_dim=2) 102 |         # (N, hidden_dim, NP) -> (N, NP, hidden_dim) 103 |         x = x.permute(0, 2, 1) 104 | 105 |         class_tokens = self.class_token.expand(x.shape[0], -1, -1) 106 |         x = torch.cat([class_tokens, x], dim=1) + self.positions 107 | 108 |         x = self.drop(x) 109 |         x = self.encoder(x) 110 |         x = self.norm(x) 111 | 112 |         x = x[:, 0] if self.classifier == 'token' else x.mean(dim=1) 113 |         return self.head(x) 114 | 115 | 116 | def _vit( 117 |     image_size: int = 224, 118 |     patch_size: int = 32, 119 |     hidden_dim: int = 768, 120 |     num_blocks: int = 12, 121 |     num_heads: int = 12, 122 |     pretrained: bool = False, 123 |     pth: str = None, 124 |     progress: bool = True, 125 |     **kwargs: Any 126 | ): 127 |     model = VisionTransformer(image_size, patch_size=patch_size, hidden_dim=hidden_dim, 128 |                               num_blocks=num_blocks, num_heads=num_heads, 129 |                               normalizer_fn=partial(nn.LayerNorm, eps=1e-6), **kwargs) 130 | 131 |     if pretrained: 132 |         load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 133 |     return model 134 | 135 | 136 | @export 137 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.1.1-vit-weights/torch-vit_b_32-f0b6fb13.pth') 138 | def vit_b_32(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 139 |     return _vit(224, 32, 768, 12, 12, pretrained, pth, progress, **kwargs) 140 | 141 | 142 | @export 143 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.1.1-vit-weights/torch-vit_b_16-1d93d631.pth') 144 | def vit_b_16(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 145 |     return _vit(224, 16, 768, 12, 12, pretrained, pth, progress, **kwargs) 146 | 147 | 148 | @export 149 | def vit_l_32(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 150 |     return _vit(224, 32, 1024, 24, 16, pretrained, pth, progress, **kwargs) 151 | 152 | 153 | @export 154 | def 
vit_l_16(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 155 | return _vit(224, 16, 1024, 24, 16, pretrained, pth, progress, **kwargs) 156 | 157 | 158 | @export 159 | def vit_h_32(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 160 | return _vit(224, 32, 1280, 32, 16, pretrained, pth, progress, **kwargs) 161 | 162 | 163 | @export 164 | def vit_h_16(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 165 | return _vit(224, 16, 1280, 32, 16, pretrained, pth, progress, **kwargs) 166 | -------------------------------------------------------------------------------- /cvm/models/xception.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, load_from_local_or_url 6 | from typing import OrderedDict, Any 7 | 8 | 9 | class SeparableConv2d(nn.Sequential): 10 | def __init__(self, inplanes, planes): 11 | super().__init__( 12 | blocks.DepthwiseConv2d(inplanes, inplanes), 13 | blocks.PointwiseConv2d(inplanes, planes), 14 | nn.BatchNorm2d(planes) 15 | ) 16 | 17 | 18 | class XceptionBlock(nn.Module): 19 | def __init__( 20 | self, 21 | inp, 22 | oup, 23 | stride: int = 1, 24 | expand_first: bool = True, 25 | first_relu: bool = True 26 | ): 27 | super().__init__() 28 | 29 | layers = OrderedDict([]) 30 | if first_relu: 31 | layers['relu1'] = nn.ReLU(inplace=True) 32 | 33 | planes = oup if expand_first else inp 34 | 35 | layers['conv1'] = SeparableConv2d(inp, planes) 36 | layers['relu2'] = nn.ReLU(inplace=True) 37 | layers['conv2'] = SeparableConv2d(planes, oup) 38 | 39 | self.branch1 = nn.Sequential(layers) 40 | 41 | self.branch2 = nn.Identity() 42 | 43 | if stride != 1: 44 | self.branch1.add_module('maxpool', nn.MaxPool2d(3, 2, padding=1)) 45 | self.branch2 = nn.Sequential( 46 | blocks.PointwiseConv2d(inp, oup, stride=2), 47 | nn.BatchNorm2d(oup) 48 | ) 49 | else: 50 | self.branch1.add_module('relu3', nn.ReLU(inplace=True)) 51 | self.branch1.add_module('conv3', SeparableConv2d(oup, oup)) 52 | 53 | self.combine = blocks.Combine('ADD') 54 | 55 | def forward(self, x): 56 | return self.combine([self.branch1(x), self.branch2(x)]) 57 | 58 | 59 | @export 60 | class Xception(nn.Module): 61 | def __init__( 62 | self, 63 | in_channels: int = 3, 64 | num_classes: int = 1000, 65 | thumbnail: bool = False, 66 | **kwargs: Any 67 | ): 68 | super().__init__() 69 | 70 | FRONT_S = 1 if thumbnail else 2 71 | 72 | self.features = nn.Sequential( 73 | blocks.Conv2dBlock(in_channels, 32, stride=FRONT_S, padding=0), 74 | blocks.Conv2dBlock(32, 64, padding=0), 75 | 76 | XceptionBlock(64, 128, stride=FRONT_S, first_relu=False), 77 | XceptionBlock(128, 256, stride=2), 78 | XceptionBlock(256, 728, stride=2), 79 | 80 | *[XceptionBlock(728, 728) for _ in range(8)], 81 | 82 | XceptionBlock(728, 1024, stride=2, expand_first=False), 83 | 84 | SeparableConv2d(1024, 1536), 85 | nn.ReLU(inplace=True), 86 | SeparableConv2d(1536, 2048), 87 | nn.ReLU(inplace=True) 88 | ) 89 | 90 | self.avg = nn.AdaptiveAvgPool2d((1, 1)) 91 | 92 | self.classifier = nn.Linear(2048, num_classes) 93 | 94 | def forward(self, x): 95 | x = self.features(x) 96 | x = self.avg(x) 97 | x = torch.flatten(x, 1) 98 | x = self.classifier(x) 99 | return x 100 | 101 | 102 | @export 103 | def xception(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 104 | model = Xception(**kwargs) 105 | 106 | if pretrained: 107 | 
load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 108 | return model 109 | -------------------------------------------------------------------------------- /cvm/scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | from .cosine_lr import * 2 | from .step_lr import * -------------------------------------------------------------------------------- /cvm/scheduler/cosine_lr.py: -------------------------------------------------------------------------------- 1 | import math 2 | import warnings 3 | import torch.optim as optim 4 | 5 | 6 | __all__ = ['WarmUpCosineLR'] 7 | 8 | 9 | class WarmUpCosineLR(optim.lr_scheduler._LRScheduler): 10 | def __init__(self, optimizer, warmup_steps, steps, min_lr=0.1, last_epoch=-1, verbose=False): 11 | self.warmup_steps = warmup_steps 12 | self.steps = steps - self.warmup_steps 13 | self.min_lr = min_lr 14 | super().__init__(optimizer, last_epoch, verbose) 15 | 16 | def get_lr(self): 17 | if not self._get_lr_called_within_step: 18 | warnings.warn("To get the last learning rate computed by the scheduler, " 19 | "please use `get_last_lr()`.", UserWarning) 20 | 21 | if self.last_epoch < self.warmup_steps: 22 | return [base_lr * (float(1 + self.last_epoch) / self.warmup_steps) for base_lr in self.base_lrs] 23 | 24 | return [self.min_lr + (base_lr - self.min_lr) * (1 + math.cos(math.pi * (1 + self.last_epoch - self.warmup_steps) / self.steps)) / 2 25 | for base_lr in self.base_lrs] 26 | 27 | def __repr__(self) -> str: 28 | return f'WarmUpCosineLR(warmup_steps={self.warmup_steps}, steps={self.steps}, min_lr={self.min_lr})' 29 | -------------------------------------------------------------------------------- /cvm/scheduler/step_lr.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import torch.optim as optim 3 | 4 | 5 | __all__ = ['WarmUpStepLR'] 6 | 7 | 8 | class WarmUpStepLR(optim.lr_scheduler._LRScheduler): 9 | def __init__(self, optimizer, warmup_steps, step_size, gamma=0.1, last_epoch=-1, verbose=False): 10 | self.warmup_steps = warmup_steps 11 | self.step_size = step_size 12 | self.gamma = gamma 13 | super().__init__(optimizer, last_epoch, verbose) 14 | 15 | def get_lr(self): 16 | if not self._get_lr_called_within_step: 17 | warnings.warn("To get the last learning rate computed by the scheduler, " 18 | "please use `get_last_lr()`.", UserWarning) 19 | 20 | if self.last_epoch < self.warmup_steps: 21 | return [base_lr * (float(1 + self.last_epoch) / self.warmup_steps) for base_lr in self.base_lrs] 22 | 23 | milestone = ((self.last_epoch - self.warmup_steps) // self.step_size) 24 | return [base_lr * self.gamma ** milestone for base_lr in self.base_lrs] 25 | 26 | def __repr__(self) -> str: 27 | return f'WarmUpStepLR(warmup_steps={self.warmup_steps}, step_size={self.step_size}, gamma={self.gamma})' 28 | -------------------------------------------------------------------------------- /cvm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logger import * 2 | from .utils import * 3 | from .augment import * 4 | from .ema import * 5 | from .factory import * 6 | from .seg_transforms import * 7 | from .metrics import * -------------------------------------------------------------------------------- /cvm/utils/coco.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | 4 | import torch 5 | import torch.utils.data 6 | import 
torchvision 7 | from PIL import Image 8 | from pycocotools import mask as coco_mask 9 | from .seg_transforms import Compose 10 | 11 | 12 | class FilterAndRemapCocoCategories: 13 | def __init__(self, categories, remap=True): 14 | self.categories = categories 15 | self.remap = remap 16 | 17 | def __call__(self, image, anno): 18 | anno = [obj for obj in anno if obj["category_id"] in self.categories] 19 | if not self.remap: 20 | return image, anno 21 | anno = copy.deepcopy(anno) 22 | for obj in anno: 23 | obj["category_id"] = self.categories.index(obj["category_id"]) 24 | return image, anno 25 | 26 | 27 | def convert_coco_poly_to_mask(segmentations, height, width): 28 | masks = [] 29 | for polygons in segmentations: 30 | rles = coco_mask.frPyObjects(polygons, height, width) 31 | mask = coco_mask.decode(rles) 32 | if len(mask.shape) < 3: 33 | mask = mask[..., None] 34 | mask = torch.as_tensor(mask, dtype=torch.uint8) 35 | mask = mask.any(dim=2) 36 | masks.append(mask) 37 | if masks: 38 | masks = torch.stack(masks, dim=0) 39 | else: 40 | masks = torch.zeros((0, height, width), dtype=torch.uint8) 41 | return masks 42 | 43 | 44 | class ConvertCocoPolysToMask: 45 | def __call__(self, image, anno): 46 | w, h = image.size 47 | segmentations = [obj["segmentation"] for obj in anno] 48 | cats = [obj["category_id"] for obj in anno] 49 | if segmentations: 50 | masks = convert_coco_poly_to_mask(segmentations, h, w) 51 | cats = torch.as_tensor(cats, dtype=masks.dtype) 52 | # merge all instance masks into a single segmentation map 53 | # with its corresponding categories 54 | target, _ = (masks * cats[:, None, None]).max(dim=0) 55 | # discard overlapping instances 56 | target[masks.sum(0) > 1] = 255 57 | else: 58 | target = torch.zeros((h, w), dtype=torch.uint8) 59 | target = Image.fromarray(target.numpy()) 60 | return image, target 61 | 62 | 63 | def _coco_remove_images_without_annotations(dataset, cat_list=None): 64 | def _has_valid_annotation(anno): 65 | # if it's empty, there is no annotation 66 | if len(anno) == 0: 67 | return False 68 | # if more than 1k pixels occupied in the image 69 | return sum(obj["area"] for obj in anno) > 1000 70 | 71 | if not isinstance(dataset, torchvision.datasets.CocoDetection): 72 | raise TypeError( 73 | f"This function expects dataset of type torchvision.datasets.CocoDetection, instead got {type(dataset)}" 74 | ) 75 | 76 | ids = [] 77 | for ds_idx, img_id in enumerate(dataset.ids): 78 | ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) 79 | anno = dataset.coco.loadAnns(ann_ids) 80 | if cat_list: 81 | anno = [obj for obj in anno if obj["category_id"] in cat_list] 82 | if _has_valid_annotation(anno): 83 | ids.append(ds_idx) 84 | 85 | dataset = torch.utils.data.Subset(dataset, ids) 86 | return dataset 87 | 88 | 89 | def get_coco(root, image_set, transforms): 90 | PATHS = { 91 | "train": ("train2017", os.path.join("annotations", "instances_train2017.json")), 92 | "val": ("val2017", os.path.join("annotations", "instances_val2017.json")), 93 | # "train": ("val2017", os.path.join("annotations", "instances_val2017.json")) 94 | } 95 | CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72] 96 | 97 | transforms = Compose([FilterAndRemapCocoCategories(CAT_LIST, remap=True), ConvertCocoPolysToMask(), transforms]) 98 | 99 | img_folder, ann_file = PATHS[image_set] 100 | img_folder = os.path.join(root, img_folder) 101 | ann_file = os.path.join(root, ann_file) 102 | 103 | dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, 
transforms=transforms) 104 | 105 | if image_set == "train": 106 | dataset = _coco_remove_images_without_annotations(dataset, CAT_LIST) 107 | 108 | return dataset 109 | -------------------------------------------------------------------------------- /cvm/utils/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ExponentialMovingAverage(torch.optim.swa_utils.AveragedModel): 5 | """Maintains moving averages of model parameters using an exponential decay. 6 | ``ema_avg = decay * avg_model_param + (1 - decay) * model_param`` 7 | ``torch.optim.swa_utils.AveragedModel`` 8 | is used to compute the EMA. 9 | """ 10 | 11 | def __init__(self, model, decay, device="cpu"): 12 | def ema_avg(avg_model_param, model_param, num_averaged): 13 | return decay * avg_model_param + (1 - decay) * model_param 14 | 15 | super().__init__(model, device, ema_avg) 16 | 17 | def update_parameters(self, model): 18 | for p_swa, p_model in zip(self.module.state_dict().values(), model.state_dict().values()): 19 | device = p_swa.device 20 | p_model_ = p_model.detach().to(device) 21 | if self.n_averaged == 0: 22 | p_swa.detach().copy_(p_model_) 23 | else: 24 | p_swa.detach().copy_(self.avg_fn(p_swa.detach(), p_model_, self.n_averaged.to(device))) 25 | self.n_averaged += 1 26 | -------------------------------------------------------------------------------- /cvm/utils/logger.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | import logging 4 | from os.path import dirname, abspath, exists, join 5 | import torch.distributed as dist 6 | from .utils import is_dist_avail_and_initialized 7 | 8 | __all__ = ['make_logger'] 9 | 10 | 11 | def make_logger(run_name, log_dir='logs', rank: int = 0): 12 | logger = logging.getLogger(run_name) 13 | logger.propagate = False 14 | 15 | log_filepath = join(log_dir, f'{run_name}_{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.log') 16 | 17 | log_dir = dirname(abspath(log_filepath)) 18 | if not exists(log_dir) and rank == 0: 19 | os.makedirs(log_dir) 20 | 21 | if is_dist_avail_and_initialized(): 22 | dist.barrier() 23 | 24 | if not logger.handlers and rank == 0: # execute only if logger doesn't already exist 25 | file_handler = logging.FileHandler(log_filepath, mode='a', encoding='utf-8') 26 | stream_handler = logging.StreamHandler(os.sys.stdout) 27 | 28 | formatter = logging.Formatter( 29 | '%(asctime)s - %(filename)s:%(lineno)d[%(levelname)s]: %(message)s', 30 | datefmt='%H:%M:%S' 31 | ) 32 | 33 | file_handler.setFormatter(formatter) 34 | stream_handler.setFormatter(formatter) 35 | 36 | logger.addHandler(file_handler) 37 | logger.addHandler(stream_handler) 38 | logger.setLevel(logging.INFO) 39 | return logger 40 | -------------------------------------------------------------------------------- /cvm/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | __all__ = ['accuracy', 'accuracy_k', 'ConfusionMatrix'] 4 | 5 | 6 | def accuracy(output, target, topk=(1,)): 7 | """Computes the accuracy over the k top predictions for the specified values of k""" 8 | with torch.inference_mode(): 9 | maxk = max(topk) 10 | batch_size = target.size(0) 11 | if target.ndim == 2: 12 | target = target.max(dim=1)[1] 13 | 14 | _, pred = output.topk(maxk, 1, True, True) 15 | pred = pred.t() 16 | correct = pred.eq(target[None]) 17 | 18 | res = [] 19 | for k in topk: 20 | correct_k = 
correct[:k].flatten().sum(dtype=torch.float32) 21 | res.append(correct_k * (100.0 / batch_size)) 22 | return res 23 | 24 | 25 | def accuracy_k(output: torch.Tensor, target): 26 | 27 | with torch.inference_mode(): 28 | output = output.max(dim=1)[1] 29 | if target.ndim == 2: 30 | target = target.max(dim=1)[1] 31 | 32 | mask = output.eq(target) 33 | 34 | return target[mask] 35 | 36 | 37 | class ConfusionMatrix: 38 | def __init__(self, num_classes, eps=1e-6): 39 | self.n = num_classes 40 | self.mat = None 41 | self.eps = eps 42 | 43 | def update(self, pr, gt): 44 | if self.mat is None: 45 | self.mat = torch.zeros( 46 | (self.n, self.n), dtype=torch.int64, device=pr.device) 47 | 48 | with torch.inference_mode(): 49 | k = (gt >= 0) & (gt < self.n) 50 | inds = self.n * gt[k].to(torch.int64) + pr[k] 51 | self.mat += torch.bincount(inds, minlength=self.n ** 2).reshape(self.n, self.n) 52 | 53 | def all_reduce(self): 54 | if not torch.distributed.is_available(): 55 | return 56 | if not torch.distributed.is_initialized(): 57 | return 58 | 59 | torch.distributed.barrier() 60 | torch.distributed.all_reduce(self.mat) 61 | 62 | @property 63 | def intersection(self): 64 | return torch.diag(self.mat) 65 | 66 | @property 67 | def union(self): 68 | return self.mat.sum(0) + self.mat.sum(1) 69 | 70 | @property 71 | def iou(self): 72 | return (self.intersection / (self.union - self.intersection + self.eps)).tolist() 73 | 74 | @property 75 | def mean_iou(self): 76 | return (self.intersection / (self.union - self.intersection + self.eps)).mean().item() 77 | 78 | @property 79 | def pa(self): 80 | return (self.intersection.sum() / self.mat.sum()).item() 81 | 82 | @property 83 | def mean_pa(self): 84 | return (self.intersection / self.mat.sum(1)).tolist() 85 | -------------------------------------------------------------------------------- /cvm/utils/seg_transforms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torchvision.transforms.v2 as T 4 | from torchvision.transforms.v2 import functional as TF 5 | 6 | 7 | class Compose(T.Compose): 8 | def __init__(self, transforms): 9 | super().__init__(transforms) 10 | 11 | def __call__(self, images, targets): 12 | for t in self.transforms: 13 | images, targets = t(images, targets) 14 | return images, targets 15 | 16 | 17 | class ToImage: 18 | def __call__(self, images, targets): 19 | return TF.to_image(images), torch.as_tensor(np.array(targets), dtype=torch.int64) 20 | 21 | def __repr__(self): 22 | return self.__class__.__name__ + '()' 23 | 24 | 25 | class RandomHorizontalFlip(torch.nn.Module): 26 | def __init__(self, p=0.5): 27 | super().__init__() 28 | self.p = p 29 | 30 | def forward(self, images, targets): 31 | if torch.rand(1) < self.p: 32 | return TF.hflip(images), TF.hflip(targets) 33 | return images, targets 34 | 35 | def __repr__(self): 36 | return self.__class__.__name__ + '(p={})'.format(self.p) 37 | 38 | 39 | class RandomVerticalFlip(torch.nn.Module): 40 | def __init__(self, p=0.5): 41 | super().__init__() 42 | self.p = p 43 | 44 | def forward(self, images, targets): 45 | if torch.rand(1) < self.p: 46 | return TF.vflip(images), TF.vflip(targets) 47 | return images, targets 48 | 49 | def __repr__(self): 50 | return self.__class__.__name__ + '(p={})'.format(self.p) 51 | 52 | 53 | class ToDtype(T.ToDtype): 54 | def __init__(self, dtype, scale: bool = False) -> None: 55 | super().__init__(dtype=dtype, scale=scale) 56 | 57 | def forward(self, images, targets): 58 | return 
super().forward(images), targets 59 | 60 | 61 | class Normalize(T.Normalize): 62 | def __init__(self, mean, std, inplace=False): 63 | super().__init__(mean, std, inplace) 64 | 65 | def forward(self, images, targets): 66 | return super().forward(images), targets 67 | 68 | 69 | class Resize(T.Resize): 70 | def __init__(self, size, interpolation=TF.InterpolationMode.BILINEAR): 71 | super().__init__(size, interpolation=interpolation) 72 | 73 | def forward(self, images, targets): 74 | images = TF.resize(images, self.size, self.interpolation) 75 | targets = TF.resize(targets, self.size, TF.InterpolationMode.NEAREST) 76 | 77 | return images, targets 78 | 79 | 80 | class RandomCrop(T.RandomCrop): 81 | def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode="constant"): 82 | super().__init__( 83 | size, 84 | padding=padding, 85 | pad_if_needed=pad_if_needed, 86 | fill=fill, 87 | padding_mode=padding_mode 88 | ) 89 | 90 | def forward(self, images, targets): 91 | img = images 92 | if self.padding is not None: 93 | img = TF.pad(img, self.padding, self.fill, self.padding_mode) 94 | width, height = TF.get_image_size(img) 95 | # pad the width if needed 96 | if self.pad_if_needed and width < self.size[1]: 97 | padding = [self.size[1] - width, 0] 98 | img = TF.pad(img, padding, self.fill, self.padding_mode) 99 | # pad the height if needed 100 | if self.pad_if_needed and height < self.size[0]: 101 | padding = [0, self.size[0] - height] 102 | img = TF.pad(img, padding, self.fill, self.padding_mode) 103 | 104 | i, j, h, w = self.get_params(img, self.size) 105 | 106 | return TF.crop(img, i, j, h, w), TF.crop(targets, i, j, h, w) 107 | 108 | 109 | class RandomResizedCrop(T.RandomResizedCrop): 110 | def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. 
/ 3.), interpolation=TF.InterpolationMode.BILINEAR): 111 | super().__init__(size, scale=scale, ratio=ratio, interpolation=interpolation) 112 | 113 | def forward(self, images, targets): 114 | i, j, h, w = self.get_params(images, self.scale, self.ratio) 115 | images = TF.resized_crop(images, i, j, h, w, self.size, self.interpolation) 116 | targets = TF.resized_crop(targets, i, j, h, w, self.size, TF.InterpolationMode.NEAREST) 117 | return images, targets 118 | -------------------------------------------------------------------------------- /cvm/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.22' -------------------------------------------------------------------------------- /flops.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import torch 4 | from cvm.utils import list_models, create_model 5 | from fvcore.nn import FlopCountAnalysis, flop_count_str, flop_count_table 6 | 7 | 8 | def print_model(model, str: bool = False, max_depth: int = 3): 9 | model.eval() 10 | flops = FlopCountAnalysis(model, input) 11 | 12 | print(flop_count_str(flops) if str else flop_count_table(flops, max_depth=max_depth)) 13 | 14 | 15 | if __name__ == '__main__': 16 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 17 | parser.add_argument('--model', '-m', type=str) 18 | parser.add_argument('--str', action='store_true') 19 | parser.add_argument('--list-models', type=str, default=None) 20 | parser.add_argument('--in-channels', type=int, default=3) 21 | parser.add_argument('--num-classes', type=int, default=1000) 22 | parser.add_argument('--image-size', type=int, default=224) 23 | parser.add_argument('--max-depth', type=int, default=3) 24 | 25 | args = parser.parse_args() 26 | 27 | input = torch.randn(1, args.in_channels, args.image_size, args.image_size) 28 | 29 | thumbnail = True if args.image_size < 100 else False 30 | 31 | if args.list_models: 32 | print(json.dumps(list_models(args.list_models), indent=4)) 33 | else: 34 | print_model( 35 | create_model( 36 | args.model, 37 | thumbnail=thumbnail, 38 | in_channels=args.in_channels, 39 | num_classes=args.num_classes, 40 | cuda=False, 41 | ), 42 | args.str, 43 | args.max_depth 44 | ) 45 | -------------------------------------------------------------------------------- /info.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from torchinfo import summary 3 | from cvm.utils import create_model 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 7 | parser.add_argument('--model', '-m', type=str) 8 | 9 | args = parser.parse_args() 10 | 11 | model = create_model(args.model, cuda=False) 12 | 13 | summary( 14 | model, 15 | input_size=(1, 3, 224, 224), 16 | col_names=("output_size", "num_params", 'mult_adds') 17 | ) 18 | -------------------------------------------------------------------------------- /profiler.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | from tqdm import tqdm 4 | from torch.profiler.profiler import tensorboard_trace_handler 5 | from cvm.utils import create_model 6 | 7 | if __name__ == '__main__': 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--model', type=str, default='micronet_b1_0') 10 | parser.add_argument('--batch-size', type=int, default=64, metavar='N') 11 | 
parser.add_argument('--amp', action='store_true') 12 | parser.add_argument('--torch', action='store_true', help='suffix the trace dir with "_torch".') 13 | args = parser.parse_args() 14 | model = create_model(args.model) 15 | model.eval() 16 | 17 | images = torch.randn([args.batch_size, 3, 224, 224]).cuda() 18 | 19 | suffix = '_torch' if args.torch else '' 20 | with torch.profiler.profile( 21 | schedule=torch.profiler.schedule( 22 | wait=1, 23 | warmup=2, 24 | active=2, 25 | repeat=1 26 | ), 27 | profile_memory=True, 28 | on_trace_ready=tensorboard_trace_handler( 29 | f'logs/profiles/{args.model}{suffix}' 30 | ), 31 | with_stack=True, 32 | record_shapes=True, 33 | with_flops=True, 34 | activities=[ 35 | torch.profiler.ProfilerActivity.CPU, 36 | torch.profiler.ProfilerActivity.CUDA 37 | ] 38 | ) as prof, tqdm(total=5) as pbar: 39 | for _ in range(5): 40 | with torch.amp.autocast(device_type='cuda', enabled=args.amp): 41 | output = model(images) 42 | 43 | prof.step() 44 | pbar.update() 45 | 46 | print('>>>>>>>> DONE!!!') 47 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.10.0 2 | torchvision>=0.11.1 3 | fvcore 4 | torchinfo 5 | tqdm 6 | nvidia-dali-cuda110>=1.7.0 -------------------------------------------------------------------------------- /resize_imagenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import shutil 4 | from cvm.utils import * 5 | import cv2 6 | from tqdm import tqdm 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='ImageNet Resizing') 11 | parser.add_argument('--src', type=str, default='/datasets/ILSVRC2012') 12 | parser.add_argument('--dst', type=str, default='/datasets/ILSVRC2012_R') 13 | parser.add_argument('--max-size', type=int, default=256) 14 | 15 | return parser.parse_args() 16 | 17 | 18 | if __name__ == '__main__': 19 | args = parse_args() 20 | print(args) 21 | 22 | dirs = os.listdir(os.path.join(args.src, 'train')) 23 | dirs.sort() 24 | for i, cls in enumerate(dirs): 25 | files = os.listdir(os.path.join(args.src, 'train', cls)) 26 | 27 | if not os.path.exists(os.path.join(args.dst, 'train', cls)): 28 | os.makedirs(os.path.join(args.dst, 'train', cls)) 29 | 30 | for f in tqdm(files, desc=f'Resizing [{i:>3}/{len(dirs)}]', unit='images', leave=False, ascii=True): 31 | src_file, dst_file = os.path.join(args.src, 'train', cls, f), os.path.join(args.dst, 'train', cls, f) 32 | image = cv2.imread(src_file) 33 | 34 | if min(image.shape[0], image.shape[1]) <= args.max_size: 35 | shutil.copyfile(src_file, dst_file) 36 | else: 37 | if image.shape[0] < image.shape[1]: 38 | size = (int((image.shape[1] / image.shape[0]) * args.max_size), args.max_size) 39 | else: 40 | size = (args.max_size, int((image.shape[0] / image.shape[1]) * args.max_size)) 41 | 42 | image = cv2.resize(image, size, interpolation=cv2.INTER_AREA) 43 | cv2.imwrite(dst_file, image) 44 | -------------------------------------------------------------------------------- /results.md: -------------------------------------------------------------------------------- 1 | ## Classification 2 | 3 | ### ImageNet-1K 4 | 5 | | Model | Year | Params | FLOPS | Loader | Resize | Crop | Top@1 | Top@5 | Real Top@1 | Real Top@5 | 6 | | -------------------------- | ---- | ------: | ------: | -----: | :----: | :---: | :----: | :----: | :--------: | :--------: | 7 | | ResNet18 | 2015 | 11.69M | 1.819G | DALI | 256 | 224 | 71.060 | 89.922 | | | 8 | | MobileNet V1 $\times$ 0.35 
| 2017 | 0.766M | 0.079G | DALI | 232 | 224 | 58.842 | 81.974 | | | 9 | | MobileNet V1 $\times$ 0.5 | 2017 | 1.332M | 0.155G | DALI | 232 | 224 | 65.126 | 86.190 | | | 10 | | MobileNet V1 $\times$ 0.75 | 2017 | 2.586M | 0.333G | DALI | 232 | 224 | 69.688 | 89.474 | | | 11 | | MobileNet V1 $\times$ 1.0 | 2017 | 4.232M | 0.579G | DALI | 232 | 224 | 73.264 | 91.222 | 80.357 | 94.310 | 12 | | MobileNet V2 $\times$ 0.35 | 2018 | 1.677M | 0.065G | DALI | 232 | 224 | 58.804 | 81.160 | | | 13 | | MobileNet V2 $\times$ 0.5 | 2018 | 1.969M | 0.104G | DALI | 232 | 224 | 63.458 | 84.890 | | | 14 | | MobileNet V2 $\times$ 0.75 | 2018 | 2.636M | 0.221G | DALI | 232 | 224 | 68.448 | 88.298 | | | 15 | | MobileNet V2 $\times$ 1.0 | 2018 | 3.505M | 0.314G | DALI | 232 | 224 | 72.154 | 90.736 | | | 16 | | ShuffleNet V2 $\times$ 2.0 | 2018 | 7.394M | 0.591G | DALI | 232 | 224 | 74.368 | 92.050 | | | 17 | | ViT-B/32 | 2020 | 88.224M | 4.414G | DALI | 232 | 224 | 75.438 | 92.264 | | | 18 | | ViT-B/16 | 2020 | 86.568M | 17.583G | DALI | 232 | 224 | 80.972 | 95.290 | | | 19 | | VGNetG $\times$ 1.0MP | 2022 | 1.000M | 0.144G | DALI | 232 | 224 | 68.128 | 88.312 | | | 20 | | VGNetG $\times$ 1.0MP+SE | 2022 | 1.146M | 0.145G | DALI | 232 | 224 | 70.122 | 89.524 | | | 21 | | VGNetG $\times$ 1.5MP | 2022 | 1.506M | 0.191G | DALI | 232 | 224 | 70.494 | 89.684 | | | 22 | | VGNetG $\times$ 1.5MP+SE | 2022 | 1.706M | 0.192G | DALI | 232 | 224 | 72.422 | 90.664 | | | 23 | | VGNetG $\times$ 2.0MP | 2022 | 2.01M | 0.304G | DALI | 232 | 224 | 72.314 | 90.730 | | | 24 | | VGNetG $\times$ 2.0MP+SE | 2022 | 2.349M | 0.306G | DALI | 232 | 224 | 74.324 | 91.788 | | | 25 | | VGNetG $\times$ 2.5MP | 2022 | 2.497M | 0.403G | DALI | 232 | 224 | 73.740 | 91.516 | | | 26 | | VGNetG $\times$ 2.5MP+SE | 2022 | 2.927M | 0.405G | DALI | 232 | 224 | 75.590 | 92.568 | | | 27 | | RegNetX-400MF | 2020 | 5.496M | 0.420G | DALI | 256 | 224 | 73.156 | 91.320 | | | 28 | | ConvNeXt-Tiny | 2022 | 28.589M | 4.470G | DALI | 236 | 224 | 82.428 | 96.132 | | | 29 | | ConvNeXt-Small | 2022 | 50.224M | 8.705G | DALI | 230 | 224 | 83.544 | 96.640 | | | 30 | | ConvNeXt-Base | 2022 | 88.591M | 15.384G | DALI | 232 | 224 | 83.936 | 96.886 | | | 31 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | 5 | 6 | def run_script(script: str, args: str = ''): 7 | cmd = f'torchrun --standalone --nnodes=1 --nproc_per_node={torch.cuda.device_count()} {script} {args}' 8 | print(f'\n====\n > {cmd}\n====\n') 9 | os.system(cmd) 10 | time.sleep(1) 11 | 12 | 13 | if __name__ == '__main__': 14 | # ImageNet-1K 15 | imagenet = f'train.py '\ 16 | '--data-dir "/datasets/ILSVRC2012" '\ 17 | '--crop-size 192 --val-resize-size 232 --val-crop-size 224 ' \ 18 | '--workers 16 '\ 19 | '--amp '\ 20 | '--dali --dali-cpu '\ 21 | '--lr 0.2 --lr-sched cosine --momentum 0.9 --wd 0.0001 --no-bias-bn-wd '\ 22 | '--batch-size 512 '\ 23 | '--epochs 100 --warmup-epochs 5 '\ 24 | '--print-freq 250 ' \ 25 | '--label-smoothing 0.1 ' 26 | # '--mixup-alpha 0.8 --cutmix-alpha 1.0 ' \ 27 | # '--color-jitter 0.4 --random-erasing 0.25 '\ 28 | # '--augment rand-m9-mstd0.5 '\ 29 | # '--model-ema --model-ema-decay 0.9999 ' 30 | 31 | # ImageNet-398 32 | tiny_imagenet = f'train.py '\ 33 | '--data-dir "/datasets/TINY_ILSVRC2012" '\ 34 | '--crop-size 176 --val-resize-size 232 --val-crop-size 224 --num-classes 398 ' \ 35 | '--workers 8 '\ 36 | 
'--amp '\ 37 | '--lr 0.4 --lr-sched cosine --momentum 0.9 --no-bias-bn-wd '\ 38 | '--batch-size 1024 '\ 39 | '--warmup-epochs 5 '\ 40 | '--print-freq 90 ' \ 41 | '--label-smoothing 0.1 ' 42 | 43 | mnist = f'train.py '\ 44 | '--dataset MNIST --data-dir "/datasets/MNIST" --in-channels 1 --hflip 0.0 '\ 45 | '--crop-size 28 --val-resize-size 28 --val-crop-size 28 --crop-padding 4 --num-classes 10 ' \ 46 | '--workers 8 '\ 47 | '--amp '\ 48 | '--lr 0.4 --lr-sched cosine --momentum 0.9 --wd 0.001 --no-bias-bn-wd '\ 49 | '--batch-size 2048 --epochs 30 '\ 50 | '--warmup-epochs 3 '\ 51 | '--print-freq 10 ' \ 52 | '--label-smoothing 0.1 ' 53 | 54 | # CIFAR10/100 55 | cifar = f'train.py '\ 56 | '--dataset CIFAR100 --data-dir "/datasets/CIFAR100" '\ 57 | '--crop-size 32 --val-resize-size 32 --val-crop-size 32 ' \ 58 | '--workers 8 '\ 59 | '--amp '\ 60 | '--lr 0.4 --lr-sched cosine --momentum 0.9 --wd 0.0005 --no-bias-bn-wd '\ 61 | '--batch-size 1024 '\ 62 | '--epochs 100 --warmup-epochs 5 '\ 63 | '--print-freq 15 ' \ 64 | '--label-smoothing 0.1 '\ 65 | '--random-erasing 0.25 --dropout-rate 0.25 --augment torch/autoaug-cifar10 ' 66 | 67 | # VOC segmentation 68 | voc = f'train_seg.py '\ 69 | '--dataset VOCSegmentation --data-dir "/datasets/PASCAL_VOC" '\ 70 | '--workers 8 '\ 71 | '--amp '\ 72 | '--lr 0.01 --lr-sched cosine --momentum 0.9 --wd 0.0001 --no-bias-bn-wd '\ 73 | '--batch-size 16 '\ 74 | '--print-freq 30 ' \ 75 | '--epochs 100 --aux-loss' 76 | 77 | run_script(imagenet, '--model mobilenet_v1_x1_0') 78 | # run_script(voc, '--pretrained-backbone --model seg/fcn_regnet_x_400mf') 79 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from codecs import open 3 | from setuptools import find_packages, setup 4 | 5 | exec(open('cvm/version.py').read()) 6 | setup( 7 | name='cvm', 8 | version=__version__, 9 | description='Computer Vision Models', 10 | url='https://github.com/ffiirree/cv-models', 11 | author='Liangqi Zhang', 12 | author_email='zhliangqi@gmail.com', 13 | python_requires='>=3.8', 14 | install_requires=[ 15 | 'torch >= 1.12', 16 | 'torchvision', 17 | 'fvcore', 18 | 'torchinfo', 19 | 'tqdm', 20 | 'pycocotools', 21 | 'nvidia-dali-cuda110 >= 1.16' 22 | ], 23 | packages=find_packages(exclude=['tests']) 24 | ) -------------------------------------------------------------------------------- /tests/test_blocks.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import pytest 3 | import torch 4 | import torch.nn as nn 5 | from cvm.models.ops import blocks 6 | 7 | 8 | def test_se_block_forward(): 9 | inputs = torch.randn(16, 3, 56, 56) 10 | 11 | se = blocks.SEBlock(3, 0.25) 12 | 13 | outputs = se(inputs) 14 | assert outputs.shape == inputs.shape 15 | assert isinstance(se.act, nn.ReLU) 16 | assert isinstance(se.gate, nn.Sigmoid) 17 | 18 | 19 | def test_se_block_decorator(): 20 | with blocks.se(inner_nonlinear=nn.SiLU, gating_fn=nn.Hardsigmoid): 21 | se = blocks.SEBlock(3, 0.25) 22 | 23 | assert isinstance(se.act, nn.SiLU) 24 | assert isinstance(se.gate, nn.Hardsigmoid) 25 | 26 | 27 | def test_normalizer_decorator(): 28 | with blocks.normalizer(None): 29 | layers = blocks.norm_activation(3) 30 | 31 | assert len(layers) == 1 32 | assert isinstance(layers[0], nn.ReLU) 33 | 34 | with blocks.normalizer(nn.LayerNorm, position='before'): 35 | layers = blocks.norm_activation(3) 36 | 37 | assert 
len(layers) == 2 38 | assert isinstance(layers[0], nn.LayerNorm) 39 | assert isinstance(layers[1], nn.ReLU) 40 | 41 | with blocks.normalizer(partial(nn.BatchNorm2d, eps=0.1), position='after'): 42 | layers = blocks.norm_activation(3) 43 | 44 | assert len(layers) == 2 45 | assert isinstance(layers[0], nn.ReLU) 46 | assert isinstance(layers[1], nn.BatchNorm2d) 47 | assert layers[1].eps == 0.1 48 | 49 | 50 | def test_nonlinear_decorator(): 51 | with blocks.nonlinear(None): 52 | layers = blocks.norm_activation(3) 53 | 54 | assert len(layers) == 1 55 | assert isinstance(layers[0], nn.BatchNorm2d) 56 | 57 | with blocks.nonlinear(nn.SiLU): 58 | layers = blocks.norm_activation(3) 59 | 60 | assert len(layers) == 2 61 | assert isinstance(layers[0], nn.BatchNorm2d) 62 | assert isinstance(layers[1], nn.SiLU) 63 | -------------------------------------------------------------------------------- /tests/test_models.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | from cvm.models.core import SegmentationModel 4 | from cvm.utils import list_models, create_model 5 | 6 | 7 | @pytest.mark.parametrize('name', list_models('cvm')) 8 | def test_model_forward(name): 9 | model = create_model( 10 | name, 11 | dropout_rate=0., 12 | drop_path_rate=0., 13 | num_classes=10, 14 | cuda=False 15 | ) 16 | 17 | model.eval() 18 | 19 | inputs = torch.randn((1, 3, 224, 224)) 20 | outputs = model(inputs) 21 | 22 | if name in ['unet', 'vae', 'dcgan']: 23 | ... 24 | elif isinstance(model, SegmentationModel): 25 | assert outputs[0].shape == torch.Size([1, 10, 224, 224]) 26 | assert not torch.isnan(outputs[0]).any(), 'Output included NaNs' 27 | else: 28 | assert outputs.shape == torch.Size([1, 10]) 29 | assert not torch.isnan(outputs).any(), 'Output included NaNs' 30 | -------------------------------------------------------------------------------- /validate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import torch 4 | from tqdm import tqdm 5 | from cvm.utils import accuracy, AverageMeter, create_loader, create_model, list_models, list_datasets 6 | from cvm.data import ImageNet1KRealLabelsEvaluator 7 | from cvm.models.ops.functional import * 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation') 12 | parser.add_argument('--dataset', type=str, default='ImageNet', choices=list_datasets() + ['ImageNet'], 13 | help='name of the dataset. (default: ImageNet)') 14 | parser.add_argument('--data-dir', type=str, default='/datasets/ILSVRC2012', 15 | help='path to the ImageNet dataset.') 16 | parser.add_argument('--model', '-m', type=str, default='mobilenet_v1_x1_0', choices=list_models(), 17 | help='type of model to use. (default: mobilenet_v1_x1_0)') 18 | parser.add_argument('--real-labels', type=str, default=None) 19 | parser.add_argument('--model-path', type=str, default=None) 20 | parser.add_argument('--model-weights', type=str, default='DEFAULT') 21 | parser.add_argument('--workers', '-j', type=int, default=8, metavar='N', 22 | help='number of data loading workers per GPU. (default: 8)') 23 | parser.add_argument('--batch-size', type=int, default=256, metavar='N', 24 | help='mini-batch size, this is the total batch size of all GPUs. 
(default: 256)') 25 | parser.add_argument('--num-classes', type=int, default=1000, metavar='N', 26 | help='number of label classes') 27 | parser.add_argument('--in-channels', type=int, default=3, metavar='N') 28 | parser.add_argument('--crop-size', type=int, default=224) 29 | parser.add_argument('--resize-size', type=int, default=232) 30 | parser.add_argument('--dali', action='store_true', help='use nvidia dali.') 31 | parser.add_argument('--dali-cpu', action='store_true', 32 | help='runs CPU based version of DALI pipeline. (default: false)') 33 | parser.add_argument('--bandpass', type=int, nargs='+', default=None) 34 | parser.add_argument('--bandreject', type=int, nargs='+', default=None) 35 | parser.add_argument('--filter-type', type=str, default="ideal", choices=['ideal', 'gaussian']) 36 | return parser.parse_args() 37 | 38 | 39 | def validate(val_loader, model, real_evaluator, args): 40 | top1 = AverageMeter() 41 | top5 = AverageMeter() 42 | 43 | mask = get_distance_grid(args.crop_size) 44 | 45 | model.eval() 46 | for (images, target) in tqdm(val_loader, desc='validating', unit='batch'): 47 | if args.bandpass is not None: 48 | assert len(args.bandpass) == 2, '--bandpass : [min, max]' 49 | if args.filter_type == 'ideal': 50 | kernel = (mask < args.bandpass[0]) | (mask > args.bandpass[1]) 51 | images = spectral_filter(images, lambda fr: torch.masked_fill(fr, kernel.to(fr.device), 0.0)) 52 | elif args.filter_type == 'gaussian': 53 | kernel = get_gaussian_bandpass_kernel2d( 54 | images.size()[-1], 55 | (args.bandpass[0] + args.bandpass[1]) / 2, 56 | args.bandpass[1] - args.bandpass[0] 57 | ) 58 | images = spectral_filter(images, lambda fr: fr * kernel.to(fr.device)) 59 | 60 | if args.bandreject is not None: 61 | assert len(args.bandreject) == 2, '--bandreject : [min, max]' 62 | if args.filter_type == 'ideal': 63 | kernel = (mask > args.bandreject[0]) & (mask < args.bandreject[1]) 64 | images = spectral_filter(images, lambda fr: torch.masked_fill(fr, kernel.to(fr.device), 0.0)) 65 | elif args.filter_type == 'gaussian': 66 | kernel = get_gaussian_bandpass_kernel2d( 67 | images.size()[-1], 68 | (args.bandreject[0] + args.bandreject[1]) / 2, 69 | args.bandreject[1] - args.bandreject[0] 70 | ) 71 | images = spectral_filter(images, lambda fr: fr * (1.0 - kernel.to(fr.device))) 72 | 73 | with torch.inference_mode(): 74 | output = model(images) 75 | 76 | if real_evaluator: 77 | real_evaluator.put(output) 78 | 79 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 80 | 81 | top1.update(acc1.item(), images.size(0)) 82 | top5.update(acc5.item(), images.size(0)) 83 | 84 | print( 85 | f' ================\n - top1: {top1.avg:6.3f}\n - top5: {top5.avg:6.3f}\n ================' 86 | ) 87 | if real_evaluator: 88 | print( 89 | f'Real Labels: \n ================\n - top1: {real_evaluator.accuracy[1]:6.3f}\n - top5: {real_evaluator.accuracy[5]:6.3f}\n ================' 90 | ) 91 | 92 | 93 | if __name__ == '__main__': 94 | assert torch.cuda.is_available(), 'CUDA IS NOT AVAILABLE!!' 
95 | torch.backends.cudnn.benchmark = True 96 | 97 | args = parse_args() 98 | print(json.dumps(vars(args), indent=4)) 99 | 100 | assert not (args.real_labels and args.dali), 'real-labels evaluation is not supported with the DALI loader.' 101 | 102 | model = create_model( 103 | args.model, 104 | in_channels=args.in_channels, 105 | num_classes=args.num_classes, 106 | thumbnail=(args.crop_size < 128), 107 | pretrained=True, 108 | pth=args.model_path, 109 | weights=args.model_weights 110 | ) 111 | 112 | val_loader = create_loader( 113 | args.dataset, 114 | root=args.data_dir, 115 | is_training=False, 116 | batch_size=args.batch_size, 117 | val_resize_size=args.resize_size, 118 | val_crop_size=args.crop_size, 119 | workers=args.workers, 120 | dali=args.dali, 121 | dali_cpu=args.dali_cpu 122 | ) 123 | 124 | real_evaluator = ImageNet1KRealLabelsEvaluator( 125 | val_loader.dataset.samples, 126 | args.real_labels 127 | ) if args.real_labels else None 128 | 129 | validate(val_loader, model, real_evaluator, args) 130 | --------------------------------------------------------------------------------