├── .gitignore ├── LICENSE ├── README.md ├── adversarial_attack.py ├── benchmark.py ├── convert.py ├── cvm ├── __init__.py ├── attacks │ ├── __init__.py │ ├── attack.py │ ├── fgsm.py │ └── pgd.py ├── data │ ├── __init__.py │ ├── constants.py │ ├── imagenet_1k.py │ ├── imagenet_1k_real_labels.py │ ├── loader.py │ └── samplers.py ├── loss │ ├── __init__.py │ └── soft_label_cross_entropy_loss.py ├── models │ ├── __init__.py │ ├── alexnet.py │ ├── convmixer.py │ ├── convnext.py │ ├── densenet.py │ ├── det │ │ ├── __init__.py │ │ └── yolov1.py │ ├── efficientnet.py │ ├── efficientnetv2.py │ ├── gan │ │ ├── __init__.py │ │ └── dcgan.py │ ├── ghostnet.py │ ├── googlenet.py │ ├── gssdnet.py │ ├── inception_v3.py │ ├── inception_v4.py │ ├── mlp_mixer.py │ ├── mnasnet.py │ ├── mobilenet.py │ ├── mobilenetv2.py │ ├── mobilenetv3.py │ ├── ops │ │ ├── __init__.py │ │ ├── blocks │ │ │ ├── __init__.py │ │ │ ├── adder.py │ │ │ ├── affine.py │ │ │ ├── aspp.py │ │ │ ├── bottleneck.py │ │ │ ├── cbam.py │ │ │ ├── channel.py │ │ │ ├── depthwise_separable_conv2d.py │ │ │ ├── drop.py │ │ │ ├── efficient_channel_attention.py │ │ │ ├── factory.py │ │ │ ├── gather_excite.py │ │ │ ├── gaussian_blur.py │ │ │ ├── global_context.py │ │ │ ├── inception.py │ │ │ ├── interpolate.py │ │ │ ├── inverted_residual_block.py │ │ │ ├── mlp.py │ │ │ ├── non_local.py │ │ │ ├── norm.py │ │ │ ├── selective_kernel.py │ │ │ ├── squeeze_excite.py │ │ │ ├── stage.py │ │ │ └── vanilla_conv2d.py │ │ └── functional.py │ ├── regnet.py │ ├── resmlp.py │ ├── resnet.py │ ├── rexnet.py │ ├── seg │ │ ├── __init__.py │ │ ├── deeplabv3.py │ │ ├── deeplabv3_plus.py │ │ ├── fcn.py │ │ ├── heads.py │ │ ├── segmentation_model.py │ │ └── unet.py │ ├── shufflenet.py │ ├── shufflenetv2.py │ ├── squeezenet.py │ ├── utils.py │ ├── vae │ │ ├── __init__.py │ │ ├── cvae.py │ │ └── vae.py │ ├── vggnet.py │ ├── vgnet.py │ ├── vision_transformer.py │ └── xception.py ├── scheduler │ ├── __init__.py │ ├── cosine_lr.py │ └── step_lr.py ├── utils │ ├── __init__.py │ ├── augment.py │ ├── coco.py │ ├── ema.py │ ├── factory.py │ ├── logger.py │ ├── metrics.py │ ├── seg_transforms.py │ └── utils.py └── version.py ├── flops.py ├── info.py ├── profiler.py ├── real_labels.json ├── requirements.txt ├── resize_imagenet.py ├── results.md ├── run.py ├── setup.py ├── tests ├── test_blocks.py └── test_models.py ├── train.py ├── train_gan.py ├── train_seg.py ├── train_vae.py └── validate.py /.gitignore: -------------------------------------------------------------------------------- 1 | logs/ 2 | __pycache__/ 3 | .vscode 4 | test.py 5 | build/ 6 | *.egg-info/ 7 | *.pth 8 | images/ 9 | out/ -------------------------------------------------------------------------------- /adversarial_attack.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import torch 4 | 5 | from tqdm import tqdm 6 | 7 | from cvm.utils import * 8 | from cvm.attacks import * 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation') 13 | parser.add_argument('--dataset', type=str, default='ImageNet', choices=list_datasets() + ['ImageNet'], 14 | help='path to the ImageNet dataset.') 15 | parser.add_argument('--data-dir', type=str, default='/datasets/ILSVRC2012', 16 | help='path to the ImageNet dataset.') 17 | parser.add_argument('--model', '-m', type=str, default='mobilenet_v1_x1_0', choices=list_models(), 18 | help='type of model to use. 
(default: mobilenet_v1_x1_0)') 19 | parser.add_argument('--num-classes', type=int, default=1000, metavar='N', 20 | help='number of label classes') 21 | parser.add_argument('--in-channels', type=int, default=3, metavar='N') 22 | parser.add_argument('--model-path', type=str, default=None) 23 | parser.add_argument('--model-weights', type=str, default='DEFAULT') 24 | parser.add_argument('--workers', '-j', type=int, default=8, metavar='N', 25 | help='number of data loading workers pre GPU. (default: 3)') 26 | parser.add_argument('--batch-size', type=int, default=256, metavar='N', 27 | help='mini-batch size, this is the total batch size of all GPUs. (default: 256)') 28 | parser.add_argument('--crop-size', type=int, default=224) 29 | parser.add_argument('--resize-size', type=int, default=232) 30 | parser.add_argument('--dali', action='store_true', help='use nvidia dali.') 31 | parser.add_argument('--dali-cpu', action='store_true', 32 | help='runs CPU based version of DALI pipeline. (default: false)') 33 | parser.add_argument('--method', type=str, default='PGD', choices=['FGSM', 'PGD']) 34 | parser.add_argument('--attack-eps', type=float, default=4/255, metavar='E') 35 | parser.add_argument('--attack-steps', type=int, default=2, metavar='N') 36 | parser.add_argument('--attack-alpha', type=float, default=2/255, metavar='A') 37 | parser.add_argument('--attack-target', type=int, default=-1, metavar='T') 38 | return parser.parse_args() 39 | 40 | 41 | if __name__ == '__main__': 42 | assert torch.cuda.is_available(), 'CUDA IS NOT AVAILABLE!!' 43 | torch.backends.cudnn.benchmark = True 44 | 45 | args = parse_args() 46 | init_distributed_mode(args) 47 | 48 | if args.local_rank == 0: 49 | print(json.dumps(vars(args), indent=4)) 50 | 51 | model = create_model( 52 | args.model, 53 | pretrained=True, 54 | thumbnail=(args.crop_size < 128), 55 | pth=args.model_path, 56 | weights=args.model_weights, 57 | distributed=args.distributed, 58 | local_rank=args.local_rank, 59 | in_channels=args.in_channels, 60 | num_classes=args.num_classes 61 | ) 62 | 63 | val_loader = create_loader( 64 | args.dataset, 65 | root=args.data_dir, 66 | is_training=False, 67 | batch_size=args.batch_size, 68 | val_resize_size=args.resize_size, 69 | val_crop_size=args.crop_size, 70 | crop_size=args.crop_size, 71 | workers=args.workers, 72 | dali=args.dali, 73 | dali_cpu=args.dali_cpu, 74 | distributed=args.distributed, 75 | local_rank=args.local_rank 76 | ) 77 | 78 | if args.local_rank == 0: 79 | if val_loader.type != "dali": 80 | print(f'Validation: \n{val_loader.dataset.transform}') 81 | 82 | attacker = None 83 | if args.method == 'FGSM': 84 | attacker = FGSM(model, args.attack_eps) 85 | elif args.method == 'PGD': 86 | attacker = PGD(model, args.attack_eps, args.attack_steps, args.attack_alpha) 87 | else: 88 | raise ValueError(f'Invalid attacker: {args.method}.') 89 | 90 | attacker.set_nomarlized(get_dataset_mean(args.dataset), get_dataset_std(args.dataset)) 91 | 92 | if args.local_rank == 0: 93 | print(f'Attacker: {attacker}') 94 | 95 | top1 = AverageMeter() 96 | top5 = AverageMeter() 97 | model.eval() 98 | for (images, target) in tqdm(val_loader, desc='validating', unit='batch'): 99 | 100 | if args.attack_target >= 0: 101 | target.fill_(args.attack_target) 102 | 103 | images = attacker.perturb(images, target, args.attack_target >= 0) 104 | 105 | with torch.inference_mode(): 106 | output = model(images) 107 | 108 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 109 | 110 | top1.update(acc1.item(), images.size(0)) 111 | 
top5.update(acc5.item(), images.size(0)) 112 | 113 | acc = f'\n -- top1={top1.avg:6.3f}, top5={top5.avg:6.3f}\n' 114 | if args.local_rank == 0: 115 | print(acc) 116 | -------------------------------------------------------------------------------- /benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import time 4 | from cvm.utils import create_model 5 | 6 | 7 | class InferenceBenchmarkRunner(): 8 | def __init__(self, model, input, device='cuda', amp=False) -> None: 9 | self.model = model 10 | self.input = input 11 | self.device = device 12 | self.amp = amp 13 | 14 | self.model = model.to(self.device) 15 | self.model.eval() 16 | self.input = input.to(self.device) 17 | 18 | def timestamp(self, sync=False): 19 | if sync and self.device == 'cuda': 20 | torch.cuda.synchronize(device=self.device) 21 | 22 | return time.perf_counter() 23 | 24 | def infer(self): 25 | start = self.timestamp() 26 | with torch.amp.autocast(device_type='cuda', enabled=self.amp): 27 | output = self.model(self.input) 28 | end = self.timestamp(True) 29 | return end - start 30 | 31 | 32 | if __name__ == '__main__': 33 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 34 | parser.add_argument('--model', '-m', type=str) 35 | parser.add_argument('--batch-size', type=int, default=16) 36 | parser.add_argument('--amp', action='store_true') 37 | parser.add_argument('--device', type=str, default='cuda') 38 | 39 | args = parser.parse_args() 40 | print(args) 41 | 42 | model = create_model(args.model) 43 | 44 | input = torch.randn(args.batch_size, 3, 224, 224) 45 | 46 | runner = InferenceBenchmarkRunner(model, input, args.device, args.amp) 47 | 48 | with torch.no_grad(): 49 | for _ in range(50): 50 | runner.infer() 51 | 52 | total_step = 0 53 | run_start = runner.timestamp() 54 | for i in range(50): 55 | delta_fwd = runner.infer() 56 | total_step += delta_fwd 57 | 58 | run_end = runner.timestamp(True) 59 | run_elapsed = run_end - run_start 60 | print(f'Inference benchmark: {round(50 / run_elapsed, 2):.2f} batches/s, {round(1000 * total_step / 50, 2)} ms') 61 | -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import argparse 3 | import shutil 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--input', '-i', type=str) 8 | parser.add_argument('--output', '-o', type=str) 9 | args = parser.parse_args() 10 | 11 | with open(args.input, 'rb') as f: 12 | sha_hash = hashlib.sha256(f.read()).hexdigest() 13 | 14 | final_filename = f'logs/{args.output}-{sha_hash[:8]}.pth' 15 | shutil.copy(args.input, final_filename) 16 | print(f'Saved: {final_filename}') 17 | -------------------------------------------------------------------------------- /cvm/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__ 2 | 3 | from cvm import models 4 | from cvm import utils 5 | from cvm import loss 6 | from cvm import scheduler 7 | from cvm import data 8 | -------------------------------------------------------------------------------- /cvm/attacks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fgsm import FGSM 2 | from .pgd import PGD 3 | 4 | __all__ = ['FGSM', 'PGD'] 
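# --- Illustrative usage (not part of the original file; a minimal sketch) ---
# Shows how the attackers exported above are wired together, mirroring the flow
# of adversarial_attack.py. Assumes `model` is a pretrained classifier already
# in eval mode and `images`/`labels` are an ImageNet-normalized batch on the
# same device; the mean/std values below are the ImageNet statistics from
# cvm/data/constants.py.
#
#     from cvm.attacks import FGSM, PGD
#
#     attacker = PGD(model, epsilon=4 / 255, steps=2, alpha=2 / 255)
#     # or: attacker = FGSM(model, epsilon=4 / 255)
#
#     # Tell the attacker how the inputs were normalized so it can invert the
#     # normalization before perturbing and re-apply it afterwards
#     # (method name kept exactly as spelled in attack.py).
#     attacker.set_nomarlized((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
#
#     adv_images = attacker.perturb(images, labels, targeted=False)
#     logits = model(adv_images)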
-------------------------------------------------------------------------------- /cvm/attacks/attack.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import torch 3 | 4 | from typing import Callable 5 | 6 | 7 | class Attacker(abc.ABC): 8 | def __init__(self, model, epsilon: float = 0.03, mean=None, std=None): 9 | super().__init__() 10 | 11 | self.model = model 12 | self.model.eval() 13 | 14 | self.epsilon = epsilon 15 | 16 | self.mean = None 17 | self.std = None 18 | 19 | self.normalized = None # None, False, True 20 | 21 | def set_nomarlized(self, mean, std): 22 | self.mean = mean 23 | self.std = std 24 | 25 | self.normalized = True 26 | 27 | def normalize(self, x: torch.Tensor): 28 | mean = torch.as_tensor(self.mean, dtype=x.dtype, device=x.device) 29 | std = torch.as_tensor(self.std, dtype=x.dtype, device=x.device) 30 | 31 | if mean.ndim == 1: 32 | mean = mean.view(-1, 1, 1) 33 | if std.ndim == 1: 34 | std = std.view(-1, 1, 1) 35 | 36 | return (x - mean) / std 37 | 38 | def inverse_normalize(self, x: torch.Tensor): 39 | mean = torch.as_tensor(self.mean, dtype=x.dtype, device=x.device) 40 | std = torch.as_tensor(self.std, dtype=x.dtype, device=x.device) 41 | 42 | if mean.ndim == 1: 43 | mean = mean.view(-1, 1, 1) 44 | if std.ndim == 1: 45 | std = std.view(-1, 1, 1) 46 | 47 | return x * std + mean 48 | 49 | def prepare_inputs(self, x): 50 | if self.normalized is True: 51 | x = self.inverse_normalize(x) 52 | self.normalized = False 53 | 54 | x.requires_grad_(True) 55 | return x 56 | 57 | def unprepare_inputs(self, x): 58 | if self.normalized is False: 59 | x = self.normalize(x) 60 | self.normalized = True 61 | 62 | return x 63 | 64 | def forward(self, x): 65 | if self.normalized is False: 66 | x = self.normalize(x) 67 | 68 | return self.model(x) 69 | 70 | perturb: Callable 71 | -------------------------------------------------------------------------------- /cvm/attacks/fgsm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .attack import Attacker 4 | 5 | 6 | class FGSM(Attacker): 7 | r""" 8 | 'Explaining and Harnessing Adversarial Examples', https://arxiv.org/abs/1412.6572 9 | """ 10 | 11 | def __init__(self, model, epsilon: float = 6/255): 12 | super().__init__(model, epsilon=epsilon) 13 | 14 | def perturb(self, images: torch.Tensor, labels: torch.Tensor = None, targeted: bool = False): 15 | images_adv = images.clone().detach() 16 | 17 | images_adv = self.prepare_inputs(images_adv) 18 | 19 | loss = F.cross_entropy(self.forward(images_adv), labels) 20 | grad = torch.autograd.grad(loss, images_adv)[0] 21 | 22 | eta = self.epsilon * torch.sign(grad) 23 | 24 | if not targeted: 25 | images_adv = (images_adv + eta).detach() 26 | else: 27 | images_adv = (images_adv - eta).detach() 28 | 29 | images_adv = torch.clamp(images_adv, min=0, max=1.0) 30 | 31 | return self.unprepare_inputs(images_adv) 32 | 33 | def __repr__(self) -> str: 34 | return f'FGSM(eps={self.epsilon:>6.4f}({self.epsilon * 255.0:>.1f}/255.0), normalized={self.normalized}, mean={self.mean}, std={self.std})' 35 | -------------------------------------------------------------------------------- /cvm/attacks/pgd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .attack import Attacker 4 | 5 | 6 | class PGD(Attacker): 7 | r""" 8 | 'Towards Deep Learning Models Resistant to Adversarial 
Attacks', https://arxiv.org/abs/1706.06083 9 | """ 10 | 11 | def __init__(self, model, epsilon: float = 6/255, steps: int = 3, alpha: float = 2/255): 12 | super().__init__(model, epsilon=epsilon) 13 | 14 | self.steps = steps 15 | self.alpha = alpha 16 | 17 | def perturb(self, images: torch.Tensor, labels: torch.Tensor = None, targeted: bool = False): 18 | images_adv = images.detach().clone() 19 | 20 | images_adv = self.prepare_inputs(images_adv) 21 | images_nat = images_adv.clone().detach() 22 | 23 | for _ in range(self.steps): 24 | images_adv.requires_grad_(True) 25 | 26 | loss = F.cross_entropy(self.forward(images_adv), labels) 27 | grad = torch.autograd.grad(loss, images_adv)[0] 28 | 29 | eta = self.alpha * torch.sign(grad) 30 | 31 | if not targeted: 32 | images_adv = (images_adv + eta).detach() 33 | else: 34 | images_adv = (images_adv - eta).detach() 35 | 36 | images_adv = torch.clamp(images_adv, images_nat - self.epsilon, images_nat + self.epsilon) 37 | images_adv = torch.clamp(images_adv, min=0, max=1.0) 38 | 39 | return self.unprepare_inputs(images_adv) 40 | 41 | def __repr__(self) -> str: 42 | return f'PGD(eps={self.epsilon:>6.4f}({self.epsilon * 255.0:>.1f}/255.0), steps={self.steps}, alpha={self.alpha:>6.4f}({self.alpha * 255.0:>.1f}/255.0), normalized={self.normalized}, mean={self.mean}, std={self.std})' 43 | -------------------------------------------------------------------------------- /cvm/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .constants import * 2 | from .imagenet_1k import * 3 | from .imagenet_1k_real_labels import * 4 | from .loader import * -------------------------------------------------------------------------------- /cvm/data/constants.py: -------------------------------------------------------------------------------- 1 | IMAGE_MEAN = (0.485, 0.456, 0.406) 2 | IMAGE_STD = (0.229, 0.224, 0.225) 3 | 4 | CIFAR_MEAN = (0.491, 0.482, 0.446) 5 | CIFAR_STD = (0.247, 0.243, 0.261) 6 | 7 | MNIST_MEAN = (0.1307,) 8 | MNIST_STD = (0.3081,) 9 | 10 | VOC_MEAN = (0.485, 0.456, 0.406) 11 | VOC_STD = (0.229, 0.224, 0.225) 12 | 13 | 14 | CIFAR_IMAGE_SIZE = 32 15 | MNIST_IMAGE_SIZE = 28 16 | -------------------------------------------------------------------------------- /cvm/data/imagenet_1k_real_labels.py: -------------------------------------------------------------------------------- 1 | """ Real labels evaluator for ImageNet 2 | [1] Are we done with ImageNet?. 
arXiv:2006.07159 3 | """ 4 | import os 5 | import json 6 | import torch 7 | import numpy as np 8 | 9 | __all__ = ['ImageNet1KRealLabelsEvaluator'] 10 | 11 | 12 | class ImageNet1KRealLabelsEvaluator: 13 | 14 | def __init__(self, samples, labels_file='real_labels.json', topk=(1, 5)): 15 | with open(labels_file) as f: 16 | self.labels = { 17 | f'ILSVRC2012_val_{i + 1:08d}.JPEG': labels for i, labels in enumerate(json.load(f)) 18 | } 19 | 20 | assert len(samples) == len(self.labels) 21 | 22 | self.samples = samples 23 | self.topk = topk 24 | self.res = {k: [] for k in topk} 25 | self.index = 0 26 | 27 | def put(self, output: torch.Tensor): 28 | maxk = max(self.topk) 29 | _, pred = output.topk(maxk, 1, True, True) 30 | pred = pred.cpu().numpy() 31 | 32 | for topk_label in pred: 33 | filename = os.path.basename(self.samples[self.index][0]) 34 | 35 | if self.labels[filename]: 36 | for k in self.topk: 37 | self.res[k].append( 38 | any([p in self.labels[filename] for p in topk_label[:k]]) 39 | ) 40 | self.index += 1 41 | 42 | @property 43 | def accuracy(self): 44 | return {k: float(np.mean(self.res[k])) * 100 for k in self.topk} 45 | -------------------------------------------------------------------------------- /cvm/data/loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class DataIterator: 5 | def __init__( 6 | self, 7 | loader, 8 | type: str = 'dali' 9 | ): 10 | self.loader = loader 11 | self.type = type 12 | self._counter = 0 13 | self.itor = self 14 | 15 | def __iter__(self): 16 | self.itor = iter(self.loader) 17 | return self 18 | 19 | def __next__(self): 20 | batch = next(self.itor) 21 | 22 | if self.type == 'dali': 23 | input = batch[0]["data"] 24 | target = batch[0]["label"].squeeze(-1).long() 25 | else: 26 | input = batch[0].cuda(non_blocking=True) 27 | target = batch[1].cuda(non_blocking=True) 28 | 29 | return input, target 30 | 31 | @property 32 | def sampler(self): 33 | return self.loader.sampler if self.type == 'torch' else None 34 | 35 | @property 36 | def dataset(self): 37 | return self.loader.dataset if self.type == 'torch' else None 38 | 39 | def reset(self): 40 | self._counter += 1 41 | 42 | if self.type == 'dali': 43 | self.loader.reset() 44 | elif isinstance(self.sampler, torch.utils.data.distributed.DistributedSampler): 45 | self.loader.sampler.set_epoch(self._counter) 46 | 47 | def __len__(self): 48 | return len(self.loader) 49 | -------------------------------------------------------------------------------- /cvm/data/samplers.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.distributed as dist 5 | 6 | 7 | class RASampler(torch.utils.data.Sampler): 8 | """Sampler that restricts data loading to a subset of the dataset for distributed, 9 | with repeated augmentation. 10 | It ensures that different each augmented version of a sample will be visible to a 11 | different process (GPU). 12 | Heavily based on 'torch.utils.data.DistributedSampler'. 
13 | 14 | This is borrowed from the DeiT Repo: 15 | https://github.com/facebookresearch/deit/blob/main/samplers.py 16 | """ 17 | 18 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, seed=0, repetitions=3): 19 | if num_replicas is None: 20 | if not dist.is_available(): 21 | raise RuntimeError("Requires distributed package to be available!") 22 | num_replicas = dist.get_world_size() 23 | if rank is None: 24 | if not dist.is_available(): 25 | raise RuntimeError("Requires distributed package to be available!") 26 | rank = dist.get_rank() 27 | self.dataset = dataset 28 | self.num_replicas = num_replicas 29 | self.rank = rank 30 | self.epoch = 0 31 | self.num_samples = int(math.ceil(len(self.dataset) * float(repetitions) / self.num_replicas)) 32 | self.total_size = self.num_samples * self.num_replicas 33 | self.num_selected_samples = int(math.floor(len(self.dataset) // 256 * 256 / self.num_replicas)) 34 | self.shuffle = shuffle 35 | self.seed = seed 36 | self.repetitions = repetitions 37 | 38 | def __iter__(self): 39 | if self.shuffle: 40 | # Deterministically shuffle based on epoch 41 | g = torch.Generator() 42 | g.manual_seed(self.seed + self.epoch) 43 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 44 | else: 45 | indices = list(range(len(self.dataset))) 46 | 47 | # Add extra samples to make it evenly divisible 48 | indices = [ele for ele in indices for i in range(self.repetitions)] 49 | indices += indices[: (self.total_size - len(indices))] 50 | assert len(indices) == self.total_size 51 | 52 | # Subsample 53 | indices = indices[self.rank : self.total_size : self.num_replicas] 54 | assert len(indices) == self.num_samples 55 | 56 | return iter(indices[: self.num_selected_samples]) 57 | 58 | def __len__(self): 59 | return self.num_selected_samples 60 | 61 | def set_epoch(self, epoch): 62 | self.epoch = epoch 63 | -------------------------------------------------------------------------------- /cvm/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .soft_label_cross_entropy_loss import * -------------------------------------------------------------------------------- /cvm/loss/soft_label_cross_entropy_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | __all__ = ['SoftLabelCrossEntropyLoss'] 6 | 7 | 8 | class SoftLabelCrossEntropyLoss(nn.Module): 9 | def __init__(self): 10 | super().__init__() 11 | 12 | def forward(self, x: torch.Tensor, y: torch.Tensor): 13 | logprobs = F.log_softmax(x, dim=-1) 14 | loss = -(logprobs * y).sum(dim=-1) 15 | return loss.mean() 16 | -------------------------------------------------------------------------------- /cvm/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .alexnet import * 2 | from .vggnet import * 3 | from .resnet import * 4 | from .squeezenet import * 5 | from .googlenet import * 6 | from .inception_v3 import * 7 | from .inception_v4 import * 8 | from .xception import * 9 | from .densenet import * 10 | from .mobilenet import * 11 | from .mobilenetv2 import * 12 | from .mobilenetv3 import * 13 | from .ghostnet import * 14 | from .shufflenet import * 15 | from .shufflenetv2 import * 16 | from .mnasnet import * 17 | from .efficientnet import * 18 | from .efficientnetv2 import * 19 | from .mlp_mixer import * 20 | from .resmlp import * 21 | from .rexnet import * 22 | from 
.regnet import * 23 | from .vision_transformer import * 24 | from .convmixer import * 25 | from .convnext import * 26 | from .vgnet import * 27 | from .gssdnet import * 28 | 29 | from . import gan 30 | from . import vae 31 | from . import seg 32 | from . import det -------------------------------------------------------------------------------- /cvm/models/alexnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .utils import export, load_from_local_or_url 4 | from typing import Any 5 | 6 | 7 | @export 8 | class AlexNet(nn.Module): 9 | def __init__( 10 | self, 11 | in_channels: int = 3, 12 | num_classes: int = 1000, 13 | dropout_rate: float = 0.5, 14 | thumbnail: bool = False, 15 | **kwargs: Any 16 | ): 17 | super().__init__() 18 | 19 | FRONT_S = 1 if thumbnail else 4 20 | 21 | self.features = nn.Sequential( 22 | nn.Conv2d(in_channels, 64, kernel_size=11, 23 | stride=FRONT_S, padding=2), 24 | nn.ReLU(inplace=True), 25 | 26 | nn.MaxPool2d(kernel_size=3, stride=2), 27 | 28 | nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2), 29 | nn.ReLU(inplace=True), 30 | 31 | nn.MaxPool2d(kernel_size=3, stride=2), 32 | 33 | nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1), 34 | nn.ReLU(inplace=True), 35 | 36 | nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1), 37 | nn.ReLU(inplace=True), 38 | 39 | nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1), 40 | nn.ReLU(inplace=True), 41 | 42 | nn.MaxPool2d(kernel_size=3, stride=2) 43 | ) 44 | 45 | self.pool = nn.AdaptiveAvgPool2d((6, 6)) 46 | 47 | self.classifier = nn.Sequential( 48 | nn.Dropout(dropout_rate), 49 | nn.Linear(9216, 4096), 50 | nn.ReLU(inplace=True), 51 | nn.Dropout(dropout_rate), 52 | nn.Linear(4096, 4096), 53 | nn.ReLU(inplace=True), 54 | nn.Linear(4096, num_classes) 55 | ) 56 | 57 | def forward(self, x): 58 | x = self.features(x) 59 | x = self.pool(x) 60 | x = torch.flatten(x, 1) 61 | x = self.classifier(x) 62 | return x 63 | 64 | 65 | @export 66 | def alexnet(pretrained: bool = False, pth: str = None, progress: bool = False, **kwargs: Any): 67 | model = AlexNet(**kwargs) 68 | 69 | if pretrained: 70 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 71 | return model 72 | -------------------------------------------------------------------------------- /cvm/models/convmixer.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .ops import blocks 6 | from .utils import export, config, load_from_local_or_url 7 | from typing import Any 8 | 9 | 10 | class Residual(nn.Sequential): 11 | def __init__(self, *args): 12 | super().__init__(*args) 13 | 14 | def forward(self, x): 15 | return self[0](x) + x 16 | 17 | 18 | @export 19 | class ConvMixer(nn.Module): 20 | @blocks.normalizer(position='after') 21 | def __init__( 22 | self, 23 | in_channels: int = 3, 24 | num_classes: int = 1000, 25 | h=None, 26 | depth=None, 27 | kernel_size: int = 9, 28 | patch_size: int = 7, 29 | **kwargs: Any 30 | ): 31 | super().__init__() 32 | 33 | self.features = nn.Sequential( 34 | blocks.Conv2dBlock(in_channels, h, patch_size, stride=patch_size), 35 | 36 | *[nn.Sequential( 37 | Residual( 38 | blocks.Conv2dBlock(h, h, kernel_size, groups=h, padding='same') 39 | ), 40 | blocks.Conv2d1x1Block(h, h) 41 | ) for _ in range(depth)] 42 | ) 43 | 44 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 45 | self.classifier = nn.Linear(h, 
num_classes) 46 | 47 | def forward(self, x): 48 | x = self.features(x) 49 | x = self.pool(x) 50 | x = torch.flatten(x, 1) 51 | x = self.classifier(x) 52 | return x 53 | 54 | 55 | def _conv_mixer( 56 | h, 57 | depth, 58 | kernel_size: int = 9, 59 | patch_size: int = 7, 60 | pretrained: bool = False, 61 | pth: str = None, 62 | progress: bool = True, 63 | **kwargs: Any 64 | ): 65 | 66 | model = ConvMixer(h=h, depth=depth, kernel_size=kernel_size, 67 | patch_size=patch_size, **kwargs) 68 | 69 | if pretrained: 70 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 71 | return model 72 | 73 | 74 | @export 75 | @blocks.activation(nn.GELU) 76 | def conv_mixer_1536_20_k9_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 77 | return _conv_mixer(1536, 20, 9, 7, pretrained, pth, progress, **kwargs) 78 | 79 | 80 | @export 81 | @blocks.activation(nn.GELU) 82 | def conv_mixer_1536_20_k3_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 83 | return _conv_mixer(1536, 20, 3, 7, pretrained, pth, progress, **kwargs) 84 | 85 | 86 | @export 87 | @blocks.activation(nn.GELU) 88 | def conv_mixer_1024_20_k9_p14(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 89 | return _conv_mixer(1024, 20, 9, 14, pretrained, pth, progress, **kwargs) 90 | 91 | 92 | @export 93 | @blocks.activation(nn.GELU) 94 | def conv_mixer_1024_16_k9_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 95 | return _conv_mixer(1024, 16, 9, 7, pretrained, pth, progress, **kwargs) 96 | 97 | 98 | @export 99 | @blocks.activation(nn.GELU) 100 | def conv_mixer_1024_12_k8_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 101 | return _conv_mixer(1024, 12, 8, 7, pretrained, pth, progress, **kwargs) 102 | 103 | 104 | @export 105 | @blocks.activation(partial(nn.ReLU, inplace=True)) 106 | def conv_mixer_768_32_k7_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 107 | return _conv_mixer(768, 32, 7, 7, pretrained, pth, progress, **kwargs) 108 | 109 | 110 | @export 111 | @blocks.activation(partial(nn.ReLU, inplace=True)) 112 | def conv_mixer_768_32_k3_p14(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 113 | return _conv_mixer(768, 32, 3, 14, pretrained, pth, progress, **kwargs) 114 | 115 | 116 | @export 117 | @blocks.activation(nn.GELU) 118 | def conv_mixer_512_16_k8_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 119 | return _conv_mixer(512, 16, 8, 7, pretrained, pth, progress, **kwargs) 120 | 121 | 122 | @export 123 | @blocks.activation(nn.GELU) 124 | def conv_mixer_512_12_k8_p7(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 125 | return _conv_mixer(512, 12, 8, 7, pretrained, pth, progress, **kwargs) 126 | -------------------------------------------------------------------------------- /cvm/models/convnext.py: -------------------------------------------------------------------------------- 1 | ''' 2 | paper: 3 | [ConvNeXt] A ConvNet for the 2020s(https://arxiv.org/abs/2201.03545) 4 | official code : 5 | https://github.com/facebookresearch/ConvNeXt/blob/dcb928723662a1289d31190d09d82378b57b810a/models/convnext.py 6 | ''' 7 | import torch 8 | import torch.nn as nn 9 | from .ops import blocks 10 | from .utils import export, config, load_from_local_or_url 11 | from typing import Any, OrderedDict, List 12 | 13 | 14 | class 
ConvNetBlock(nn.Module): 15 | def __init__( 16 | self, 17 | dim: int, 18 | kernel_size: int = 7, 19 | padding: int = 3, 20 | survival_prob: float = 0.0, 21 | layer_scale: float = 1e-6 22 | ): 23 | super().__init__() 24 | 25 | self.branch1 = nn.Sequential( 26 | blocks.DepthwiseConv2d(dim, dim, kernel_size, padding=padding, bias=True), 27 | blocks.Permute([0, 2, 3, 1]), 28 | nn.LayerNorm(dim, eps=1e-6), 29 | nn.Linear(dim, 4 * dim), 30 | nn.GELU(), 31 | nn.Linear(4 * dim, dim), 32 | blocks.Permute([0, 3, 1, 2]), 33 | blocks.Scale(dim, layer_scale), 34 | blocks.StochasticDepth(survival_prob) 35 | ) 36 | 37 | self.branch2 = nn.Identity() 38 | self.combine = blocks.Combine('ADD') 39 | 40 | def forward(self, x): 41 | return self.combine([self.branch1(x), self.branch2(x)]) 42 | 43 | 44 | class DownsamplingBlock(nn.Sequential): 45 | def __init__( 46 | self, 47 | inp: int, 48 | oup: int 49 | ): 50 | super().__init__( 51 | blocks.LayerNorm2d(inp, eps=1e-6), 52 | nn.Conv2d(inp, oup, kernel_size=2, stride=2) 53 | ) 54 | 55 | 56 | @export 57 | class ConvNeXt(nn.Module): 58 | def __init__( 59 | self, 60 | in_channels: int = 3, 61 | num_classes: int = 1000, 62 | layers: List[int] = [3, 3, 9, 3], 63 | dims: List[int] = [96, 192, 384, 768], 64 | drop_path_rate: float = 0.2, 65 | layer_scale: float = 1e-6, 66 | thumbnail: bool = False, 67 | **kwargs: Any 68 | ): 69 | super().__init__() 70 | 71 | FRONT_S = 1 if thumbnail else 4 72 | 73 | self.features = nn.Sequential(OrderedDict([ 74 | ('stem', blocks.Stage( 75 | nn.Conv2d(in_channels, dims[0], kernel_size=4, stride=FRONT_S), 76 | blocks.LayerNorm2d(dims[0], eps=1e-6) 77 | )) 78 | ])) 79 | 80 | survival_probs = [1 - x.item() for x in torch.linspace(0, drop_path_rate, sum(layers))] 81 | for i in range(len(layers)): 82 | stage = blocks.Stage([ 83 | ConvNetBlock(dims[i], survival_prob=survival_probs[sum(layers[:i]) + j], layer_scale=layer_scale) 84 | for j in range(layers[i])] 85 | ) 86 | if i < 3: 87 | stage.append(DownsamplingBlock(dims[i], dims[i+1])) 88 | 89 | self.features.add_module(f'stage{i + 1}', stage) 90 | 91 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 92 | self.classifier = nn.Sequential( 93 | blocks.LayerNorm2d(dims[-1], eps=1e-6), 94 | nn.Flatten(1), 95 | nn.Linear(dims[-1], num_classes) 96 | ) 97 | 98 | def forward(self, x): 99 | x = self.features(x) 100 | x = self.pool(x) 101 | x = self.classifier(x) 102 | return x 103 | 104 | 105 | @export 106 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.1.2-convnext-weights/torch-convnext_t-98aeea18.pth') 107 | def convnext_t(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 108 | model = ConvNeXt(layers=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs) 109 | if pretrained: 110 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 111 | return model 112 | 113 | 114 | @export 115 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.1.2-convnext-weights/torch-convnext_s-0ebda7c5.pth') 116 | def convnext_s(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 117 | model = ConvNeXt(layers=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs) 118 | if pretrained: 119 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 120 | return model 121 | 122 | 123 | @export 124 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.1.2-convnext-weights/torch-convnext_b-1e0fb038.pth') 125 | def convnext_b(pretrained: bool = False, in_22k=False, pth: str = None, 
progress: bool = True, **kwargs: Any): 126 | model = ConvNeXt(layers=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs) 127 | if pretrained: 128 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 129 | return model 130 | 131 | 132 | @export 133 | def convnext_l(pretrained: bool = False, in_22k=False, pth: str = None, progress: bool = True, **kwargs: Any): 134 | model = ConvNeXt(layers=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs) 135 | if pretrained: 136 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 137 | return model 138 | 139 | 140 | @export 141 | def convnext_xl(pretrained: bool = False, in_22k=False, pth: str = None, progress: bool = True, **kwargs: Any): 142 | model = ConvNeXt(layers=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs) 143 | if pretrained: 144 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 145 | return model 146 | -------------------------------------------------------------------------------- /cvm/models/densenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, load_from_local_or_url 6 | from typing import Any, OrderedDict, List 7 | 8 | 9 | class DenseLayer(nn.Sequential): 10 | '''BN-ReLU-Conv''' 11 | 12 | def __init__(self, inp, oup): 13 | super().__init__() 14 | 15 | super().__init__(OrderedDict([ 16 | ('norm1', nn.BatchNorm2d(inp)), 17 | ('relu1', nn.ReLU(inplace=True)), 18 | ('conv1', blocks.Conv2d1x1(inp, oup)), 19 | ('norm2', nn.BatchNorm2d(oup)), 20 | ('relu2', nn.ReLU(inplace=True)), 21 | ('conv2', blocks.Conv2d3x3(oup, 32)) 22 | ])) 23 | 24 | 25 | class TransitionLayer(nn.Sequential): 26 | '''BN-ReLU-Conv''' 27 | 28 | def __init__(self, inp, oup): 29 | super().__init__(OrderedDict([ 30 | ('norm', nn.BatchNorm2d(inp)), 31 | ('relu', nn.ReLU(inplace=True)), 32 | ('conv', blocks.Conv2d1x1(inp, oup)), 33 | ('pool', nn.AvgPool2d(kernel_size=2, stride=2, padding=0)) 34 | ])) 35 | 36 | 37 | class DenseBlock(nn.Module): 38 | def __init__(self, inp, oup, n): 39 | super().__init__() 40 | 41 | layers = [] 42 | 43 | for i in range(n): 44 | layers.append(DenseLayer(inp + 32 * i, oup)) 45 | 46 | self.features = nn.Sequential(*layers) 47 | 48 | def forward(self, x): 49 | outs = [x] 50 | for layer in self.features.children(): 51 | outs.append(layer(torch.cat(outs, dim=1))) 52 | return torch.cat(outs, dim=1) 53 | 54 | 55 | @export 56 | class DenseNet(nn.Module): 57 | def __init__( 58 | self, 59 | in_channels: int = 3, 60 | num_classes: int = 1000, 61 | layers: List[int] = [2, 2, 2, 2], 62 | channels: List[int] = [64, 128, 256, 512], 63 | thumbnail: bool = False, 64 | **kwargs: Any 65 | ): 66 | super().__init__() 67 | 68 | FRONT_S = 1 if thumbnail else 2 69 | 70 | maxpool = nn.Identity() 71 | if not thumbnail: 72 | maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 73 | 74 | self.features = nn.Sequential( 75 | blocks.Conv2dBlock(in_channels, channels[0], 7, FRONT_S, padding=3), 76 | maxpool, 77 | DenseBlock(channels[0], 128, layers[0]), 78 | TransitionLayer(channels[0] + 32 * layers[0], channels[1]), 79 | DenseBlock(channels[1], 128, layers[1]), 80 | TransitionLayer(channels[1] + 32 * layers[1], channels[2]), 81 | DenseBlock(channels[2], 128, layers[2]), 82 | TransitionLayer(channels[2] + 32 * layers[2], channels[3]), 83 | DenseBlock(channels[3], 128, layers[3]), 84 | 85 | nn.BatchNorm2d(channels[3] + 32 * layers[-1]), 86 | nn.ReLU(inplace=True) 87 
| ) 88 | 89 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 90 | self.classifier = nn.Linear(channels[3] + 32 * layers[-1], num_classes) 91 | 92 | def forward(self, x): 93 | x = self.features(x) 94 | x = self.pool(x) 95 | x = torch.flatten(x, 1) 96 | x = self.classifier(x) 97 | return x 98 | 99 | 100 | def _densenet( 101 | layers: List[int], 102 | channels: List[int], 103 | pretrained: bool = False, 104 | pth: str = None, 105 | progress: bool = True, 106 | **kwargs: Any 107 | ): 108 | model = DenseNet(layers=layers, channels=channels, **kwargs) 109 | 110 | if pretrained: 111 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 112 | return model 113 | 114 | 115 | @export 116 | def densenet121(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 117 | return _densenet([6, 12, 24, 16], [64, 128, 256, 512], pretrained, pth, progress, **kwargs) 118 | 119 | 120 | @export 121 | def densenet169(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 122 | return _densenet([6, 12, 32, 32], [64, 128, 256, 640], pretrained, pth, progress, **kwargs) 123 | 124 | 125 | @export 126 | def densenet201(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 127 | return _densenet([6, 12, 48, 32], [64, 128, 256, 896], pretrained, pth, progress, **kwargs) 128 | 129 | 130 | @export 131 | def densenet264(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 132 | return _densenet([6, 12, 64, 48], [64, 128, 256, 1408], pretrained, pth, progress, **kwargs) 133 | -------------------------------------------------------------------------------- /cvm/models/det/__init__.py: -------------------------------------------------------------------------------- 1 | from .yolov1 import * -------------------------------------------------------------------------------- /cvm/models/det/yolov1.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from ..ops import blocks 4 | from ..utils import export, get_out_channels, load_from_local_or_url 5 | import cvm.models as models 6 | from typing import Any, List 7 | 8 | 9 | @export 10 | class YOLOv1(nn.Module): 11 | def __init__( 12 | self, 13 | backbone: nn.Module, 14 | grid_size: List[int] = (7, 7), 15 | num_boxes_per_cell: int = 2, 16 | num_classes: int = 20 17 | ): 18 | super().__init__() 19 | 20 | self.backbone = backbone 21 | 22 | self.pool = nn.AdaptiveAvgPool2d((7, 7)) 23 | 24 | self.head = nn.Sequential( 25 | blocks.Conv2dBlock(get_out_channels(backbone), 512, 3), 26 | blocks.Conv2d1x1(512, num_classes + 5 * num_boxes_per_cell) 27 | ) 28 | 29 | def forward(self, x): 30 | x = self.backbone(x) 31 | x = self.pool(x) 32 | x = self.head(x) 33 | return x 34 | 35 | 36 | def create_yolov1( 37 | backbone: str = 'resnet50_v1', 38 | num_classes: int = 21, 39 | pretrained_backbone: bool = False, 40 | pretrained: bool = False, 41 | pth: str = None, 42 | progress: bool = True, 43 | **kwargs: Any 44 | ): 45 | if pretrained: 46 | pretrained_backbone = False 47 | 48 | backbone = models.__dict__[backbone]( 49 | pretrained=pretrained_backbone, 50 | **kwargs 51 | ).features 52 | 53 | model = YOLOv1(backbone, num_classes=num_classes) 54 | 55 | if pretrained: 56 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 57 | return model 58 | 59 | 60 | @export 61 | def yolov1_resnet18_v1( 62 | num_classes: int = 21, 63 | pretrained_backbone: bool = False, 64 | pretrained: bool = False, 65 | pth: str 
= None, 66 | progress: bool = True, 67 | **kwargs: Any 68 | ): 69 | return create_yolov1('resnet18_v1', num_classes, pretrained_backbone, pretrained, pth, progress, **kwargs) 70 | 71 | 72 | @export 73 | def yolov1_mobilenet_v3_large( 74 | num_classes: int = 21, 75 | pretrained_backbone: bool = False, 76 | pretrained: bool = False, 77 | pth: str = None, 78 | progress: bool = True, 79 | **kwargs: Any 80 | ): 81 | return create_yolov1('mobilenet_v3_large', num_classes, pretrained_backbone, pretrained, pth, progress, **kwargs) 82 | 83 | 84 | @export 85 | def yolov1_regnet_x_400mf( 86 | num_classes: int = 21, 87 | pretrained_backbone: bool = False, 88 | pretrained: bool = False, 89 | pth: str = None, 90 | progress: bool = True, 91 | **kwargs: Any 92 | ): 93 | return create_yolov1('regnet_x_400mf', num_classes, pretrained_backbone, pretrained, pth, progress, **kwargs) 94 | -------------------------------------------------------------------------------- /cvm/models/gan/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcgan import * -------------------------------------------------------------------------------- /cvm/models/gan/dcgan.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from ..utils import export, load_from_local_or_url 4 | from typing import Any 5 | 6 | 7 | @export 8 | class DCGAN(nn.Module): 9 | def __init__( 10 | self, 11 | hidden_dim: int = 100, 12 | in_channels: int = 3, 13 | **kwargs: Any 14 | ) -> None: 15 | super().__init__() 16 | 17 | base_width = 64 18 | 19 | self.generator = nn.Sequential( 20 | # input : (batch_size, hidden_dim, 1, 1) 21 | nn.ConvTranspose2d(hidden_dim, base_width * 8, kernel_size=4, stride=1, padding=0, bias=False), 22 | nn.BatchNorm2d(base_width * 8), 23 | nn.ReLU(True), 24 | # state size : (batch_size, ngf * 8, 4, 4) 25 | nn.ConvTranspose2d(base_width * 8, base_width * 4, kernel_size=4, stride=2, padding=1, bias=False), 26 | nn.BatchNorm2d(base_width * 4), 27 | nn.ReLU(True), 28 | # state size : (batch_size, ngf * 4, 8, 8) 29 | nn.ConvTranspose2d(base_width * 4, base_width * 2, kernel_size=4, stride=2, padding=1, bias=False), 30 | nn.BatchNorm2d(base_width * 2), 31 | nn.ReLU(True), 32 | # state size: (batch_size, ngf * 2, 16, 16) 33 | nn.ConvTranspose2d(base_width * 2, base_width, kernel_size=4, stride=2, padding=1, bias=False), 34 | nn.BatchNorm2d(base_width), 35 | nn.ReLU(True), 36 | # state size : (batch_size, ngf, 32, 32) 37 | nn.ConvTranspose2d(base_width, in_channels, kernel_size=4, stride=2, padding=1, bias=False), 38 | nn.Tanh() 39 | # state size : (batch_size, nc, 64, 64) 40 | ) 41 | 42 | self.discriminator = nn.Sequential( 43 | # input size : (batch_size, nc, 64, 64) 44 | nn.Conv2d(in_channels, base_width, kernel_size=4, stride=2, padding=1, bias=False), 45 | nn.LeakyReLU(0.2, inplace=True), 46 | # state size : (batch_size, base_width, 32, 32) 47 | nn.Conv2d(base_width, base_width * 2, 4, 2, 1, bias=False), 48 | nn.BatchNorm2d(base_width * 2), 49 | nn.LeakyReLU(0.2, inplace=True), 50 | #state size : (batch_size, base_width * 2, 16, 16) 51 | nn.Conv2d(base_width * 2, base_width * 4, 4, 2, 1, bias=False), 52 | nn.BatchNorm2d(base_width * 4), 53 | nn.LeakyReLU(0.2, inplace=True), 54 | # state size : (batch_size, base_width * 4, 8, 8) 55 | nn.Conv2d(base_width * 4, base_width * 8, 4, 2, 1, bias=False), 56 | nn.BatchNorm2d(base_width * 8), 57 | nn.LeakyReLU(0.2, inplace=True), 58 | # state size : (batch_size, base_width * 8, 4, 
4) 59 | nn.Conv2d(base_width * 8, 1, 4, 1, 0, bias=False), 60 | nn.Sigmoid(), 61 | # state size : (batch_size, 1, 1, 1) 62 | nn.Flatten() 63 | ) 64 | 65 | 66 | @export 67 | def dcgan( 68 | pretrained: bool = False, 69 | pth: str = None, 70 | progress: bool = True, 71 | **kwargs: Any 72 | ): 73 | model = DCGAN(**kwargs) 74 | 75 | if pretrained: 76 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 77 | return model 78 | -------------------------------------------------------------------------------- /cvm/models/googlenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, load_from_local_or_url 6 | from typing import Any, List, OrderedDict 7 | 8 | __all__ = ['inception_v1'] 9 | 10 | 11 | class InceptionBlock(blocks.ConcatBranches): 12 | def __init__( 13 | self, 14 | inp, 15 | planes_1x1: int, 16 | planes_3x3: List[int], 17 | planes_5x5: List[int], 18 | planes_pool: int 19 | ): 20 | super().__init__(OrderedDict([ 21 | ('branch-1x1', blocks.Conv2d1x1Block(inp, planes_1x1)), 22 | ('branch-3x3', nn.Sequential( 23 | blocks.Conv2d1x1Block(inp, planes_3x3[0]), 24 | blocks.Conv2dBlock(planes_3x3[0], planes_3x3[1]) 25 | )), 26 | ('branch-5x5', nn.Sequential( 27 | blocks.Conv2d1x1Block(inp, planes_5x5[0]), 28 | blocks.Conv2dBlock(planes_5x5[0], planes_5x5[1], kernel_size=5, padding=2) 29 | )), 30 | ('branch-pool', nn.Sequential( 31 | nn.MaxPool2d(3, stride=1, padding=1), 32 | blocks.Conv2d1x1Block(inp, planes_pool) 33 | )) 34 | ])) 35 | 36 | 37 | class InceptionAux(nn.Sequential): 38 | def __init__(self, inp, oup): 39 | super().__init__( 40 | nn.AdaptiveAvgPool2d((4, 4)), 41 | blocks.Conv2d1x1Block(inp, 128), 42 | nn.Flatten(1), 43 | nn.Linear(2048, 1024), 44 | nn.ReLU(inplace=True), 45 | nn.Dropout(0.7), 46 | nn.Linear(1024, oup) 47 | ) 48 | 49 | 50 | @export 51 | def googlenet(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 52 | model = GoogLeNet(**kwargs) 53 | 54 | if pretrained: 55 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 56 | return model 57 | 58 | 59 | inception_v1 = googlenet 60 | 61 | 62 | @export 63 | class GoogLeNet(nn.Module): 64 | def __init__( 65 | self, 66 | in_channels: int = 3, 67 | num_classes: int = 1000, 68 | thumbnail: bool = False, 69 | **kwargs: Any 70 | ): 71 | super().__init__() 72 | 73 | FRONT_S = 1 if thumbnail else 2 74 | 75 | self.stem = nn.Sequential( 76 | blocks.Conv2dBlock(in_channels, 64, 7, stride=FRONT_S, padding=3), 77 | nn.Identity() if thumbnail else nn.MaxPool2d(3, 2, ceil_mode=True) 78 | ) 79 | 80 | self.stage1 = nn.Sequential( 81 | blocks.Conv2d1x1Block(64, 64), 82 | blocks.Conv2dBlock(64, 192, 3, padding=1), 83 | nn.MaxPool2d(3, 2, ceil_mode=True) 84 | ) 85 | 86 | self.stage2 = nn.Sequential(OrderedDict([ 87 | ('inception_3a', InceptionBlock(192, 64, [96, 128], [16, 32], 32)), 88 | ('inception_3b', InceptionBlock(256, 128, [128, 192], [32, 96], 64)), 89 | ('max_pool', nn.MaxPool2d(3, 2, ceil_mode=True)) 90 | ])) 91 | 92 | self.stage3 = nn.Sequential(OrderedDict([ 93 | ('inception_4a', InceptionBlock(480, 192, [96, 208], [16, 48], 64)), 94 | ('inception_4b', InceptionBlock(512, 160, [112, 224], [24, 64], 64)), 95 | ('inception_4c', InceptionBlock(512, 128, [128, 256], [24, 64], 64)), 96 | ('inception_4d', InceptionBlock(512, 112, [144, 288], [32, 64], 64)), 97 | ('inception_4e', InceptionBlock(528, 256, [160, 320], [32, 128], 128)), 98 | 
('max_pool', nn.MaxPool2d(3, 2, ceil_mode=True)) 99 | ])) 100 | 101 | self.stage4 = nn.Sequential(OrderedDict([ 102 | ('inception_5a', InceptionBlock(832, 256, [160, 320], [32, 128], 128)), 103 | ('inception_5b', InceptionBlock(832, 384, [192, 384], [48, 128], 128)) 104 | ])) 105 | 106 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 107 | 108 | self.classifiar = nn.Sequential( 109 | nn.Dropout(0.4), 110 | nn.Linear(1024, num_classes) 111 | ) 112 | 113 | self.aux1 = InceptionAux(512, num_classes) 114 | self.aux2 = InceptionAux(528, num_classes) 115 | 116 | def forward(self, x): 117 | x = self.stem(x) 118 | 119 | x = self.stage1(x) 120 | x = self.stage2(x) 121 | 122 | x = self.stage3.inception_4a(x) 123 | aux1 = self.aux1(x) if self.training else None 124 | x = self.stage3.inception_4b(x) 125 | x = self.stage3.inception_4c(x) 126 | x = self.stage3.inception_4d(x) 127 | aux2 = self.aux2(x) if self.training else None 128 | x = self.stage3.inception_4e(x) 129 | 130 | x = self.stage3.max_pool(x) 131 | 132 | x = self.stage4(x) 133 | 134 | x = self.pool(x) 135 | x = torch.flatten(x, 1) 136 | x = self.classifiar(x) 137 | 138 | if self.training: 139 | return x, aux1, aux2 140 | else: 141 | return x 142 | -------------------------------------------------------------------------------- /cvm/models/inception_v3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .ops import blocks 4 | from .utils import export, load_from_local_or_url 5 | from typing import Any, List, OrderedDict 6 | 7 | 8 | # Figure 5 9 | class InceptionBlockV5(blocks.ConcatBranches): 10 | def __init__( 11 | self, 12 | inp, 13 | planes_1x1: int, 14 | planes_5x5: List[int], 15 | planes_3x3db: List[int], 16 | planes_pool: int 17 | ): 18 | super().__init__(OrderedDict([ 19 | ('branch-1x1', blocks.Conv2d1x1Block(inp, planes_1x1)), 20 | ('branch-5x5', nn.Sequential( 21 | blocks.Conv2d1x1Block(inp, planes_5x5[0]), 22 | blocks.Conv2dBlock(planes_5x5[0], planes_5x5[1], kernel_size=5, padding=2) 23 | )), 24 | ('branch-3x3db', nn.Sequential( 25 | blocks.Conv2d1x1Block(inp, planes_3x3db[0]), 26 | blocks.Conv2dBlock(planes_3x3db[0], planes_3x3db[1]), 27 | blocks.Conv2dBlock(planes_3x3db[1], planes_3x3db[1]) 28 | )), 29 | ('branch-pool', nn.Sequential( 30 | nn.AvgPool2d(3, stride=1, padding=1), 31 | blocks.Conv2d1x1Block(inp, planes_pool) 32 | )) 33 | ])) 34 | 35 | 36 | # Figure 6: blocks.InceptionB 37 | 38 | 39 | # Figure 7 40 | class InceptionBlockV7(blocks.ConcatBranches): 41 | def __init__( 42 | self, 43 | inp, 44 | planes_1x1: int, 45 | planes_3x3: List[int], 46 | planes_3x3db: List[int], 47 | planes_pool 48 | ) -> None: 49 | super().__init__(OrderedDict([ 50 | ('branch_1x1', blocks.Conv2d1x1Block(inp, planes_1x1)), 51 | ('branch-3x3', nn.Sequential( 52 | blocks.Conv2d1x1Block(inp, planes_3x3[0]), 53 | blocks.ConcatBranches(OrderedDict([ 54 | ('branch-3x3-1', blocks.Conv2dBlock( 55 | planes_3x3[0], planes_3x3[1], kernel_size=(1, 3), padding=(0, 1) 56 | )), 57 | ('branch-3x3-2', blocks.Conv2dBlock( 58 | planes_3x3[0], planes_3x3[1], kernel_size=(3, 1), padding=(1, 0) 59 | )) 60 | ])) 61 | )), 62 | ('branch-3x3db', nn.Sequential( 63 | blocks.Conv2d1x1Block(inp, planes_3x3db[0]), 64 | blocks.Conv2dBlock(planes_3x3db[0], planes_3x3db[1]), 65 | blocks.ConcatBranches(OrderedDict([ 66 | ('branch-3x3db-1', blocks.Conv2dBlock( 67 | planes_3x3db[1], planes_3x3db[1], kernel_size=(1, 3), padding=(0, 1) 68 | )), 69 | ('branch-3x3db-2', blocks.Conv2dBlock( 70 | planes_3x3db[1], 
planes_3x3db[1], kernel_size=(3, 1), padding=(1, 0) 71 | )) 72 | ])) 73 | )), 74 | ('branch-pool', nn.Sequential( 75 | nn.AvgPool2d(3, stride=1, padding=1), 76 | blocks.Conv2d1x1Block(inp, planes_pool) 77 | )) 78 | ])) 79 | 80 | 81 | class InceptionV3(nn.Module): 82 | r""" 83 | Paper: Rethinking the Inception Architecture for Computer Vision, https://arxiv.org/abs/1512.00567 84 | Code: https://github.com/keras-team/keras/blob/master/keras/applications/inception_v3.py 85 | """ 86 | 87 | def __init__( 88 | self, 89 | in_channels: int = 3, 90 | num_classes: int = 1000, 91 | dropout_rate: float = 0.2, 92 | thumbnail: bool = False, 93 | **kwargs: Any 94 | ) -> None: 95 | super().__init__() 96 | 97 | self.stem = blocks.Conv2dBlock(in_channels, 32, kernel_size=3, stride=2, padding=0) 98 | 99 | self.stage1 = blocks.Stage( 100 | blocks.Conv2dBlock(32, 32, kernel_size=3, padding=0), 101 | blocks.Conv2dBlock(32, 64, kernel_size=3, padding=1), 102 | nn.MaxPool2d(kernel_size=3, stride=2) 103 | ) 104 | 105 | self.stage2 = blocks.Stage( 106 | blocks.Conv2d1x1Block(64, 80), 107 | blocks.Conv2dBlock(80, 192, kernel_size=3, padding=0), 108 | nn.MaxPool2d(kernel_size=3, stride=2) 109 | ) 110 | 111 | self.stage3 = blocks.Stage( 112 | InceptionBlockV5(192, 64, [48, 64], [64, 96], 32), # mix 0: 35 x 35 x 256 113 | InceptionBlockV5(256, 64, [48, 64], [64, 96], 64), # mix 1: 35 x 35 x 288 114 | InceptionBlockV5(288, 64, [48, 64], [64, 96], 64), # mix 2: 35 x 35 x 288 115 | blocks.ReductionA(288, 384, [64, 96, 96]) # mix 3: 17 x 17 x 768 116 | ) 117 | 118 | self.stage4 = blocks.Stage( 119 | blocks.InceptionB(768, 192, [128, 128, 192], [128, 128, 192], 192), # mix 4: 17 x 17 x 768 120 | blocks.InceptionB(768, 192, [160, 160, 192], [160, 160, 192], 192), # mix 5: 17 x 17 x 768 121 | blocks.InceptionB(768, 192, [160, 160, 192], [160, 160, 192], 192), # mix 6: 17 x 17 x 768 122 | blocks.InceptionB(768, 192, [192, 192, 192], [192, 192, 192], 192), # mix 7: 17 x 17 x 768 123 | blocks.ReductionB(768, [192, 320], [192, 192]) # mix 8: 17 x 17 x 1280 124 | ) 125 | 126 | self.stage5 = blocks.Stage( 127 | InceptionBlockV7(1280, 320, [384, 384], [448, 384], 192), # mixed 9: 8 x 8 x 2048 128 | InceptionBlockV7(2048, 320, [384, 384], [448, 384], 192), # mixed 9: 8 x 8 x 2048 129 | ) 130 | 131 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 132 | self.classifer = nn.Sequential( 133 | nn.Dropout(dropout_rate, inplace=True), 134 | nn.Linear(2048, num_classes) 135 | ) 136 | 137 | def forward(self, x): 138 | x = self.stem(x) 139 | x = self.stage1(x) 140 | x = self.stage2(x) 141 | x = self.stage3(x) 142 | x = self.stage4(x) 143 | x = self.stage5(x) 144 | x = self.pool(x) 145 | x = torch.flatten(x, start_dim=1) 146 | x = self.classifer(x) 147 | return x 148 | 149 | 150 | @export 151 | def inception_v3(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 152 | model = InceptionV3(**kwargs) 153 | 154 | if pretrained: 155 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 156 | return model 157 | -------------------------------------------------------------------------------- /cvm/models/mlp_mixer.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch 3 | import torch.nn as nn 4 | from .ops import blocks 5 | from .utils import export, config, load_from_local_or_url 6 | from typing import Any 7 | 8 | 9 | class MixerBlock(nn.Module): 10 | def __init__( 11 | self, 12 | hidden_dim, 13 | sequence_len, 14 | ratio=(0.5, 
4.0), 15 | normalizer_fn: nn.Module = partial(nn.LayerNorm, eps=1e-6), 16 | dropout_rate: float = 0., 17 | drop_path_rate: float = 0. 18 | ): 19 | super().__init__() 20 | 21 | self.norm1 = normalizer_fn(hidden_dim) 22 | self.token_mixing = blocks.MlpBlock(sequence_len, int(hidden_dim * ratio[0]), dropout_rate=dropout_rate) 23 | self.drop1 = blocks.StochasticDepth(1. - drop_path_rate) 24 | 25 | self.norm2 = normalizer_fn(hidden_dim) 26 | self.channel_mixing = blocks.MlpBlock(hidden_dim, int(hidden_dim * ratio[1]), dropout_rate=dropout_rate) 27 | self.drop2 = blocks.StochasticDepth(1. - drop_path_rate) 28 | 29 | def forward(self, x): 30 | x = x + self.drop1(self.token_mixing(self.norm1(x).transpose(1, 2)).transpose(1, 2)) 31 | x = x + self.drop2(self.channel_mixing(self.norm2(x))) 32 | return x 33 | 34 | 35 | @export 36 | class Mixer(nn.Module): 37 | r''' 38 | See: https://github.com/google-research/vision_transformer/blob/main/vit_jax/models_mixer.py 39 | ''' 40 | 41 | def __init__( 42 | self, 43 | image_size: int = 224, 44 | in_channels: int = 3, 45 | num_classes: int = 1000, 46 | patch_size: int = 32, 47 | hidden_dim: int = 768, 48 | num_blocks: int = 12, 49 | dropout_rate: float = 0., 50 | drop_path_rate: float = 0., 51 | **kwargs: Any 52 | ): 53 | super().__init__() 54 | 55 | self.num_blocks = num_blocks 56 | self.num_patches = (image_size // patch_size) ** 2 57 | 58 | self.stem = nn.Conv2d(in_channels, hidden_dim, 59 | kernel_size=patch_size, stride=patch_size) 60 | self.mixer = nn.Sequential( 61 | *[ 62 | MixerBlock( 63 | hidden_dim, self.num_patches, dropout_rate=dropout_rate, drop_path_rate=drop_path_rate 64 | ) for _ in range(self.num_blocks) 65 | ] 66 | ) 67 | self.norm = nn.LayerNorm(hidden_dim) 68 | 69 | self.head = nn.Linear(hidden_dim, num_classes) 70 | 71 | def forward(self, x): 72 | x = self.stem(x) 73 | # n c h w -> n p c 74 | x = x.flatten(2).transpose(1, 2) 75 | x = self.mixer(x) 76 | x = self.norm(x) 77 | x = x.mean(dim=1) 78 | x = self.head(x) 79 | 80 | return x 81 | 82 | 83 | def _mixer( 84 | image_size: int = 224, 85 | patch_size: int = 32, 86 | hidden_dim: int = 768, 87 | num_blocks: int = 12, 88 | pretrained: bool = False, 89 | pth: str = None, 90 | progress: bool = True, 91 | **kwargs: Any 92 | ): 93 | model = Mixer(image_size, patch_size=patch_size, 94 | hidden_dim=hidden_dim, num_blocks=num_blocks, **kwargs) 95 | 96 | if pretrained: 97 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 98 | return model 99 | 100 | 101 | @export 102 | def mixer_s32_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 103 | return _mixer(224, 32, 512, 8, pretrained, pth, progress, **kwargs) 104 | 105 | 106 | @export 107 | def mixer_s16_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 108 | return _mixer(224, 16, 512, 8, pretrained, pth, progress, **kwargs) 109 | 110 | 111 | @export 112 | def mixer_b32_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 113 | return _mixer(224, 32, 768, 12, pretrained, pth, progress, **kwargs) 114 | 115 | 116 | @export 117 | def mixer_b16_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 118 | return _mixer(224, 16, 768, 12, pretrained, pth, progress, **kwargs) 119 | 120 | 121 | @export 122 | def mixer_l32_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 123 | return _mixer(224, 32, 1024, 24, pretrained, pth, progress, **kwargs) 124 | 125 | 126 | @export 
127 | def mixer_l16_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 128 | return _mixer(224, 16, 1024, 24, pretrained, pth, progress, **kwargs) 129 | 130 | 131 | @export 132 | def mixer_h14_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 133 | return _mixer(224, 14, 1280, 32, pretrained, pth, progress, **kwargs) 134 | -------------------------------------------------------------------------------- /cvm/models/mnasnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, config, load_from_local_or_url 6 | from typing import Any 7 | 8 | 9 | # Paper suggests 0.99 momentum 10 | _BN_MOMENTUM = 0.01 11 | 12 | 13 | @export 14 | class MnasNet(nn.Module): 15 | def __init__( 16 | self, 17 | in_channels: int = 3, 18 | num_classes: int = 1000, 19 | dropout_rate: float = 0.2, 20 | thumbnail: bool = False, 21 | **kwargs: Any 22 | ): 23 | super().__init__() 24 | 25 | FRONT_S = 1 if thumbnail else 2 26 | 27 | t = [1, 6, 3, 6, 6, 6, 6] 28 | c = [32, 16, 24, 40, 80, 112, 160, 320, 1280] 29 | n = [1, 2, 3, 4, 2, 3, 1] # repeats 30 | s = [1, FRONT_S, 2, 2, 1, 2, 1] 31 | k = [3, 3, 5, 3, 3, 5, 3] 32 | se = [0, 0, 0.25, 0, 0.25, 0.25, 0] 33 | 34 | features = [blocks.Conv2dBlock(in_channels, c[0], 3, stride=FRONT_S)] 35 | 36 | for i in range(len(t)): 37 | features.append( 38 | self.make_layers(c[i], t[i], c[i+1], n[i], s[i], k[i], se[i]) 39 | ) 40 | 41 | features.append(blocks.Conv2d1x1Block(c[-2], c[-1])) 42 | 43 | self.features = nn.Sequential(*features) 44 | 45 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 46 | self.classifier = nn.Sequential( 47 | nn.Dropout(dropout_rate, inplace=True), 48 | nn.Linear(c[-1], num_classes) 49 | ) 50 | 51 | @staticmethod 52 | def make_layers( 53 | inp: int, 54 | t: int, 55 | oup: int, 56 | n: int, 57 | stride: int, 58 | kernel_size: int = 3, 59 | rd_ratio: float = None 60 | ): 61 | layers = [blocks.InvertedResidualBlock(inp, oup, t, kernel_size, stride, rd_ratio=rd_ratio)] 62 | 63 | for _ in range(n - 1): 64 | layers.append(blocks.InvertedResidualBlock(oup, oup, t, kernel_size, rd_ratio=rd_ratio)) 65 | 66 | return blocks.Stage(layers) 67 | 68 | def forward(self, x): 69 | x = self.features(x) 70 | x = self.pool(x) 71 | x = torch.flatten(x, 1) 72 | x = self.classifier(x) 73 | return x 74 | 75 | 76 | @export 77 | def mnasnet_a1(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 78 | model = MnasNet(**kwargs) 79 | 80 | if pretrained: 81 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 82 | return model 83 | -------------------------------------------------------------------------------- /cvm/models/mobilenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, config, load_from_local_or_url 6 | from typing import Any, OrderedDict, Type, Union, List 7 | 8 | 9 | class MobileBlock(nn.Sequential): 10 | def __init__( 11 | self, 12 | inp, 13 | oup, 14 | kernel_size: int = 3, 15 | stride: int = 1, 16 | padding: int = None, 17 | dilation: int = 1, 18 | groups: int = 1 19 | ): 20 | super().__init__( 21 | blocks.DepthwiseBlock(inp, inp, kernel_size, stride, padding, dilation=dilation), 22 | blocks.PointwiseBlock(inp, oup, groups=groups) 23 | ) 24 | 25 | 26 | class 
DepthwiseSeparableBlock(nn.Sequential): 27 | def __init__( 28 | self, 29 | inp, 30 | oup, 31 | kernel_size: int = 3, 32 | stride: int = 1, 33 | padding: int = None, 34 | dilation: int = 1, 35 | groups: int = 1 36 | ): 37 | super().__init__( 38 | blocks.DepthwiseConv2d(inp, inp, kernel_size, stride, padding, dilation=dilation), 39 | blocks.PointwiseBlock(inp, oup, groups=groups) 40 | ) 41 | 42 | 43 | @export 44 | class MobileNet(nn.Module): 45 | '''https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.py''' 46 | 47 | def __init__( 48 | self, 49 | in_channels: int = 3, 50 | num_classes: int = 1000, 51 | base_width: int = 32, 52 | block: Type[Union[MobileBlock, DepthwiseSeparableBlock]] = MobileBlock, 53 | depth_multiplier: float = 1.0, 54 | dropout_rate: float = 0.2, 55 | dilations: List[int] = None, 56 | thumbnail: bool = False, 57 | **kwargs: Any 58 | ): 59 | super().__init__() 60 | 61 | def depth(d): return max(int(d * depth_multiplier), 8) 62 | 63 | dilations = dilations or [1, 1, 1, 1] 64 | assert len(dilations) == 4, '' 65 | 66 | FRONT_S = 1 if thumbnail else 2 67 | 68 | layers = [2, 2, 6, 2] 69 | strides = [FRONT_S, 2, 2, 2] 70 | 71 | self.features = nn.Sequential(OrderedDict([ 72 | ('stem', blocks.Stage( 73 | blocks.Conv2dBlock(in_channels, depth(base_width), stride=FRONT_S), 74 | block(depth(base_width), depth(base_width) * 2) 75 | )) 76 | ])) 77 | 78 | for stage, stride in enumerate(strides): 79 | inp = depth(base_width * 2 ** (stage + 1)) 80 | oup = depth(base_width * 2 ** (stage + 2)) 81 | 82 | self.features.add_module(f'stage{stage+1}', blocks.Stage( 83 | [block( 84 | inp if i == 0 else oup, 85 | oup, 86 | stride=stride if (i == 0 and dilations[stage] == 1) else 1, 87 | dilation=max(dilations[stage] // (stride if i == 0 else 1), 1) 88 | ) for i in range(layers[stage])] 89 | )) 90 | 91 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 92 | self.classifier = nn.Sequential( 93 | nn.Dropout(dropout_rate, inplace=True), 94 | nn.Linear(oup, num_classes) 95 | ) 96 | 97 | def forward(self, x): 98 | x = self.features(x) 99 | x = self.pool(x) 100 | x = torch.flatten(x, 1) 101 | x = self.classifier(x) 102 | return x 103 | 104 | 105 | def _mobilenet_v1( 106 | depth_multiplier: float = 1.0, 107 | block: Type[Union[MobileBlock, DepthwiseSeparableBlock]] = MobileBlock, 108 | pretrained: bool = False, 109 | pth: str = None, 110 | progress: bool = True, 111 | **kwargs: Any 112 | ): 113 | model = MobileNet(depth_multiplier=depth_multiplier, block=block, **kwargs) 114 | 115 | if pretrained: 116 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 117 | return model 118 | 119 | 120 | @export 121 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v1_x1_0-e00006ef.pth') 122 | def mobilenet_v1_x1_0(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 123 | return _mobilenet_v1(1.0, MobileBlock, pretrained, pth, progress, **kwargs) 124 | 125 | 126 | @export 127 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v1_x0_75-43c1cb04.pth') 128 | def mobilenet_v1_x0_75(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 129 | return _mobilenet_v1(0.75, MobileBlock, pretrained, pth, progress, **kwargs) 130 | 131 | 132 | @export 133 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v1_x0_5-588ee141.pth') 134 | def mobilenet_v1_x0_5(pretrained: bool = False, pth: str = None, progress: bool = True, 
**kwargs: Any): 135 | return _mobilenet_v1(0.5, MobileBlock, pretrained, pth, progress, **kwargs) 136 | 137 | 138 | @export 139 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v1_x0_35-cbab38a6.pth') 140 | def mobilenet_v1_x0_35(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 141 | return _mobilenet_v1(0.35, MobileBlock, pretrained, pth, progress, **kwargs) 142 | 143 | 144 | @export 145 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v1_x1_0_wo_dwrelubn-2956d795.pth') 146 | @blocks.normalizer(position='after') 147 | def mobilenet_v1_x1_0_wo_dwrelubn(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs): 148 | return _mobilenet_v1(1.0, DepthwiseSeparableBlock, pretrained, pth, progress, **kwargs) 149 | -------------------------------------------------------------------------------- /cvm/models/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .ops import blocks 6 | from .utils import export, config, load_from_local_or_url 7 | from .ops.functional import make_divisible 8 | from typing import Any, OrderedDict, List 9 | 10 | 11 | @export 12 | class MobileNetV2(nn.Module): 13 | @blocks.activation(partial(nn.ReLU6, inplace=True)) 14 | def __init__( 15 | self, 16 | in_channels: int = 3, 17 | num_classes: int = 1000, 18 | multiplier: float = 1.0, 19 | dropout_rate: float = 0.2, 20 | dilations: List[int] = None, 21 | thumbnail: bool = False, 22 | **kwargs: Any 23 | ): 24 | super().__init__() 25 | 26 | dilations = [1] + (dilations or [1, 1, 1, 1]) 27 | assert len(dilations) == 5, '' 28 | 29 | self.block = blocks.InvertedResidualBlock 30 | 31 | FRONT_S = 1 if thumbnail else 2 32 | 33 | t = [1, 6, 6, 6, 6, 6, 6] 34 | c = [32, 16, 24, 32, 64, 96, 160, 320] 35 | n = [1, 2, 3, 4, 3, 3, 1] 36 | s = [1, FRONT_S, 2, 2, 1, 2, 1] 37 | stages = [0, 1, 1, 1, 0, 1, 0] 38 | 39 | if multiplier < 1.0: 40 | c = [make_divisible(x * multiplier, 8) for x in c] 41 | 42 | self.features = nn.Sequential(OrderedDict([ 43 | ('stem', blocks.Stage( 44 | blocks.Conv2dBlock(in_channels, c[0], 3, stride=FRONT_S) 45 | )) 46 | ])) 47 | 48 | for i in range(len(t)): 49 | layers = self.make_layers( 50 | c[i], 51 | t[i], 52 | c[i+1], 53 | n[i], 54 | s[i], 55 | dilations[len(self.features) + (stages[i] - 1)] 56 | ) 57 | 58 | if stages[i]: 59 | self.features.add_module(f'stage{len(self.features)}', blocks.Stage(layers)) 60 | else: 61 | self.features[-1].append(layers) 62 | 63 | self.features[-1].append(blocks.Conv2d1x1Block(c[-1], 1280)) 64 | 65 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 66 | self.classifier = nn.Sequential( 67 | nn.Dropout(dropout_rate, inplace=True), 68 | nn.Linear(1280, num_classes) 69 | ) 70 | 71 | def make_layers(self, inp: int, t: int, oup: int, n: int, stride: int, dilation: int): 72 | layers = [ 73 | self.block( 74 | inp, 75 | oup, 76 | t, 77 | stride=stride if dilation == 1 else 1, 78 | dilation=max(dilation // stride, 1) 79 | ) 80 | ] 81 | 82 | for _ in range(n - 1): 83 | layers.append(self.block(oup, oup, t, dilation=dilation)) 84 | 85 | return layers 86 | 87 | def forward(self, x): 88 | x = self.features(x) 89 | x = self.pool(x) 90 | x = torch.flatten(x, 1) 91 | x = self.classifier(x) 92 | 93 | return x 94 | 95 | 96 | def _mobilenet_v2( 97 | multiplier: float = 1.0, 98 | pretrained: bool = False, 99 | pth: str = None, 100 | progress: bool = 
True, 101 | **kwargs: Any 102 | ): 103 | model = MobileNetV2(multiplier=multiplier, **kwargs) 104 | 105 | if pretrained: 106 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 107 | return model 108 | 109 | 110 | @export 111 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v2_x1_0-bf342af4.pth') 112 | def mobilenet_v2_x1_0(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 113 | return _mobilenet_v2(1.0, pretrained, pth, progress, **kwargs) 114 | 115 | 116 | @export 117 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v2_x0_75-fdfaf351.pth') 118 | def mobilenet_v2_x0_75(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 119 | return _mobilenet_v2(0.75, pretrained, pth, progress, **kwargs) 120 | 121 | 122 | @export 123 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v2_x0_5-a9d4ed71.pth') 124 | def mobilenet_v2_x0_5(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 125 | return _mobilenet_v2(0.5, pretrained, pth, progress, **kwargs) 126 | 127 | 128 | @export 129 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1/mobilenet_v2_x0_35-9bce1f31.pth') 130 | def mobilenet_v2_x0_35(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 131 | return _mobilenet_v2(0.35, pretrained, pth, progress, **kwargs) 132 | -------------------------------------------------------------------------------- /cvm/models/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .blocks import * 2 | 3 | from . import functional -------------------------------------------------------------------------------- /cvm/models/ops/blocks/__init__.py: -------------------------------------------------------------------------------- 1 | from .factory import normalizer, activation, normalizer_fn, activation_fn, norm_activation, attention, attention_fn, Nil 2 | from .stage import Stage 3 | from .affine import Affine, Scale 4 | from .vanilla_conv2d import Conv2d1x1, Conv2d3x3, Conv2d1x1BN, Conv2d3x3BN, Conv2d1x1Block, Conv2dBlock 5 | from .bottleneck import ResBasicBlockV1, BottleneckV1, ResBasicBlockV2, BottleneckV2 6 | from .inception import InceptionA, InceptionB, InceptionC, ReductionA, ReductionB, ReductionC, InceptionResNetA, InceptionResNetB, InceptionResNetC 7 | from .channel import Combine, ChannelChunk, ChannelSplit, ChannelShuffle, ConcatBranches, Permute 8 | from .depthwise_separable_conv2d import DepthwiseConv2d, PointwiseConv2d, DepthwiseConv2dBN, PointwiseConv2dBN, DepthwiseBlock, PointwiseBlock 9 | from .inverted_residual_block import InvertedResidualBlock, FusedInvertedResidualBlock 10 | from .squeeze_excite import se, SEBlock 11 | from .mlp import MlpBlock 12 | from .drop import StochasticDepth 13 | from .gaussian_blur import GaussianBlur, GaussianBlurBN, GaussianBlurBlock 14 | from .aspp import ASPP, ASPPPooling 15 | from .adder import adder2d, adder, adder2d_function 16 | from .non_local import NonLocalBlock 17 | from .interpolate import Interpolate 18 | from .gather_excite import GatherExciteBlock 19 | from .selective_kernel import SelectiveKernelBlock 20 | from .cbam import CBAM 21 | from .efficient_channel_attention import EfficientChannelAttention 22 | from .norm import LayerNorm2d 23 | from .global_context import GlobalContextBlock 
-------------------------------------------------------------------------------- /cvm/models/ops/blocks/adder.py: -------------------------------------------------------------------------------- 1 | ''' 2 | refer to: https://github.com/huawei-noah/AdderNet/blob/master/adder.py 3 | 4 | Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of BSD 3-Clause License. 7 | This program is distributed in the hope that it will be useful, 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | BSD 3-Clause License for more details. 11 | ''' 12 | import torch 13 | import torch.nn as nn 14 | import numpy as np 15 | from torch.autograd import Function 16 | import math 17 | 18 | 19 | def adder2d_function(X, W, stride=1, padding=0): 20 | n_filters, d_filter, h_filter, w_filter = W.size() 21 | n_x, d_x, h_x, w_x = X.size() 22 | 23 | h_out = (h_x - h_filter + 2 * padding) / stride + 1 24 | w_out = (w_x - w_filter + 2 * padding) / stride + 1 25 | 26 | h_out, w_out = int(h_out), int(w_out) 27 | X_col = torch.nn.functional.unfold(X.view(1, -1, h_x, w_x), h_filter, dilation=1, 28 | padding=padding, stride=stride).view(n_x, -1, h_out*w_out) 29 | X_col = X_col.permute(1, 2, 0).contiguous().view(X_col.size(1), -1) 30 | W_col = W.view(n_filters, -1) 31 | 32 | out = adder.apply(W_col, X_col) 33 | 34 | out = out.view(n_filters, h_out, w_out, n_x) 35 | out = out.permute(3, 0, 1, 2).contiguous() 36 | 37 | return out 38 | 39 | 40 | class adder(Function): 41 | @staticmethod 42 | def forward(ctx, W_col, X_col): 43 | ctx.save_for_backward(W_col, X_col) 44 | output = -(W_col.unsqueeze(2)-X_col.unsqueeze(0)).abs().sum(1) 45 | return output 46 | 47 | @staticmethod 48 | def backward(ctx, grad_output): 49 | W_col, X_col = ctx.saved_tensors 50 | grad_W_col = ((X_col.unsqueeze(0)-W_col.unsqueeze(2))*grad_output.unsqueeze(1)).sum(2) 51 | grad_W_col = grad_W_col/grad_W_col.norm(p=2).clamp(min=1e-12)*math.sqrt(W_col.size(1)*W_col.size(0))/5 52 | grad_X_col = (-(X_col.unsqueeze(0)-W_col.unsqueeze(2)).clamp(-1, 1)*grad_output.unsqueeze(1)).sum(0) 53 | 54 | return grad_W_col, grad_X_col 55 | 56 | 57 | class adder2d(nn.Module): 58 | 59 | def __init__(self, input_channel, output_channel, kernel_size, stride=1, padding=0, bias=False): 60 | super(adder2d, self).__init__() 61 | self.stride = stride 62 | self.padding = padding 63 | self.input_channel = input_channel 64 | self.output_channel = output_channel 65 | self.kernel_size = kernel_size 66 | self.adder = torch.nn.Parameter(nn.init.normal_(torch.randn( 67 | output_channel, input_channel, kernel_size, kernel_size))) 68 | self.bias = bias 69 | if bias: 70 | self.b = torch.nn.Parameter(nn.init.uniform_(torch.zeros(output_channel))) 71 | 72 | def forward(self, x): 73 | output = adder2d_function(x, self.adder, self.stride, self.padding) 74 | if self.bias: 75 | output += self.b.unsqueeze(0).unsqueeze(2).unsqueeze(3) 76 | 77 | return output 78 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/affine.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class Scale(nn.Module): 6 | def __init__(self, dim, alpha: float = 1e-6): 7 | super().__init__() 8 | 9 | self.dim = dim 10 | 11 | self.alpha = nn.Parameter(torch.ones(dim, 1, 1).fill_(alpha)) 12 | 13 | def 
forward(self, x): 14 | return self.alpha * x 15 | 16 | def extra_repr(self): 17 | return f'{self.dim}' 18 | 19 | 20 | class Affine(nn.Module): 21 | def __init__(self, dim, alpha: float = 1.0, beta: float = 0.0): 22 | super().__init__() 23 | 24 | self.dim = dim 25 | 26 | self.alpha = nn.Parameter(torch.empty(dim, 1, 1).fill_(alpha)) 27 | self.beta = nn.Parameter(torch.empty(dim, 1, 1).fill_(beta)) 28 | 29 | def forward(self, x): 30 | return self.alpha * x + self.beta 31 | 32 | def extra_repr(self): 33 | return f'{self.dim}' 34 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/aspp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .vanilla_conv2d import Conv2d1x1, Conv2d1x1Block, Conv2dBlock 5 | from .channel import Combine 6 | from typing import List 7 | 8 | 9 | class ASPPPooling(nn.Sequential): 10 | def __init__(self, in_channels: int, out_channels: int): 11 | super().__init__( 12 | nn.AdaptiveAvgPool2d(1), 13 | Conv2d1x1Block(in_channels, out_channels) 14 | ) 15 | 16 | def forward(self, x): 17 | size = x.shape[-2:] 18 | for mod in self: 19 | x = mod(x) 20 | return F.interpolate(x, size=size, mode="bilinear", align_corners=False) 21 | 22 | 23 | class ASPP(nn.Module): 24 | """Atrous Spatial Pyramid Pooling""" 25 | 26 | def __init__( 27 | self, 28 | in_channels: int, 29 | out_channels: int = 256, 30 | rates: List[int] = [6, 12, 18] 31 | ): 32 | super().__init__() 33 | 34 | ms = [Conv2d1x1Block(in_channels, out_channels)] 35 | for rate in rates: 36 | ms.append(Conv2dBlock(in_channels, out_channels, padding=rate, dilation=rate)) 37 | 38 | ms.append(ASPPPooling(in_channels, out_channels)) 39 | self.ms = nn.ModuleList(ms) 40 | 41 | self.combine = Combine('CONCAT') 42 | self.conv1x1 = Conv2d1x1(out_channels * len(self.ms), out_channels) 43 | 44 | def forward(self, x): 45 | aspp = [] 46 | for module in self.ms: 47 | aspp.append(module(x)) 48 | 49 | x = self.combine(aspp) 50 | x = self.conv1x1(x) 51 | return x 52 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/cbam.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from .vanilla_conv2d import Conv2d1x1 4 | from .factory import normalizer_fn, activation_fn 5 | from ..functional import make_divisible 6 | 7 | 8 | class ChannelAttention(nn.Module): 9 | def __init__( 10 | self, 11 | in_channels, 12 | rd_ratio: float = 1/8, 13 | rd_divisor: int = 8, 14 | gate_fn: nn.Module = nn.Sigmoid 15 | ) -> None: 16 | super().__init__() 17 | 18 | rd_channels = make_divisible(in_channels * rd_ratio, rd_divisor) 19 | 20 | self.max_pool = nn.AdaptiveMaxPool2d((1, 1)) 21 | self.avg_pool = nn.AdaptiveAvgPool2d((1, 1)) 22 | 23 | self.mlp = nn.Sequential( 24 | Conv2d1x1(in_channels, rd_channels, bias=True), 25 | activation_fn(), 26 | Conv2d1x1(rd_channels, in_channels, bias=True) 27 | ) 28 | self.gate = gate_fn() 29 | 30 | def forward(self, x): 31 | return x * self.gate(self.mlp(self.max_pool(x)) + self.mlp(self.avg_pool(x))) 32 | 33 | 34 | class SpatialAttention(nn.Module): 35 | def __init__( 36 | self, 37 | kernel_size: int = 7, 38 | gate_fn: nn.Module = nn.Sigmoid 39 | ) -> None: 40 | super().__init__() 41 | 42 | self.conv = nn.Conv2d(2, 1, kernel_size, padding=(kernel_size - 1) // 2, bias=False) 43 | self.norm = normalizer_fn(1) 44 | self.gate = gate_fn() 45 
| 46 | def forward(self, x): 47 | s = torch.cat([torch.amax(x, dim=1, keepdim=True), torch.mean(x, dim=1, keepdim=True)], dim=1) 48 | return x * self.gate(self.norm(self.conv(s))) 49 | 50 | 51 | class CBAM(nn.Sequential): 52 | r""" 53 | Paper: CBAM: Convolutional Block Attention Module, https://arxiv.org/abs/1807.06521 54 | Code: https://github.com/Jongchan/attention-module 55 | """ 56 | 57 | def __init__( 58 | self, 59 | in_channels, 60 | rd_ratio, 61 | kernel_size: int = 7, 62 | gate_fn: nn.Module = nn.Sigmoid 63 | ) -> None: 64 | super().__init__( 65 | ChannelAttention(in_channels, rd_ratio, gate_fn=gate_fn), 66 | SpatialAttention(kernel_size=kernel_size, gate_fn=gate_fn) 67 | ) 68 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/channel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from ..functional import channel_shuffle 4 | from typing import List 5 | 6 | 7 | class ChannelChunk(nn.Module): 8 | def __init__(self, groups: int): 9 | super().__init__() 10 | 11 | self.groups = groups 12 | 13 | def forward(self, x: torch.Tensor): 14 | return torch.chunk(x, self.groups, dim=1) 15 | 16 | def extra_repr(self): 17 | return f'groups={self.groups}' 18 | 19 | 20 | class ChannelSplit(nn.Module): 21 | def __init__(self, sections): 22 | super().__init__() 23 | 24 | self.sections = sections 25 | 26 | def forward(self, x: torch.Tensor): 27 | return torch.split(x, self.sections, dim=1) 28 | 29 | def extra_repr(self): 30 | return f'sections={self.sections}' 31 | 32 | 33 | class ChannelShuffle(nn.Module): 34 | def __init__(self, groups: int): 35 | super().__init__() 36 | 37 | self.groups = groups 38 | 39 | def forward(self, x): 40 | return channel_shuffle(x, self.groups) 41 | 42 | def extra_repr(self): 43 | return 'groups={}'.format(self.groups) 44 | 45 | 46 | class Combine(nn.Module): 47 | def __init__(self, method: str = 'ADD', *args, **kwargs): 48 | super().__init__() 49 | assert method in ['ADD', 'CONCAT'], '' 50 | 51 | self.method = method 52 | self._combine = self._add if self.method == 'ADD' else self._cat 53 | 54 | @staticmethod 55 | def _add(x): 56 | return x[0] + x[1] 57 | 58 | @staticmethod 59 | def _cat(x): 60 | return torch.cat(x, dim=1) 61 | 62 | def forward(self, x): 63 | return self._combine(x) 64 | 65 | def extra_repr(self): 66 | return f'method=\'{self.method}\'' 67 | 68 | 69 | class ConcatBranches(nn.Sequential): 70 | def forward(self, x): 71 | res = [] 72 | for module in self: 73 | res.append(module(x)) 74 | return torch.cat(res, dim=1) 75 | 76 | 77 | class Permute(nn.Module): 78 | def __init__(self, dims: List[int]): 79 | super().__init__() 80 | self.dims = dims 81 | 82 | def forward(self, x): 83 | return x.permute(*self.dims) 84 | 85 | def extra_repr(self): 86 | return ', '.join([str(dim) for dim in self.dims]) 87 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/depthwise_separable_conv2d.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from . 
import factory 3 | 4 | 5 | class DepthwiseConv2d(nn.Conv2d): 6 | def __init__( 7 | self, 8 | inp, 9 | oup, 10 | kernel_size: int = 3, 11 | stride: int = 1, 12 | padding: int = None, 13 | dilation: int = 1, 14 | bias: bool = False, 15 | ): 16 | if padding is None: 17 | padding = ((kernel_size - 1) * (dilation - 1) + kernel_size) // 2 18 | 19 | super().__init__( 20 | inp, oup, kernel_size, stride=stride, 21 | padding=padding, dilation=dilation, bias=bias, groups=inp 22 | ) 23 | 24 | 25 | class PointwiseConv2d(nn.Conv2d): 26 | def __init__( 27 | self, 28 | inp, 29 | oup, 30 | stride: int = 1, 31 | bias: bool = False, 32 | groups: int = 1 33 | ): 34 | super().__init__(inp, oup, 1, stride=stride, padding=0, bias=bias, groups=groups) 35 | 36 | 37 | class DepthwiseConv2dBN(nn.Sequential): 38 | def __init__( 39 | self, 40 | inp, 41 | oup, 42 | kernel_size: int = 3, 43 | stride: int = 1, 44 | padding: int = None, 45 | dilation: int = 1, 46 | normalizer_fn: nn.Module = None 47 | ): 48 | normalizer_fn = normalizer_fn or factory._NORMALIZER 49 | 50 | super().__init__( 51 | DepthwiseConv2d(inp, oup, kernel_size, stride=stride, padding=padding, dilation=dilation) 52 | ) 53 | 54 | if normalizer_fn: 55 | self.add_module(str(self.__len__()), normalizer_fn(oup)) 56 | 57 | 58 | class PointwiseConv2dBN(nn.Sequential): 59 | def __init__( 60 | self, 61 | inp, 62 | oup, 63 | stride: int = 1, 64 | normalizer_fn: nn.Module = None 65 | ): 66 | normalizer_fn = normalizer_fn or factory._NORMALIZER 67 | 68 | super().__init__( 69 | PointwiseConv2d(inp, oup, stride=stride) 70 | ) 71 | 72 | if normalizer_fn: 73 | self.add_module(str(self.__len__()), normalizer_fn(oup)) 74 | 75 | 76 | class DepthwiseBlock(nn.Sequential): 77 | def __init__( 78 | self, 79 | inp, 80 | oup, 81 | kernel_size: int = 3, 82 | stride: int = 1, 83 | padding: int = None, 84 | dilation: int = 1, 85 | normalizer_fn: nn.Module = None, 86 | activation_fn: nn.Module = None, 87 | norm_position: str = None 88 | ): 89 | super().__init__( 90 | DepthwiseConv2d(inp, oup, kernel_size, stride, padding=padding, dilation=dilation), 91 | *factory.norm_activation(oup, normalizer_fn, activation_fn, norm_position) 92 | ) 93 | 94 | 95 | class PointwiseBlock(nn.Sequential): 96 | def __init__( 97 | self, 98 | inp, 99 | oup, 100 | stride: int = 1, 101 | groups: int = 1, 102 | normalizer_fn: nn.Module = None, 103 | activation_fn: nn.Module = None, 104 | norm_position: str = None, 105 | ): 106 | super().__init__( 107 | PointwiseConv2d(inp, oup, stride=stride, groups=groups), 108 | *factory.norm_activation(oup, normalizer_fn, activation_fn, norm_position) 109 | ) 110 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/drop.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class StochasticDepth(nn.Module): 6 | r"""Stochastic Depth: Drop paths per sample (when applied in main path of residual blocks) 7 | 8 | Paper: 9 | Deep Networks with Stochastic Depth, https://arxiv.org/abs/1603.09382 10 | """ 11 | 12 | def __init__(self, survival_prob: float): 13 | super().__init__() 14 | 15 | self.p = survival_prob 16 | 17 | def forward(self, x): 18 | if self.p == 1. 
or not self.training: 19 | return x 20 | 21 | # work with diff dim tensors, not just 2D ConvNets 22 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) 23 | 24 | probs = self.p + torch.rand(shape, dtype=x.dtype, device=x.device) 25 | # We therefore need to re-calibrate the outputs of any given function f 26 | # by the expected number of times it participates in training, p. 27 | return (x / self.p) * probs.floor_() 28 | 29 | def extra_repr(self): 30 | return f'survival_prob={self.p}' 31 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/efficient_channel_attention.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class EfficientChannelAttention(nn.Module): 7 | r""" 8 | Paper: ECA-Net: Efficient Channel Attention for Deep Convolutional Neural Networks, https://arxiv.org/abs/1910.03151 9 | """ 10 | def __init__( 11 | self, 12 | in_channels, 13 | gamma=2, 14 | beta=2 15 | ) -> None: 16 | super().__init__() 17 | 18 | t = int(abs((math.log(in_channels, 2) + beta) / gamma)) 19 | k = max(t if t % 2 else t + 1, 3) 20 | 21 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 22 | self.conv = nn.Conv1d(1, 1, kernel_size=k, padding=(k - 1) // 2) 23 | self.gate = nn.Sigmoid() 24 | 25 | def forward(self, x: torch.Tensor): 26 | y = self.pool(x) 27 | y = self.conv(y.view(y.shape[0], 1, -1)) 28 | y = y.view(y.shape[0], -1, 1, 1) 29 | y = self.gate(y) 30 | 31 | return x * y.expand_as(x) 32 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/factory.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from contextlib import contextmanager 3 | from functools import partial 4 | import torch.nn as nn 5 | from .squeeze_excite import SEBlock 6 | 7 | _NORM_POSIITON: str = 'before' 8 | _NORMALIZER: nn.Module = nn.BatchNorm2d 9 | _ACTIVATION: nn.Module = partial(nn.ReLU, inplace=True) 10 | _ATTENTION: nn.Module = SEBlock 11 | 12 | 13 | class Nil: 14 | ... 
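# Added note (sketch, not in the original source): `Nil` is a sentinel default so the
# `normalizer()` context manager below can tell "keep the current normalizer" (fn=Nil)
# apart from "disable normalization explicitly" (fn=None). These context managers also
# work as decorators, as in `@blocks.normalizer(position='after')` used by
# mobilenet_v1_x1_0_wo_dwrelubn above; a hypothetical override might look like:
#
#     with blocks.normalizer(partial(nn.GroupNorm, 8), position='after'):
#         model = SomeModel()   # blocks created here get GroupNorm placed after the activation
#
# `partial(nn.GroupNorm, 8)` and `SomeModel` are only illustrative placeholders.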
15 | 16 | 17 | @contextmanager 18 | def normalizer( 19 | # _NORMALIZER can be None, Nil: _NORMALIZER->_NORMALIZER, None: _NORMALIZER->None 20 | fn: nn.Module = Nil, 21 | position: str = None 22 | ): 23 | 24 | global _NORMALIZER, _NORM_POSIITON 25 | 26 | fn = _NORMALIZER if fn == Nil else fn 27 | position = position or _NORM_POSIITON 28 | 29 | _pre_normalizer = _NORMALIZER 30 | _pre_position = _NORM_POSIITON 31 | 32 | _NORMALIZER = fn 33 | _NORM_POSIITON = position 34 | 35 | yield 36 | 37 | _NORMALIZER = _pre_normalizer 38 | _NORM_POSIITON = _pre_position 39 | 40 | 41 | @contextmanager 42 | def activation(fn: nn.Module): 43 | global _ACTIVATION 44 | 45 | _pre_activation = _ACTIVATION 46 | _ACTIVATION = fn 47 | yield 48 | _ACTIVATION = _pre_activation 49 | 50 | 51 | @contextmanager 52 | def attention(fn: nn.Module): 53 | global _ATTENTION 54 | 55 | _pre_attn = _ATTENTION 56 | _ATTENTION = fn 57 | yield 58 | _ATTENTION = _pre_attn 59 | 60 | 61 | def normalizer_fn(channels): 62 | return _NORMALIZER(channels) 63 | 64 | 65 | def activation_fn(): 66 | return _ACTIVATION() 67 | 68 | 69 | def attention_fn(channels, **kwargs): 70 | return _ATTENTION(channels, **kwargs) 71 | 72 | 73 | def norm_activation( 74 | channels, 75 | normalizer_fn: nn.Module = None, 76 | activation_fn: nn.Module = None, 77 | norm_position: str = None 78 | ) -> List[nn.Module]: 79 | norm_position = norm_position or _NORM_POSIITON 80 | assert norm_position in ['before', 'after', 'none'], '' 81 | 82 | normalizer_fn = normalizer_fn or _NORMALIZER 83 | activation_fn = activation_fn or _ACTIVATION 84 | 85 | if normalizer_fn == None and activation_fn == None: 86 | return [] 87 | 88 | if normalizer_fn == None: 89 | return [activation_fn()] 90 | 91 | if activation_fn == None: 92 | return [normalizer_fn(channels)] 93 | 94 | if norm_position == 'after': 95 | return [activation_fn(), normalizer_fn(channels)] 96 | 97 | return [normalizer_fn(channels), activation_fn()] 98 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/gather_excite.py: -------------------------------------------------------------------------------- 1 | import math 2 | from functools import partial 3 | from contextlib import contextmanager 4 | from torch import nn 5 | from .depthwise_separable_conv2d import DepthwiseBlock, DepthwiseConv2dBN 6 | from .interpolate import Interpolate 7 | 8 | _GE_INNER_NONLINEAR: nn.Module = partial(nn.ReLU, inplace=True) 9 | _GE_GATING_FN: nn.Module = nn.Sigmoid 10 | 11 | 12 | @contextmanager 13 | def ge( 14 | inner_nonlinear: nn.Module = _GE_INNER_NONLINEAR, 15 | gating_fn: nn.Module = _GE_GATING_FN 16 | ): 17 | global _GE_INNER_NONLINEAR 18 | global _GE_GATING_FN 19 | 20 | _pre_inner_fn = _GE_INNER_NONLINEAR 21 | _pre_fn = _GE_GATING_FN 22 | 23 | _GE_INNER_NONLINEAR = inner_nonlinear 24 | _GE_GATING_FN = gating_fn 25 | 26 | yield 27 | 28 | _GE_INNER_NONLINEAR = _pre_inner_fn 29 | _GE_GATING_FN = _pre_fn 30 | 31 | 32 | class GatherExciteBlock(nn.Module): 33 | r"""Gather-Excite Block 34 | Paper: Gather-Excite: Exploiting Feature Context in Convolutional Neural Networks, https://arxiv.org/abs/1810.12348 35 | Code: https://github.com/hujie-frank/GENet 36 | """ 37 | 38 | def __init__( 39 | self, 40 | channels, 41 | extent_ratio: int = 0, 42 | param_free: bool = True, 43 | kernel_size: int = 3, 44 | inner_activation_fn: nn.Module = None, 45 | gating_fn: nn.Module = None 46 | ): 47 | super().__init__() 48 | 49 | inner_activation_fn = inner_activation_fn or _GE_INNER_NONLINEAR 50 | gating_fn 
= gating_fn or _GE_GATING_FN 51 | 52 | self.gather = nn.Sequential() 53 | 54 | if param_free is True: 55 | if extent_ratio == 0: 56 | self.gather = nn.AdaptiveAvgPool2d((1, 1)) 57 | else: 58 | self.gather = nn.AvgPool2d((15, 15), stride=extent_ratio) 59 | else: 60 | if extent_ratio == 0: 61 | self.gather.append(DepthwiseConv2dBN(channels, channels, kernel_size=kernel_size, padding=0)) 62 | else: 63 | for i in range(int(math.log2(extent_ratio))): 64 | if i != (int(math.log2(extent_ratio)) - 1): 65 | self.gather.append(DepthwiseBlock(channels, channels, kernel_size=kernel_size, 66 | stride=2, activation_fn=inner_activation_fn)) 67 | else: 68 | self.gather.append(DepthwiseConv2dBN(channels, channels, kernel_size=kernel_size, stride=2)) 69 | 70 | self.excite = Interpolate() 71 | self.gate = gating_fn() 72 | 73 | def _forward(self, x): 74 | size = x.shape[-2:] 75 | 76 | # gather 77 | x = self.gather(x) 78 | 79 | if x.shape[-1] != 1: 80 | x = self.excite(x, size) 81 | 82 | x = self.gate(x) 83 | 84 | return x 85 | 86 | def forward(self, x): 87 | return x * self._forward(x) 88 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/gaussian_blur.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from . import factory 5 | from ..functional import get_gaussian_kernels2d 6 | from typing import Tuple 7 | 8 | 9 | class GaussianBlur(nn.Module): 10 | def __init__( 11 | self, 12 | channels: int, 13 | kernel_size: int = 3, 14 | sigma_range: Tuple[float, float] = (1.0, 1.0), 15 | normalize: bool = True, 16 | stride: int = 1, 17 | padding: int = None, 18 | dilation: int = 1 19 | ): 20 | super().__init__() 21 | 22 | padding = padding or ((kernel_size - 1) * (dilation - 1) + kernel_size) // 2 23 | 24 | self.channels = channels 25 | self.kernel_size = (kernel_size, kernel_size) 26 | self.padding = (padding, padding) 27 | self.stride = (stride, stride) 28 | self.dilation = (dilation, dilation) 29 | self.padding_mode = 'zeros' 30 | self.sigma_range = sigma_range 31 | self.normalize = normalize 32 | 33 | self.register_buffer( 34 | 'weight', 35 | get_gaussian_kernels2d( 36 | kernel_size, 37 | torch.linspace(self.sigma_range[0], self.sigma_range[1], self.channels).view(-1, 1, 1, 1), 38 | self.normalize 39 | ) 40 | ) 41 | 42 | def forward(self, x): 43 | return F.conv2d(x, self.weight, None, self.stride, self.padding, self.dilation, self.channels) 44 | 45 | @property 46 | def out_channels(self): 47 | return self.channels 48 | 49 | def extra_repr(self): 50 | s = ('{channels}, kernel_size={kernel_size}' 51 | ', sigma_range={sigma_range}, normalize={normalize}, stride={stride}') 52 | if self.padding != (0,) * len(self.padding): 53 | s += ', padding={padding}' 54 | if self.dilation != (1,) * len(self.dilation): 55 | s += ', dilation={dilation}' 56 | if self.padding_mode != 'zeros': 57 | s += ', padding_mode={padding_mode}' 58 | return s.format(**self.__dict__) 59 | 60 | 61 | class GaussianBlurBN(nn.Sequential): 62 | def __init__( 63 | self, 64 | channels, 65 | kernel_size: int = 3, 66 | sigma_range: Tuple[float, float] = (1.0, 1.0), 67 | normalize: bool = True, 68 | stride: int = 1, 69 | padding: int = None, 70 | dilation: int = 1, 71 | normalizer_fn: nn.Module = None 72 | ): 73 | normalizer_fn = normalizer_fn or factory._NORMALIZER 74 | 75 | super().__init__( 76 | GaussianBlur(channels, kernel_size, sigma_range, normalize, 77 | stride=stride, 
padding=padding, dilation=dilation), 78 | normalizer_fn(channels) 79 | ) 80 | 81 | 82 | class GaussianBlurBlock(nn.Sequential): 83 | def __init__( 84 | self, 85 | channels, 86 | kernel_size: int = 3, 87 | sigma_range: Tuple[float, float] = (1.0, 1.0), 88 | normalize: bool = True, 89 | stride: int = 1, 90 | padding: int = None, 91 | dilation: int = 1, 92 | normalizer_fn: nn.Module = None, 93 | activation_fn: nn.Module = None, 94 | norm_position: str = None 95 | ): 96 | super().__init__( 97 | GaussianBlur(channels, kernel_size, sigma_range, normalize, 98 | stride=stride, padding=padding, dilation=dilation), 99 | *factory.norm_activation(channels, normalizer_fn, activation_fn, norm_position) 100 | ) 101 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/global_context.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from .vanilla_conv2d import Conv2d1x1 4 | from .norm import LayerNorm2d 5 | from ..functional import make_divisible 6 | 7 | 8 | class GlobalContextBlock(nn.Module): 9 | r""" 10 | Paper: GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond, https://arxiv.org/abs/1904.11492 11 | """ 12 | 13 | def __init__( 14 | self, 15 | in_channels, 16 | rd_ratio: float = 1/8, 17 | rd_divisor: int = 8, 18 | ) -> None: 19 | super().__init__() 20 | 21 | channels = make_divisible(in_channels * rd_ratio, rd_divisor) 22 | 23 | self.conv1x1 = Conv2d1x1(in_channels, 1, bias=True) 24 | self.softmax = nn.Softmax(dim=1) 25 | 26 | self.transform = nn.Sequential( 27 | Conv2d1x1(in_channels, channels), 28 | LayerNorm2d(channels), 29 | nn.ReLU(inplace=True), 30 | Conv2d1x1(channels, in_channels) 31 | ) 32 | 33 | def forward(self, x): 34 | # context modeling 35 | c = torch.einsum( 36 | "ncx, nxo -> nco", 37 | x.view(x.shape[0], x.shape[1], -1), 38 | self.softmax(self.conv1x1(x).view(x.shape[0], -1, 1)) 39 | ) 40 | c = x * c.unsqueeze(-1) 41 | 42 | # transform 43 | return x + self.transform(c) 44 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/interpolate.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch.nn.functional as F 3 | from typing import Optional, List 4 | 5 | 6 | class Interpolate(nn.Module): 7 | def __init__(self, mode='nearest') -> None: 8 | super().__init__() 9 | 10 | self.mode = mode 11 | 12 | def forward(self, x, size: Optional[int] = None, scale_factor: Optional[List[float]] = None): 13 | return F.interpolate(x, size=size, scale_factor=scale_factor, mode=self.mode) 14 | 15 | def extra_repr(self) -> str: 16 | return 'mode=' + self.mode 17 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/inverted_residual_block.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from . 
import factory 3 | from .vanilla_conv2d import Conv2d1x1Block, Conv2d1x1BN, Conv2dBlock 4 | from .depthwise_separable_conv2d import DepthwiseBlock, DepthwiseConv2dBN 5 | from .squeeze_excite import SEBlock 6 | from .channel import Combine 7 | from .drop import StochasticDepth 8 | 9 | 10 | class InvertedResidualBlock(nn.Module): 11 | def __init__( 12 | self, 13 | inp, 14 | oup, 15 | t, 16 | kernel_size: int = 3, 17 | stride: int = 1, 18 | padding: int = None, 19 | dilation: int = 1, 20 | rd_ratio: float = None, 21 | se_ind: bool = False, 22 | survival_prob: float = None, 23 | normalizer_fn: nn.Module = None, 24 | activation_fn: nn.Module = None, 25 | dw_se_act: nn.Module = None 26 | ): 27 | super().__init__() 28 | 29 | self.inp = inp 30 | self.planes = int(self.inp * t) 31 | self.oup = oup 32 | self.stride = stride 33 | self.apply_residual = (self.stride == 1) and (self.inp == self.oup) 34 | self.rd_ratio = rd_ratio if se_ind or rd_ratio is None else (rd_ratio / t) 35 | self.has_attn = (self.rd_ratio is not None) and (self.rd_ratio > 0) and (self.rd_ratio <= 1) 36 | 37 | normalizer_fn = normalizer_fn or factory._NORMALIZER 38 | activation_fn = activation_fn or factory._ACTIVATION 39 | 40 | layers = [] 41 | if t != 1: 42 | layers.append(Conv2d1x1Block(inp, self.planes, normalizer_fn=normalizer_fn, activation_fn=activation_fn)) 43 | 44 | if dw_se_act is None: 45 | layers.append(DepthwiseBlock(self.planes, self.planes, kernel_size, stride=self.stride, 46 | padding=padding, dilation=dilation, normalizer_fn=normalizer_fn, activation_fn=activation_fn)) 47 | else: 48 | layers.append(DepthwiseConv2dBN(self.planes, self.planes, kernel_size, stride=self.stride, padding=padding, 49 | dilation=dilation, normalizer_fn=normalizer_fn)) 50 | 51 | if self.has_attn: 52 | layers.append(SEBlock(self.planes, rd_ratio=self.rd_ratio)) 53 | 54 | if dw_se_act: 55 | layers.append(dw_se_act()) 56 | 57 | layers.append(Conv2d1x1BN(self.planes, oup, normalizer_fn=normalizer_fn)) 58 | 59 | if self.apply_residual and survival_prob: 60 | layers.append(StochasticDepth(survival_prob)) 61 | 62 | self.branch1 = nn.Sequential(*layers) 63 | self.branch2 = nn.Identity() if self.apply_residual else None 64 | self.combine = Combine('ADD') if self.apply_residual else None 65 | 66 | def forward(self, x): 67 | if self.apply_residual: 68 | return self.combine([self.branch2(x), self.branch1(x)]) 69 | else: 70 | return self.branch1(x) 71 | 72 | 73 | class FusedInvertedResidualBlock(nn.Module): 74 | def __init__( 75 | self, 76 | inp, 77 | oup, 78 | t, 79 | kernel_size: int = 3, 80 | stride: int = 1, 81 | padding: int = None, 82 | rd_ratio: float = None, 83 | se_ind: bool = False, 84 | survival_prob: float = None, 85 | normalizer_fn: nn.Module = None, 86 | activation_fn: nn.Module = None 87 | ): 88 | super().__init__() 89 | 90 | self.inp = inp 91 | self.planes = int(self.inp * t) 92 | self.oup = oup 93 | self.stride = stride 94 | self.padding = padding if padding is not None else (kernel_size // 2) 95 | self.apply_residual = (self.stride == 1) and (self.inp == self.oup) 96 | self.rd_ratio = rd_ratio if se_ind or rd_ratio is None else (rd_ratio / t) 97 | self.has_attn = (self.rd_ratio is not None) and (self.rd_ratio > 0) and (self.rd_ratio <= 1) 98 | 99 | normalizer_fn = normalizer_fn or factory._NORMALIZER 100 | activation_fn = activation_fn or factory._ACTIVATION 101 | 102 | layers = [ 103 | Conv2dBlock(inp, self.planes, kernel_size, stride=self.stride, padding=self.padding, 104 | normalizer_fn=normalizer_fn, activation_fn=activation_fn) 
105 | ] 106 | 107 | if self.has_attn: 108 | layers.append(SEBlock(self.planes, rd_ratio=self.rd_ratio)) 109 | 110 | layers.append(Conv2d1x1BN( 111 | self.planes, oup, normalizer_fn=normalizer_fn)) 112 | 113 | if self.apply_residual and survival_prob: 114 | layers.append(StochasticDepth(survival_prob)) 115 | 116 | self.branch1 = nn.Sequential(*layers) 117 | self.branch2 = nn.Identity() if self.apply_residual else None 118 | self.combine = Combine('ADD') if self.apply_residual else None 119 | 120 | def forward(self, x): 121 | if self.apply_residual: 122 | return self.combine([self.branch2(x), self.branch1(x)]) 123 | else: 124 | return self.branch1(x) 125 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/mlp.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from typing import OrderedDict 3 | 4 | 5 | class MlpBlock(nn.Sequential): 6 | def __init__( 7 | self, 8 | in_features, 9 | hidden_features=None, 10 | out_features=None, 11 | activation_fn: nn.Module = nn.GELU, 12 | dropout_rate: float = 0. 13 | ): 14 | hidden_features = hidden_features or in_features 15 | out_features = out_features or in_features 16 | 17 | super().__init__( 18 | nn.Linear(in_features, hidden_features), 19 | activation_fn(), 20 | nn.Dropout(dropout_rate), 21 | nn.Linear(hidden_features, out_features), 22 | nn.Dropout(dropout_rate) 23 | ) 24 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/non_local.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from .vanilla_conv2d import Conv2d1x1 4 | from ..functional import make_divisible 5 | 6 | 7 | class NonLocalBlock(nn.Module): 8 | r"""Non-Local Block for image classification 9 | Paper: Non-local Neural Networks, https://arxiv.org/abs/1711.07971 10 | Code: https://github.com/facebookresearch/video-nonlocal-net 11 | """ 12 | 13 | def __init__( 14 | self, 15 | in_channels, 16 | rd_ratio, 17 | rd_divisor: int = 8, 18 | use_scale: bool = True, 19 | use_norm: bool = True 20 | ): 21 | super().__init__() 22 | 23 | channels = make_divisible(in_channels * rd_ratio, rd_divisor) 24 | 25 | self.ratio = rd_ratio 26 | self.scale = channels ** -0.5 if use_scale else 1.0 27 | self.use_scale = use_scale 28 | 29 | # theta, phi, g 30 | self.W = Conv2d1x1(in_channels, channels * 3, bias=True) 31 | 32 | # z 33 | self.Z = nn.Sequential( 34 | Conv2d1x1(channels, in_channels, bias=not use_norm), 35 | nn.BatchNorm2d(in_channels) if use_norm else nn.Identity() 36 | ) 37 | 38 | self.reset_parameters() 39 | 40 | def reset_parameters(self): 41 | for name, m in self.named_modules(): 42 | if isinstance(m, nn.Conv2d): 43 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 44 | if len(list(m.parameters())) > 1: 45 | nn.init.constant_(m.bias, 0.0) 46 | elif isinstance(m, nn.BatchNorm2d): 47 | nn.init.constant_(m.weight, 0.0) 48 | nn.init.constant_(m.bias, 0.0) 49 | elif isinstance(m, nn.GroupNorm): 50 | nn.init.constant_(m.weight, 0.0) 51 | nn.init.constant_(m.bias, 0.0) 52 | 53 | def forward(self, x: torch.Tensor) -> torch.Tensor: 54 | N, _, H, W = x.size() 55 | 56 | # self-attention: y = softmax((Q(x) @ K(x)) / N) @ V(x). 
@{ 57 | t, p, g = torch.chunk(torch.flatten(self.W(x), start_dim=2), 3, dim=1) # Q, K, V 58 | 59 | s = torch.einsum('ncq, nck -> nqk', t, p) 60 | s = torch.softmax(s * self.scale, dim=2) 61 | s = torch.einsum('nqv, ncv -> ncq', s, g) 62 | # @} 63 | 64 | z = self.Z(s.contiguous().view(N, -1, H, W)) 65 | 66 | return z + x 67 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/norm.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class LayerNorm2d(nn.LayerNorm): 6 | """ LayerNorm for channels of '2D' spatial BCHW tensors """ 7 | 8 | def forward(self, x): 9 | x = x.permute(0, 2, 3, 1) 10 | x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 11 | x = x.permute(0, 3, 1, 2) 12 | return x 13 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/selective_kernel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from .vanilla_conv2d import Conv2d1x1, Conv2d1x1Block 4 | from .depthwise_separable_conv2d import DepthwiseBlock 5 | from .channel import Combine 6 | from ..functional import make_divisible 7 | 8 | 9 | class SelectiveKernelBlock(nn.Module): 10 | r""" 11 | Paper: Selective Kernel Networks, https://arxiv.org/abs/1903.06586 12 | """ 13 | 14 | def __init__( 15 | self, 16 | in_channels, 17 | rd_ratio: float = 1/8, 18 | rd_divisor: int = 8, 19 | ) -> None: 20 | super().__init__() 21 | 22 | self.in_channels = in_channels 23 | 24 | rd_channels = max(make_divisible(in_channels * rd_ratio, rd_divisor), 32) 25 | 26 | self.conv3x3 = DepthwiseBlock(in_channels, in_channels, kernel_size=3, dilation=1) 27 | self.conv5x5 = DepthwiseBlock(in_channels, in_channels, kernel_size=3, dilation=2) 28 | 29 | self.fuse = Combine('ADD') 30 | 31 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 32 | 33 | self.reduce = Conv2d1x1Block(in_channels, rd_channels) 34 | 35 | self.qk = Conv2d1x1(rd_channels, in_channels * 2, bias=True) 36 | self.softmax = nn.Softmax(dim=1) 37 | 38 | def forward(self, x): 39 | u3 = self.conv3x3(x) 40 | u5 = self.conv5x5(x) 41 | 42 | u = self.fuse([u3, u5]) 43 | 44 | s = self.pool(u) 45 | 46 | z = self.reduce(s) 47 | 48 | ab = self.softmax(self.qk(z).view(-1, 2, self.in_channels, 1, 1)) 49 | 50 | v = torch.sum(torch.stack([u3, u5], dim=1) * ab, dim=1) 51 | 52 | return v 53 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/squeeze_excite.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from contextlib import contextmanager 3 | from torch import nn 4 | from . 
import factory 5 | from .vanilla_conv2d import Conv2d1x1 6 | from ..functional import make_divisible 7 | from typing import OrderedDict 8 | 9 | _SE_INNER_NONLINEAR: nn.Module = partial(nn.ReLU, inplace=True) 10 | _SE_GATING_FN: nn.Module = nn.Sigmoid 11 | _SE_DIVISOR: int = 8 12 | _SE_USE_NORM: bool = False 13 | 14 | 15 | @contextmanager 16 | def se( 17 | inner_nonlinear: nn.Module = _SE_INNER_NONLINEAR, 18 | gating_fn: nn.Module = _SE_GATING_FN, 19 | divisor: int = _SE_DIVISOR, 20 | use_norm: bool = _SE_USE_NORM 21 | ): 22 | global _SE_INNER_NONLINEAR 23 | global _SE_GATING_FN 24 | global _SE_DIVISOR 25 | global _SE_USE_NORM 26 | 27 | _pre_inner_fn = _SE_INNER_NONLINEAR 28 | _pre_fn = _SE_GATING_FN 29 | _pre_divisor = _SE_DIVISOR 30 | _pre_use_norm = _SE_USE_NORM 31 | _SE_INNER_NONLINEAR = inner_nonlinear 32 | _SE_GATING_FN = gating_fn 33 | _SE_DIVISOR = divisor 34 | _SE_USE_NORM = use_norm 35 | yield 36 | _SE_INNER_NONLINEAR = _pre_inner_fn 37 | _SE_GATING_FN = _pre_fn 38 | _SE_DIVISOR = _pre_divisor 39 | _SE_USE_NORM = _pre_use_norm 40 | 41 | 42 | class SEBlock(nn.Sequential): 43 | """Squeeze-and-Excitation Block 44 | """ 45 | 46 | def __init__( 47 | self, 48 | channels, 49 | rd_ratio, 50 | inner_activation_fn: nn.Module = None, 51 | gating_fn: nn.Module = None 52 | ): 53 | squeezed_channels = make_divisible(int(channels * rd_ratio), _SE_DIVISOR) 54 | inner_activation_fn = inner_activation_fn or _SE_INNER_NONLINEAR 55 | gating_fn = gating_fn or _SE_GATING_FN 56 | 57 | layers = OrderedDict([]) 58 | 59 | layers['pool'] = nn.AdaptiveAvgPool2d((1, 1)) 60 | layers['reduce'] = Conv2d1x1(channels, squeezed_channels, bias=True) 61 | if _SE_USE_NORM: 62 | layers['norm'] = factory.normalizer_fn(squeezed_channels) 63 | layers['act'] = inner_activation_fn() 64 | layers['expand'] = Conv2d1x1(squeezed_channels, channels, bias=True) 65 | layers['gate'] = gating_fn() 66 | 67 | super().__init__(layers) 68 | 69 | def _forward(self, input): 70 | for module in self: 71 | input = module(input) 72 | return input 73 | 74 | def forward(self, x): 75 | return x * self._forward(x) 76 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/stage.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from typing import Union, List 3 | 4 | 5 | class Stage(nn.Sequential): 6 | def __init__(self, *args): 7 | if len(args) == 1 and isinstance(args[0], list): 8 | args = args[0] 9 | super().__init__(*args) 10 | 11 | def append(self, m: Union[nn.Module, List[nn.Module]]): 12 | if isinstance(m, nn.Module): 13 | self.add_module(str(len(self)), m) 14 | elif isinstance(m, list): 15 | [self.append(i) for i in m] 16 | else: 17 | ValueError('') 18 | -------------------------------------------------------------------------------- /cvm/models/ops/blocks/vanilla_conv2d.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from . 
import factory 3 | 4 | 5 | class Conv2d3x3(nn.Conv2d): 6 | def __init__( 7 | self, 8 | in_channels: int, 9 | out_channels: int, 10 | stride: int = 1, 11 | padding: int = None, 12 | dilation: int = 1, 13 | bias: bool = False, 14 | groups: int = 1 15 | ): 16 | padding = padding if padding is not None else dilation 17 | super().__init__( 18 | in_channels, out_channels, 3, stride=stride, 19 | padding=padding, dilation=dilation, bias=bias, groups=groups 20 | ) 21 | 22 | 23 | class Conv2d1x1(nn.Conv2d): 24 | def __init__( 25 | self, 26 | in_channels: int, 27 | out_channels: int, 28 | stride: int = 1, 29 | padding: int = 0, 30 | bias: bool = False, 31 | groups: int = 1 32 | ): 33 | super().__init__( 34 | in_channels, out_channels, 1, stride=stride, 35 | padding=padding, bias=bias, groups=groups 36 | ) 37 | 38 | 39 | class Conv2d3x3BN(nn.Sequential): 40 | def __init__( 41 | self, 42 | in_channels: int, 43 | out_channels: int, 44 | stride: int = 1, 45 | padding: int = None, 46 | dilation: int = 1, 47 | bias: bool = False, 48 | groups: int = 1, 49 | normalizer_fn: nn.Module = None 50 | ): 51 | normalizer_fn = normalizer_fn or factory._NORMALIZER 52 | padding = padding if padding is not None else dilation 53 | 54 | super().__init__( 55 | Conv2d3x3(in_channels, out_channels, stride=stride, 56 | padding=padding, dilation=dilation, bias=bias, groups=groups) 57 | ) 58 | if normalizer_fn: 59 | self.add_module(str(self.__len__()), normalizer_fn(out_channels)) 60 | 61 | 62 | class Conv2d1x1BN(nn.Sequential): 63 | def __init__( 64 | self, 65 | in_channels: int, 66 | out_channels: int, 67 | stride: int = 1, 68 | padding: int = 0, 69 | bias: bool = False, 70 | groups: int = 1, 71 | normalizer_fn: nn.Module = None 72 | ): 73 | normalizer_fn = normalizer_fn or factory._NORMALIZER 74 | 75 | super().__init__( 76 | Conv2d1x1(in_channels, out_channels, stride=stride, 77 | padding=padding, bias=bias, groups=groups) 78 | ) 79 | if normalizer_fn: 80 | self.add_module(str(self.__len__()), normalizer_fn(out_channels)) 81 | 82 | 83 | class Conv2d1x1Block(nn.Sequential): 84 | def __init__( 85 | self, 86 | in_channels: int, 87 | out_channels: int, 88 | stride: int = 1, 89 | padding: int = 0, 90 | bias: bool = False, 91 | groups: int = 1, 92 | normalizer_fn: nn.Module = None, 93 | activation_fn: nn.Module = None, 94 | norm_position: str = None 95 | ): 96 | super().__init__( 97 | Conv2d1x1(in_channels, out_channels, stride=stride, 98 | padding=padding, bias=bias, groups=groups), 99 | *factory.norm_activation(out_channels, normalizer_fn, activation_fn, norm_position) 100 | ) 101 | 102 | 103 | class Conv2dBlock(nn.Sequential): 104 | def __init__( 105 | self, 106 | in_channels, 107 | out_channels, 108 | kernel_size: int = 3, 109 | stride: int = 1, 110 | padding: int = None, 111 | dilation: int = 1, 112 | bias: bool = False, 113 | groups: int = 1, 114 | normalizer_fn: nn.Module = None, 115 | activation_fn: nn.Module = None, 116 | norm_position: str = None, 117 | ): 118 | if padding is None: 119 | padding = ((kernel_size - 1) * (dilation - 1) + kernel_size) // 2 120 | 121 | super().__init__( 122 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, 123 | bias=bias, stride=stride, padding=padding, dilation=dilation, groups=groups), 124 | *factory.norm_activation(out_channels, normalizer_fn, activation_fn, norm_position) 125 | ) 126 | -------------------------------------------------------------------------------- /cvm/models/ops/functional.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from torch import fft 3 | 4 | __all__ = ['channel_shuffle', 'make_divisible', 5 | 'get_gaussian_kernel1d', 'get_gaussian_kernel2d', 6 | 'get_gaussian_bandpass_kernel2d', 'get_gaussian_kernels2d', 7 | 'get_distance_grid', 'spectral_filter'] 8 | 9 | 10 | def channel_shuffle(x, groups): 11 | batchsize, num_channels, height, width = x.data.size() 12 | channels_per_group = num_channels // groups 13 | 14 | # reshape 15 | x = x.view(batchsize, groups, 16 | channels_per_group, height, width) 17 | x = torch.transpose(x, 1, 2).contiguous() 18 | 19 | # flatten 20 | x = x.view(batchsize, -1, height, width) 21 | return x 22 | 23 | 24 | def make_divisible(value, divisor, min_value=None): 25 | if min_value is None: 26 | min_value = divisor 27 | 28 | new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) 29 | 30 | # Make sure that round down does not go down by more than 10%. 31 | if new_value < 0.9 * value: 32 | new_value += divisor 33 | 34 | return new_value 35 | 36 | 37 | def get_gaussian_kernel1d(kernel_size, sigma: float, normalize: bool = True): 38 | ksize_half = (kernel_size - 1) * 0.5 39 | 40 | x = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 41 | pdf = torch.exp(-0.5 * (x / sigma).pow(2)) 42 | return pdf / pdf.sum() if normalize else pdf 43 | 44 | 45 | def get_gaussian_kernel2d(kernel_size, sigma: float, normalize: bool = True): 46 | ksize_half = (kernel_size - 1) * 0.5 47 | 48 | xs = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 49 | ys = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 50 | 51 | x, y = torch.meshgrid(xs, ys, indexing='xy') 52 | 53 | pdf = torch.exp(-0.5 * ((x * x + y * y) / (sigma * sigma))) 54 | 55 | return pdf / pdf.sum() if normalize else pdf 56 | 57 | 58 | def get_gaussian_bandpass_kernel2d(kernel_size, sigma: float, W: float): 59 | ksize_half = (kernel_size - 1) * 0.5 60 | 61 | xs = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 62 | ys = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 63 | 64 | x, y = torch.meshgrid(xs, ys, indexing='xy') 65 | 66 | d2 = x * x + y * y 67 | d = torch.sqrt(d2) 68 | 69 | return torch.exp(-((d2 - sigma * sigma) / (d * W)).pow(2)) 70 | 71 | 72 | def get_gaussian_kernels2d(kernel_size, sigma: torch.Tensor, normalize: bool = True): 73 | ksize_half = (kernel_size - 1) * 0.5 74 | 75 | xs = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 76 | ys = torch.linspace(-ksize_half, ksize_half, steps=kernel_size) 77 | 78 | x, y = torch.meshgrid(xs, ys, indexing='xy') 79 | 80 | pdf = torch.exp(-0.5 * ((x * x + y * y).repeat(sigma.shape) / torch.pow(sigma, 2))) 81 | 82 | return pdf / pdf.sum([-2, -1], keepdim=True) if normalize else pdf 83 | 84 | 85 | def get_distance_grid(size): 86 | size_half = (size - 1) * 0.5 87 | 88 | xs = torch.linspace(-size_half, size_half, steps=size) 89 | ys = torch.linspace(-size_half, size_half, steps=size) 90 | 91 | x, y = torch.meshgrid(xs, ys, indexing='xy') 92 | 93 | return torch.sqrt(x * x + y * y) 94 | 95 | 96 | def spectral_filter(x, callback): 97 | fre_x = fft.fftshift(fft.fft2(x)) 98 | 99 | fre_x = callback(fre_x) 100 | 101 | return fft.ifft2(fft.ifftshift(fre_x)).real 102 | -------------------------------------------------------------------------------- /cvm/models/resmlp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | 
from .utils import export, config, load_from_local_or_url 6 | from typing import Any 7 | 8 | 9 | class Affine(nn.Module): 10 | def __init__(self, dim): 11 | super().__init__() 12 | 13 | self.alpha = nn.Parameter(torch.ones(1, 1, dim)) 14 | self.beta = nn.Parameter(torch.zeros(1, 1, dim)) 15 | 16 | def forward(self, x): 17 | return self.alpha * x + self.beta 18 | 19 | 20 | class ResMlpBlock(nn.Module): 21 | def __init__( 22 | self, 23 | hidden_dim, 24 | sequence_len, 25 | layerscale_init: float = 1e-4, 26 | dropout_rate: float = 0., 27 | drop_path_rate: float = 0. 28 | ): 29 | super().__init__() 30 | 31 | self.affine_1 = Affine(hidden_dim) 32 | self.linear_patches = nn.Linear(sequence_len, sequence_len) 33 | self.layerscale_1 = nn.Parameter(layerscale_init * torch.ones(hidden_dim)) 34 | self.drop1 = blocks.StochasticDepth(1.0 - drop_path_rate) 35 | 36 | self.affine_2 = Affine(hidden_dim) 37 | self.mlp_channels = blocks.MlpBlock(hidden_dim, hidden_dim * 4, dropout_rate=dropout_rate) 38 | self.layerscale_2 = nn.Parameter(layerscale_init * torch.ones(hidden_dim)) 39 | self.drop2 = blocks.StochasticDepth(1.0 - drop_path_rate) 40 | 41 | def forward(self, x): 42 | x = x + self.drop1(self.layerscale_1 * self.linear_patches(self.affine_1(x).transpose(1, 2)).transpose(1, 2)) 43 | x = x + self.drop2(self.layerscale_2 * self.mlp_channels(self.affine_2(x))) 44 | return x 45 | 46 | 47 | @export 48 | class ResMLP(nn.Module): 49 | def __init__( 50 | self, 51 | image_size: int = 224, 52 | in_channels: int = 3, 53 | num_classes: int = 1000, 54 | patch_size: int = 32, 55 | hidden_dim: int = 768, 56 | depth: int = 12, 57 | dropout_rate: float = 0., 58 | drop_path_rate: float = 0., 59 | **kwargs: Any 60 | ): 61 | super().__init__() 62 | 63 | num_patches = (image_size // patch_size) ** 2 64 | 65 | self.stem = nn.Conv2d(in_channels, hidden_dim, 66 | kernel_size=patch_size, stride=patch_size) 67 | 68 | self.blocks = nn.Sequential( 69 | *[ResMlpBlock( 70 | hidden_dim, 71 | num_patches, 72 | dropout_rate=dropout_rate, 73 | drop_path_rate=drop_path_rate 74 | ) for _ in range(depth)] 75 | ) 76 | 77 | self.affine = Affine(hidden_dim) 78 | self.classifier = nn.Linear(hidden_dim, num_classes) 79 | 80 | def forward(self, x): 81 | x = self.stem(x) 82 | x = x.flatten(2).transpose(1, 2) 83 | x = self.blocks(x) 84 | x = self.affine(x) 85 | x = x.mean(dim=1) 86 | x = self.classifier(x) 87 | return x 88 | 89 | 90 | def _resmlp( 91 | image_size: int = 224, 92 | patch_size: int = 16, 93 | hidden_dim: int = 768, 94 | depth: int = 12, 95 | pretrained: bool = False, 96 | pth: str = None, 97 | progress: bool = True, 98 | **kwargs: Any 99 | ): 100 | model = ResMLP(image_size, patch_size=patch_size, 101 | hidden_dim=hidden_dim, depth=depth, **kwargs) 102 | 103 | if pretrained: 104 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 105 | return model 106 | 107 | 108 | @export 109 | def resmlp_s12_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 110 | return _resmlp(224, 16, 384, 12, pretrained, pth, progress, **kwargs) 111 | 112 | 113 | @export 114 | def resmlp_s24_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 115 | return _resmlp(224, 16, 384, 24, pretrained, pth, progress, **kwargs) 116 | 117 | 118 | @export 119 | def resmlp_b24_224(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 120 | return _resmlp(224, 16, 768, 24, pretrained, pth, progress, **kwargs) 121 | 
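122 | 123 | if __name__ == '__main__': 124 |     # Minimal smoke-test sketch: build the smallest factory model defined above and 125 |     # check the classification head's output shape on a dummy 224x224 batch (the 126 |     # resolution the factories in this file assume). 127 |     model = resmlp_s12_224() 128 |     logits = model(torch.randn(2, 3, 224, 224)) 129 |     assert logits.shape == (2, 1000)  # (batch, num_classes) 130 | 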
-------------------------------------------------------------------------------- /cvm/models/rexnet.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .ops import blocks 6 | from .utils import export, config, load_from_local_or_url 7 | from typing import Any 8 | 9 | 10 | class InvertedResidualBlock(blocks.InvertedResidualBlock): 11 | def __init__( 12 | self, 13 | inp, 14 | oup, 15 | t, kernel_size: int = 3, 16 | stride: int = 1, 17 | padding: int = 1, 18 | rd_ratio: float = None, 19 | se_ind: bool = True, 20 | dw_se_act: nn.Module = nn.ReLU6 21 | ): 22 | super().__init__(inp, oup, t, kernel_size=kernel_size, stride=stride, 23 | padding=padding, rd_ratio=rd_ratio, se_ind=se_ind, dw_se_act=dw_se_act) 24 | 25 | self.apply_residual = (stride == 1) and (inp <= oup) 26 | self.branch2 = nn.Identity() if self.apply_residual else None 27 | self.combine = blocks.Combine('ADD') if self.apply_residual else None 28 | 29 | def forward(self, x): 30 | out = self.branch1(x) 31 | if self.apply_residual: 32 | out[:, 0:self.inp] += self.branch2(x) 33 | return out 34 | 35 | 36 | @export 37 | class ReXNet(nn.Module): 38 | 39 | @blocks.activation(partial(nn.SiLU, inplace=True)) 40 | @blocks.se(divisor=1, use_norm=True) 41 | def __init__( 42 | self, 43 | in_channels: int = 3, 44 | num_classes: int = 1000, 45 | width_multiplier: float = 1.0, 46 | dropout_rate: float = 0.2, 47 | thumbnail: bool = False, 48 | **kwargs: Any 49 | ): 50 | super().__init__() 51 | 52 | FRONT_S = 1 if thumbnail else 2 53 | 54 | n = [2, 2, 3, 3, 5] # repeats 55 | s = [FRONT_S, 2, 2, 1, 2] 56 | ratios = [0, 1/12, 1/12, 1/12, 1/12] 57 | 58 | self.depth = (sum(n[:]) + 1) * 3 59 | increase = 180 / (self.depth // 3 * 1.0) 60 | 61 | def multiplier(x): return int(round(x * width_multiplier)) 62 | 63 | features = [ 64 | blocks.Conv2dBlock(in_channels, multiplier(32), 3, FRONT_S), 65 | InvertedResidualBlock(multiplier(32), multiplier(16), 1) 66 | ] 67 | 68 | inplanes, planes = 16, 16 + increase 69 | for i, layers in enumerate(n): 70 | features.append(InvertedResidualBlock(multiplier(inplanes), 71 | multiplier(planes), 6, stride=s[i], rd_ratio=ratios[i])) 72 | inplanes, planes = planes, planes + increase 73 | for _ in range(layers - 1): 74 | features.append(InvertedResidualBlock(multiplier(inplanes), multiplier(planes), 6, rd_ratio=ratios[i])) 75 | inplanes, planes = planes, planes + increase 76 | 77 | features.append(blocks.Conv2d1x1Block(multiplier(inplanes), multiplier(1280))) 78 | 79 | self.features = nn.Sequential(*features) 80 | 81 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 82 | self.classifier = nn.Sequential( 83 | nn.Dropout(dropout_rate), 84 | nn.Linear(multiplier(1280), num_classes) 85 | ) 86 | 87 | def forward(self, x): 88 | x = self.features(x) 89 | x = self.pool(x) 90 | x = torch.flatten(x, 1) 91 | x = self.classifier(x) 92 | return x 93 | 94 | 95 | def _rexnet( 96 | width_multiplier: float = 1.0, 97 | pretrained: bool = False, 98 | pth: str = None, 99 | progress: bool = True, 100 | **kwargs: Any 101 | ): 102 | model = ReXNet(width_multiplier=width_multiplier, **kwargs) 103 | 104 | if pretrained: 105 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 106 | return model 107 | 108 | 109 | @export 110 | def rexnet_x0_9(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 111 | return _rexnet(0.9, pretrained, pth, progress, **kwargs) 112 | 113 | 114 | @export 115 | def 
rexnet_x1_0(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 116 | return _rexnet(1.0, pretrained, pth, progress, **kwargs) 117 | 118 | 119 | @export 120 | def rexnet_x1_3(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 121 | return _rexnet(1.3, pretrained, pth, progress, **kwargs) 122 | 123 | 124 | @export 125 | def rexnet_x1_5(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 126 | return _rexnet(1.5, pretrained, pth, progress, **kwargs) 127 | 128 | 129 | @export 130 | def rexnet_x2_0(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 131 | return _rexnet(2.0, pretrained, pth, progress, **kwargs) 132 | 133 | 134 | class PlainBlock(nn.Sequential): 135 | def __init__(self, inplanes, planes, stride: int = 1): 136 | super().__init__( 137 | blocks.DepthwiseConv2d(inplanes, inplanes, stride=stride), 138 | nn.BatchNorm2d(inplanes), 139 | nn.ReLU(inplace=True), 140 | blocks.PointwiseBlock(inplanes, planes), 141 | nn.BatchNorm2d(planes), 142 | nn.SiLU(inplace=True) 143 | ) 144 | 145 | 146 | @export 147 | class ReXNetPlain(nn.Module): 148 | def __init__( 149 | self, 150 | in_channels: int = 3, 151 | num_classes: int = 1000, 152 | dropout_rate: float = 0.2, 153 | thumbnail: bool = False, 154 | **kwargs: Any 155 | ): 156 | super().__init__() 157 | 158 | FRONT_S = 1 if thumbnail else 2 159 | 160 | self.features = nn.Sequential( 161 | blocks.Conv2dBlock(in_channels, 32, stride=FRONT_S, activation_fn=partial(nn.SiLU, inplace=True)), 162 | PlainBlock(32, 96, stride=FRONT_S), 163 | PlainBlock(96, 144), 164 | PlainBlock(144, 192, stride=2), 165 | PlainBlock(192, 240), 166 | PlainBlock(240, 288, stride=2), 167 | PlainBlock(288, 336), 168 | PlainBlock(336, 384), 169 | PlainBlock(384, 432), 170 | PlainBlock(432, 480), 171 | PlainBlock(480, 528), 172 | PlainBlock(528, 576, stride=2), 173 | PlainBlock(576, 624), 174 | PlainBlock(624, 1024), 175 | blocks.Conv2d1x1Block(1024, 1280) 176 | ) 177 | 178 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 179 | self.classifier = nn.Sequential( 180 | nn.Dropout(dropout_rate), 181 | nn.Linear(1280, num_classes) 182 | ) 183 | 184 | def forward(self, x): 185 | x = self.features(x) 186 | x = self.pool(x) 187 | x = torch.flatten(x, 1) 188 | x = self.classifier(x) 189 | return x 190 | 191 | 192 | @export 193 | def rexnet_plain(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 194 | model = ReXNetPlain(**kwargs) 195 | 196 | if pretrained: 197 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 198 | return model 199 | -------------------------------------------------------------------------------- /cvm/models/seg/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn import * 2 | from .deeplabv3 import * 3 | from .unet import * 4 | from .deeplabv3_plus import * -------------------------------------------------------------------------------- /cvm/models/seg/deeplabv3.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import torch.nn as nn 3 | from cvm import models 4 | from ..ops import blocks 5 | from ..utils import export, get_out_channels, load_from_local_or_url 6 | from .heads import FCNHead, ClsHead 7 | from .segmentation_model import SegmentationModel 8 | 9 | 10 | class DeepLabHead(nn.Sequential): 11 | def __init__( 12 | self, 13 | in_channels: int, 14 | out_channels: int = 256, 15 | 
num_classes: int = 32, 16 | ): 17 | super().__init__( 18 | blocks.ASPP(in_channels, out_channels, [12, 24, 36]), 19 | blocks.Conv2dBlock(out_channels, out_channels), 20 | blocks.Conv2d1x1(out_channels, num_classes) 21 | ) 22 | 23 | 24 | @export 25 | class DeepLabV3(SegmentationModel): 26 | ... 27 | 28 | 29 | @export 30 | def create_deeplabv3( 31 | backbone: str = 'resnet50_v1', 32 | num_classes: int = 21, 33 | aux_loss: bool = False, 34 | cls_loss: bool = False, 35 | dropout_rate: float = 0.1, 36 | pretrained_backbone: bool = False, 37 | pretrained: bool = False, 38 | pth: str = None, 39 | progress: bool = True, 40 | **kwargs: Any 41 | ): 42 | if pretrained: 43 | pretrained_backbone = False 44 | 45 | backbone = models.__dict__[backbone]( 46 | pretrained=pretrained_backbone, 47 | dilations=[1, 1, 2, 4], 48 | **kwargs 49 | ).features 50 | 51 | aux_head = FCNHead(get_out_channels(backbone.stage3), None, num_classes, dropout_rate) if aux_loss else None 52 | cls_head = ClsHead(get_out_channels(backbone.stage4), num_classes) if cls_loss else None 53 | decode_head = DeepLabHead(get_out_channels(backbone.stage4), num_classes=num_classes) 54 | 55 | model = DeepLabV3(backbone, [3, 4] if aux_loss else [4], decode_head, aux_head, cls_head) 56 | 57 | if pretrained: 58 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 59 | return model 60 | 61 | 62 | @export 63 | def deeplabv3_resnet50_v1(*args, **kwargs: Any): 64 | return create_deeplabv3('resnet50_v1', *args, **kwargs) 65 | 66 | 67 | @export 68 | def deeplabv3_mobilenet_v3_small(*args, **kwargs: Any): 69 | return create_deeplabv3('mobilenet_v3_small', *args, **kwargs) 70 | 71 | 72 | @export 73 | def deeplabv3_mobilenet_v3_large(*args, **kwargs: Any): 74 | return create_deeplabv3('mobilenet_v3_large', *args, **kwargs) 75 | 76 | 77 | @export 78 | def deeplabv3_regnet_x_400mf(*args, **kwargs: Any): 79 | return create_deeplabv3('regnet_x_400mf', *args, **kwargs) 80 | 81 | 82 | @export 83 | def deeplabv3_mobilenet_v1_x1_0(*args, **kwargs: Any): 84 | return create_deeplabv3('mobilenet_v1_x1_0', *args, **kwargs) 85 | 86 | 87 | @export 88 | def deeplabv3_sd_mobilenet_v1_x1_0(*args, **kwargs: Any): 89 | return create_deeplabv3('sd_mobilenet_v1_x1_0', *args, **kwargs) 90 | 91 | 92 | @export 93 | def deeplabv3_mobilenet_v2_x1_0(*args, **kwargs: Any): 94 | return create_deeplabv3('mobilenet_v2_x1_0', *args, **kwargs) 95 | 96 | 97 | @export 98 | def deeplabv3_sd_mobilenet_v2_x1_0(*args, **kwargs: Any): 99 | return create_deeplabv3('sd_mobilenet_v2_x1_0', *args, **kwargs) 100 | 101 | 102 | @export 103 | def deeplabv3_shufflenet_v2_x2_0(*args, **kwargs: Any): 104 | return create_deeplabv3('shufflenet_v2_x2_0', *args, **kwargs) 105 | 106 | 107 | @export 108 | def deeplabv3_sd_shufflenet_v2_x2_0(*args, **kwargs: Any): 109 | return create_deeplabv3('sd_shufflenet_v2_x2_0', *args, **kwargs) 110 | 111 | 112 | @export 113 | def deeplabv3_efficientnet_b0(*args, **kwargs: Any): 114 | return create_deeplabv3('efficientnet_b0', *args, **kwargs) 115 | 116 | 117 | @export 118 | def deeplabv3_sd_efficientnet_b0(*args, **kwargs: Any): 119 | return create_deeplabv3('sd_efficientnet_b0', *args, **kwargs) 120 | -------------------------------------------------------------------------------- /cvm/models/seg/deeplabv3_plus.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | import torch 3 | import torch.nn as nn 4 | from cvm import models 5 | from ..ops import blocks 6 | from ..utils import export, 
get_out_channels, load_from_local_or_url 7 | from torch.nn import functional as F 8 | from .heads import FCNHead, ClsHead 9 | from .segmentation_model import SegmentationModel 10 | 11 | 12 | class DeepLabPlusHead(nn.Module): 13 |     def __init__( 14 |         self, 15 |         aspp_in_channels: int, 16 |         features_channels: int, 17 |         out_channels: int = 256, 18 |         num_classes: int = 32, 19 |     ): 20 |         super().__init__() 21 | 22 |         self.aspp = blocks.ASPP(aspp_in_channels, out_channels, [12, 24, 36]) 23 |         self.cat = blocks.Combine('CONCAT') 24 | 25 |         self.conv3x3 = blocks.Conv2d3x3(out_channels + features_channels, num_classes) 26 | 27 |     def forward(self, x, low_level_features): 28 |         size = low_level_features.shape[-2:] 29 |         aspp_features = self.aspp(x) 30 |         aspp_features = F.interpolate(aspp_features, size=size, mode="bilinear", align_corners=False) 31 |         features = self.cat([aspp_features, low_level_features]) 32 |         features = self.conv3x3(features) 33 | 34 |         return features 35 | 36 | 37 | @export 38 | class DeepLabV3Plus(SegmentationModel): 39 |     def forward(self, x): 40 |         size = x.shape[-2:] 41 | 42 |         stages = self.backbone(x) 43 | 44 |         out = self.decode_head(stages[f'stage{self.out_stages[-1]}'], stages[f'stage{self.out_stages[0]}']) 45 |         out = self.interpolate(out, size=size) 46 | 47 |         res = {'out': out} 48 | 49 |         if self.aux_head: 50 |             aux = self.aux_head(stages[f'stage{self.out_stages[-2]}']) 51 |             aux = self.interpolate(aux, size=size) 52 |             res['aux'] = aux 53 | 54 |         if self.cls_head: 55 |             cls = self.cls_head(stages[f'stage{self.out_stages[-1]}']) 56 |             cls = cls.reshape(cls.shape[0], cls.shape[1], 1, 1) 57 |             res['out'] = out * torch.sigmoid(cls) 58 | 59 |         return res 60 | 61 | 62 | @export 63 | def create_deeplabv3_plus( 64 |     backbone: str = 'resnet50_v1', 65 |     num_classes: int = 21, 66 |     aux_loss: bool = False, 67 |     cls_loss: bool = False, 68 |     dropout_rate: float = 0.1, 69 |     pretrained_backbone: bool = False, 70 |     pretrained: bool = False, 71 |     pth: str = None, 72 |     progress: bool = True, 73 |     **kwargs: Any 74 | ): 75 |     if pretrained: 76 |         pretrained_backbone = False 77 | 78 |     backbone = models.__dict__[backbone]( 79 |         pretrained=pretrained_backbone, 80 |         dilations=[1, 1, 2, 4], 81 |         **kwargs 82 |     ).features 83 | 84 |     aux_head = FCNHead(get_out_channels(backbone.stage3), None, num_classes, dropout_rate) if aux_loss else None 85 |     cls_head = ClsHead(get_out_channels(backbone.stage4), num_classes) if cls_loss else None 86 |     decode_head = DeepLabPlusHead(get_out_channels(backbone.stage4), 87 |                                   get_out_channels(backbone.stage2), num_classes=num_classes) 88 | 89 |     model = DeepLabV3Plus(backbone, [2, 3, 4] if aux_loss else [2, 4], decode_head, aux_head, cls_head) 90 | 91 |     if pretrained: 92 |         load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 93 |     return model 94 | 95 | 96 | @export 97 | def deeplabv3_plus_resnet50_v1(*args, **kwargs: Any): 98 |     return create_deeplabv3_plus('resnet50_v1', *args, **kwargs) 99 | 100 | 101 | @export 102 | def deeplabv3_plus_mobilenet_v3_small(*args, **kwargs: Any): 103 |     return create_deeplabv3_plus('mobilenet_v3_small', *args, **kwargs) 104 | 105 | 106 | @export 107 | def deeplabv3_plus_mobilenet_v3_large(*args, **kwargs: Any): 108 |     return create_deeplabv3_plus('mobilenet_v3_large', *args, **kwargs) 109 | 110 | 111 | @export 112 | def deeplabv3_plus_regnet_x_400mf(*args, **kwargs: Any): 113 |     return create_deeplabv3_plus('regnet_x_400mf', *args, **kwargs) 114 | 115 | 116 | @export 117 | def deeplabv3_plus_mobilenet_v1_x1_0(*args, **kwargs: Any): 118 |     return 
create_deeplabv3_plus('mobilenet_v1_x1_0', *args, **kwargs) 119 | 120 | 121 | @export 122 | def deeplabv3_plus_sd_mobilenet_v1_x1_0(*args, **kwargs: Any): 123 | return create_deeplabv3_plus('sd_mobilenet_v1_x1_0', *args, **kwargs) 124 | 125 | 126 | @export 127 | def deeplabv3_plus_mobilenet_v2_x1_0(*args, **kwargs: Any): 128 | return create_deeplabv3_plus('mobilenet_v2_x1_0', *args, **kwargs) 129 | 130 | 131 | @export 132 | def deeplabv3_plus_sd_mobilenet_v2_x1_0(*args, **kwargs: Any): 133 | return create_deeplabv3_plus('sd_mobilenet_v2_x1_0', *args, **kwargs) 134 | 135 | 136 | @export 137 | def deeplabv3_plus_shufflenet_v2_x2_0(*args, **kwargs: Any): 138 | return create_deeplabv3_plus('shufflenet_v2_x2_0', *args, **kwargs) 139 | 140 | 141 | @export 142 | def deeplabv3_plus_sd_shufflenet_v2_x2_0(*args, **kwargs: Any): 143 | return create_deeplabv3_plus('sd_shufflenet_v2_x2_0', *args, **kwargs) 144 | 145 | 146 | @export 147 | def deeplabv3_plus_efficientnet_b0(*args, **kwargs: Any): 148 | return create_deeplabv3_plus('efficientnet_b0', *args, **kwargs) 149 | 150 | 151 | @export 152 | def deeplabv3_plus_sd_efficientnet_b0(*args, **kwargs: Any): 153 | return create_deeplabv3_plus('sd_efficientnet_b0', *args, **kwargs) 154 | -------------------------------------------------------------------------------- /cvm/models/seg/fcn.py: -------------------------------------------------------------------------------- 1 | from cvm import models 2 | from ..utils import export, get_out_channels, load_from_local_or_url 3 | from .heads import ClsHead, FCNHead 4 | from typing import Any 5 | from .segmentation_model import SegmentationModel 6 | 7 | 8 | @export 9 | class FCN(SegmentationModel): 10 | ... 11 | 12 | 13 | @export 14 | def create_fcn( 15 | backbone: str = 'resnet50_v1', 16 | num_classes: int = 21, 17 | aux_loss: bool = False, 18 | cls_loss: bool = False, 19 | dropout_rate: float = 0.1, 20 | pretrained_backbone: bool = False, 21 | pretrained: bool = False, 22 | pth: str = None, 23 | progress: bool = True, 24 | **kwargs: Any 25 | ): 26 | if pretrained: 27 | pretrained_backbone = False 28 | 29 | backbone = models.__dict__[backbone]( 30 | pretrained=pretrained_backbone, 31 | dilations=[1, 1, 2, 4], 32 | **kwargs 33 | ).features 34 | 35 | aux_head = FCNHead(get_out_channels(backbone.stage3), None, num_classes, dropout_rate) if aux_loss else None 36 | cls_head = ClsHead(get_out_channels(backbone.stage4), num_classes) if cls_loss else None 37 | decode_head = FCNHead(get_out_channels(backbone.stage4), None, num_classes, dropout_rate) 38 | 39 | model = FCN(backbone, [3, 4] if aux_loss else [4], decode_head, aux_head, cls_head) 40 | 41 | if pretrained: 42 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 43 | return model 44 | 45 | 46 | @export 47 | def fcn_resnet50_v1(*args, **kwargs: Any): 48 | return create_fcn('resnet50_v1', *args, **kwargs) 49 | 50 | 51 | @export 52 | def fcn_mobilenet_v3_small(*args, **kwargs: Any): 53 | return create_fcn('mobilenet_v3_small', *args, **kwargs) 54 | 55 | 56 | @export 57 | def fcn_mobilenet_v3_large(*args, **kwargs: Any): 58 | return create_fcn('mobilenet_v3_large', *args, **kwargs) 59 | 60 | 61 | @export 62 | def fcn_regnet_x_400mf(*args, **kwargs: Any): 63 | return create_fcn('regnet_x_400mf', *args, **kwargs) 64 | 65 | 66 | @export 67 | def fcn_mobilenet_v1_x1_0(*args, **kwargs: Any): 68 | return create_fcn('mobilenet_v1_x1_0', *args, **kwargs) 69 | 70 | 71 | @export 72 | def fcn_sd_mobilenet_v1_x1_0(*args, **kwargs: Any): 73 | return 
create_fcn('sd_mobilenet_v1_x1_0', *args, **kwargs) 74 | 75 | 76 | @export 77 | def fcn_mobilenet_v2_x1_0(*args, **kwargs: Any): 78 | return create_fcn('mobilenet_v2_x1_0', *args, **kwargs) 79 | 80 | 81 | @export 82 | def fcn_sd_mobilenet_v2_x1_0(*args, **kwargs: Any): 83 | return create_fcn('sd_mobilenet_v2_x1_0', *args, **kwargs) 84 | 85 | 86 | @export 87 | def fcn_shufflenet_v2_x2_0(*args, **kwargs: Any): 88 | return create_fcn('shufflenet_v2_x2_0', *args, **kwargs) 89 | 90 | 91 | @export 92 | def fcn_sd_shufflenet_v2_x2_0(*args, **kwargs: Any): 93 | return create_fcn('sd_shufflenet_v2_x2_0', *args, **kwargs) 94 | 95 | 96 | @export 97 | def fcn_efficientnet_b0(*args, **kwargs: Any): 98 | return create_fcn('efficientnet_b0', *args, **kwargs) 99 | 100 | 101 | @export 102 | def fcn_sd_efficientnet_b0(*args, **kwargs: Any): 103 | return create_fcn('sd_efficientnet_b0', *args, **kwargs) 104 | -------------------------------------------------------------------------------- /cvm/models/seg/heads.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class FCNHead(nn.Sequential): 5 | def __init__( 6 | self, 7 | in_channels: int = 2048, 8 | channels: int = None, 9 | num_classes: int = 32, 10 | dropout_rate: float = 0.1, 11 | ): 12 | channels = channels or int(in_channels / 4.0) 13 | super().__init__( 14 | nn.Conv2d(in_channels, channels, 3, padding=1, bias=False), 15 | nn.BatchNorm2d(channels), 16 | nn.ReLU(), 17 | nn.Dropout(dropout_rate), 18 | nn.Conv2d(channels, num_classes, 1) 19 | ) 20 | 21 | 22 | class ClsHead(nn.Sequential): 23 | def __init__( 24 | self, 25 | in_channels, 26 | num_classes: int 27 | ): 28 | super().__init__( 29 | nn.AdaptiveAvgPool2d((1, 1)), 30 | nn.Flatten(1), 31 | nn.Linear(in_channels, num_classes) 32 | ) 33 | -------------------------------------------------------------------------------- /cvm/models/seg/segmentation_model.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | from typing import List, Optional 6 | from ..ops.blocks.stage import Stage 7 | 8 | from torchvision.models.feature_extraction import create_feature_extractor 9 | 10 | __all__ = ['SegmentationModel'] 11 | 12 | 13 | class SegmentationModel(nn.Module): 14 | def __init__( 15 | self, 16 | backbone: nn.Module, 17 | out_stages: List[int], 18 | decode_head: nn.Module = None, 19 | aux_head: Optional[nn.Module] = None, 20 | cls_head: Optional[nn.Module] = None 21 | ): 22 | super().__init__() 23 | 24 | if out_stages is None: 25 | out_stages = [4] 26 | 27 | self.backbone = create_feature_extractor( 28 | backbone, 29 | return_nodes=[f'stage{i}' for i in out_stages], 30 | tracer_kwargs={'leaf_modules': [Stage]} 31 | ) 32 | self.out_stages = out_stages 33 | self.decode_head = decode_head 34 | self.aux_head = aux_head 35 | self.cls_head = cls_head 36 | self.interpolate = partial(F.interpolate, mode='bilinear', align_corners=False) 37 | 38 | def forward(self, x): 39 | size = x.shape[-2:] 40 | 41 | stages = self.backbone(x) 42 | 43 | out = self.decode_head(stages[f'stage{self.out_stages[-1]}']) 44 | out = self.interpolate(out, size=size) 45 | 46 | res = {'out': out} 47 | 48 | if self.aux_head: 49 | aux = self.aux_head(stages[f'stage{self.out_stages[-2]}']) 50 | aux = self.interpolate(aux, size=size) 51 | res['aux'] = aux 52 | 53 | if self.cls_head: 54 | cls = 
self.cls_head(stages[f'stage{self.out_stages[-1]}']) 55 |             cls = cls.reshape(cls.shape[0], cls.shape[1], 1, 1) 56 |             res['out'] = out * torch.sigmoid(cls) 57 | 58 |         return res 59 | -------------------------------------------------------------------------------- /cvm/models/seg/unet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from ..ops import blocks 4 | from ..utils import export, load_from_local_or_url 5 | from typing import Any, List 6 | 7 | 8 | @export 9 | class UNet(nn.Module): 10 |     def __init__( 11 |         self, 12 |         in_channels: int = 3, 13 |         num_classes: int = 2, 14 |         filters: List[int] = [64, 128, 256, 512, 1024], 15 |         **kwargs: Any 16 |     ): 17 |         super().__init__() 18 | 19 |         for i in range(4): 20 |             self.add_module(f'encode_conv{i+1}', nn.Sequential( 21 |                 blocks.Conv2dBlock(filters[i - 1] if i else in_channels, filters[i]), 22 |                 blocks.Conv2dBlock(filters[i], filters[i]) 23 |             )) 24 |             self.add_module(f'down{i+1}', nn.MaxPool2d(2, 2)) 25 | 26 |         self.u = nn.Sequential( 27 |             blocks.Conv2dBlock(filters[3], filters[4]), 28 |             blocks.Conv2dBlock(filters[4], filters[4]) 29 |         ) 30 | 31 |         filters = filters[::-1]  # reversed copy: avoid mutating the shared default list 32 |         for i in range(4): 33 |             self.add_module(f'up{i+1}', nn.ConvTranspose2d(filters[i], filters[i + 1], 4, stride=2, padding=1)) 34 |             self.add_module(f'decode_conv{i+1}', nn.Sequential( 35 |                 blocks.Combine('CONCAT'), 36 |                 blocks.Conv2dBlock(filters[i], filters[i+1]), 37 |                 blocks.Conv2dBlock(filters[i + 1], filters[i + 1]) 38 |             )) 39 | 40 |         self.output = blocks.Conv2d1x1(filters[-1], num_classes, bias=True) 41 | 42 |     def forward(self, x): 43 |         e1 = self.encode_conv1(x) 44 |         e2 = self.encode_conv2(self.down1(e1)) 45 |         e3 = self.encode_conv3(self.down2(e2)) 46 |         e4 = self.encode_conv4(self.down3(e3)) 47 | 48 |         u = self.u(self.down4(e4)) 49 | 50 |         d1 = self.decode_conv1([e4, self.up1(u)]) 51 |         d2 = self.decode_conv2([e3, self.up2(d1)]) 52 |         d3 = self.decode_conv3([e2, self.up3(d2)]) 53 |         d4 = self.decode_conv4([e1, self.up4(d3)]) 54 | 55 |         return self.output(d4) 56 | 57 | 58 | @export 59 | def unet( 60 |     pretrained: bool = False, 61 |     pth: str = None, 62 |     progress: bool = True, 63 |     **kwargs: Any 64 | ): 65 |     model = UNet(**kwargs) 66 | 67 |     if pretrained: 68 |         load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 69 |     return model 70 | -------------------------------------------------------------------------------- /cvm/models/shufflenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, config, load_from_local_or_url 6 | from typing import Any, OrderedDict, List 7 | 8 | 9 | class ShuffleAddBlock(nn.Module): 10 |     def __init__(self, channels, g: int = 2): 11 |         super().__init__() 12 | 13 |         self.branch1 = nn.Sequential(OrderedDict([ 14 |             ('gconv1', blocks.Conv2d1x1Block(channels, channels, groups=g)), 15 |             ('shuffle', blocks.ChannelShuffle(groups=g)), 16 |             ('dwconv', blocks.DepthwiseConv2dBN(channels, channels, 3)), 17 |             ('gconv2', blocks.Conv2d1x1BN(channels, channels, groups=g)) 18 |         ])) 19 | 20 |         self.branch2 = nn.Identity() 21 |         self.combine = blocks.Combine('ADD') 22 |         self.relu = nn.ReLU(inplace=True) 23 | 24 |     def forward(self, x): 25 |         x = self.combine([self.branch1(x), self.branch2(x)]) 26 |         x = self.relu(x) 27 |         return x 28 | 29 | 30 | class ShuffleCatBlock(nn.Module): 31 |     def __init__( 32 |         self, 33 |         inp, 34 |         oup, 35 |         g: int = 2, 36 |         stride: int = 2, 37 |         apply_first: bool = 
True 38 |     ): 39 |         super().__init__() 40 | 41 |         g_1st = g if apply_first else 1 42 | 43 |         self.branch1 = nn.Sequential(OrderedDict([ 44 |             ('gconv1', blocks.Conv2d1x1Block(inp, oup, groups=g_1st)), 45 |             ('shuffle', blocks.ChannelShuffle(groups=g)), 46 |             ('dwconv', blocks.DepthwiseConv2dBN(oup, oup, stride=stride)), 47 |             ('gconv2', blocks.Conv2d1x1BN(oup, oup, groups=g)) 48 |         ])) 49 | 50 |         self.branch2 = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1) 51 |         self.combine = blocks.Combine('CONCAT') 52 |         self.relu = nn.ReLU(inplace=True) 53 | 54 |     def forward(self, x): 55 |         x = self.combine([self.branch1(x), self.branch2(x)]) 56 |         x = self.relu(x) 57 |         return x 58 | 59 | 60 | @export 61 | class ShuffleNet(nn.Module): 62 |     def __init__( 63 |         self, 64 |         in_channels: int = 3, 65 |         num_classes: int = 1000, 66 |         repeats: List[int] = [4, 8, 4], 67 |         channels: List[int] = [], 68 |         g: int = 3, 69 |         thumbnail: bool = False, 70 |         **kwargs: Any 71 |     ): 72 |         super().__init__() 73 | 74 |         FRONT_S = 1 if thumbnail else 2 75 | 76 |         self.features = nn.Sequential(OrderedDict([ 77 |             ('stem', blocks.Conv2dBlock(in_channels, channels[0], 3, FRONT_S)), 78 |             ('stage1', nn.MaxPool2d(kernel_size=3, stride=2, padding=1) if not thumbnail else nn.Identity()), 79 |             ('stage2', self.make_layers(repeats[0], channels[0], channels[1], g)), 80 |             ('stage3', self.make_layers(repeats[1], channels[1], channels[2], g)), 81 |             ('stage4', self.make_layers(repeats[2], channels[2], channels[3], g)) 82 |         ])) 83 | 84 |         self.pool = nn.AdaptiveAvgPool2d((1, 1)) 85 |         self.classifier = nn.Linear(channels[3], num_classes) 86 | 87 |     @staticmethod 88 |     def make_layers(repeat, inp, oup, g): 89 |         layers = [ShuffleCatBlock(inp, oup - inp, stride=2, g=g)] 90 |         for _ in range(repeat - 1): 91 |             layers.append(ShuffleAddBlock(oup, g=g)) 92 | 93 |         return blocks.Stage(layers) 94 | 95 |     def forward(self, x): 96 |         x = self.features(x) 97 |         x = self.pool(x) 98 |         x = torch.flatten(x, 1) 99 |         x = self.classifier(x) 100 |         return x 101 | 102 | 103 | def _shufflenet( 104 |     repeats: List[int], 105 |     channels: List[int], 106 |     g: int, 107 |     pretrained: bool = False, 108 |     pth: str = None, 109 |     progress: bool = True, 110 |     **kwargs: Any 111 | ): 112 |     model = ShuffleNet(repeats=repeats, channels=channels, g=g, **kwargs) 113 | 114 |     if pretrained: 115 |         load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 116 |     return model 117 | 118 | 119 | @export 120 | def shufflenet_g1(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 121 |     return _shufflenet([4, 8, 4], [24, 144, 288, 576], 1, pretrained, pth, progress, **kwargs) 122 | 123 | 124 | @export 125 | def shufflenet_g2(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 126 |     return _shufflenet([4, 8, 4], [24, 200, 400, 800], 2, pretrained, pth, progress, **kwargs) 127 | 128 | 129 | @export 130 | def shufflenet_g3(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 131 |     return _shufflenet([4, 8, 4], [24, 240, 480, 960], 3, pretrained, pth, progress, **kwargs) 132 | 133 | 134 | @export 135 | def shufflenet_g4(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 136 |     return _shufflenet([4, 8, 4], [24, 272, 544, 1088], 4, pretrained, pth, progress, **kwargs) 137 | 138 | 139 | @export 140 | def shufflenet_g8(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 141 |     return _shufflenet([4, 8, 4], [24, 384, 768, 1536], 8, pretrained, pth, progress, 
**kwargs) 142 | -------------------------------------------------------------------------------- /cvm/models/shufflenetv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, config, load_from_local_or_url 6 | from typing import Any, OrderedDict, List 7 | 8 | 9 | class ShuffleBlockV2(nn.Module): 10 | def __init__( 11 | self, 12 | inp, 13 | oup, 14 | stride: int = 1, 15 | dilation: int = 1 16 | ): 17 | super().__init__() 18 | 19 | self.inp = inp 20 | self.oup = oup // 2 21 | self.stride = stride if dilation == 1 else 1 22 | self.dilation = max(1, dilation // stride) 23 | self.split = None 24 | 25 | if stride == 1: 26 | self.inp = inp // 2 27 | self.split = blocks.ChannelChunk(2) 28 | 29 | self.branch1 = nn.Identity() 30 | if stride != 1: 31 | self.branch1 = nn.Sequential(OrderedDict([ 32 | ('dwconv', blocks.DepthwiseConv2dBN(self.inp, self.inp, stride=self.stride, dilation=self.dilation)), 33 | ('1x1', blocks.Conv2d1x1Block(self.inp, self.oup)) 34 | ])) 35 | 36 | self.branch2 = nn.Sequential(OrderedDict([ 37 | ('1x1-1', blocks.Conv2d1x1Block(self.inp, self.oup)), 38 | ('dwconv', blocks.DepthwiseConv2dBN(self.oup, self.oup, stride=self.stride, dilation=self.dilation)), 39 | ('1x1-2', blocks.Conv2d1x1Block(self.oup, self.oup)) 40 | ])) 41 | 42 | self.combine = blocks.Combine('CONCAT') 43 | self.shuffle = blocks.ChannelShuffle(groups=2) 44 | 45 | def forward(self, x): 46 | if isinstance(self.branch1, nn.Identity): 47 | x1, x2 = self.split(x) 48 | x2 = self.branch2(x2) 49 | else: 50 | x1 = self.branch1(x) 51 | x2 = self.branch2(x) 52 | 53 | out = self.combine([x1, x2]) 54 | out = self.shuffle(out) 55 | return out 56 | 57 | 58 | @export 59 | class ShuffleNetV2(nn.Module): 60 | def __init__( 61 | self, 62 | in_channels: int = 3, 63 | num_classes: int = 1000, 64 | repeats: List[int] = [4, 8, 4], 65 | channels: List[int] = [24, 48, 96, 192, 1024], 66 | dropout_rate: float = 0.0, 67 | dilations: List[int] = None, 68 | thumbnail: bool = False, 69 | **kwargs: Any 70 | ): 71 | super().__init__() 72 | 73 | self.block = ShuffleBlockV2 74 | dilations = dilations or [1, 1, 1, 1] 75 | assert len(dilations) == 4, '' 76 | 77 | FRONT_S = 1 if thumbnail else 2 78 | 79 | self.features = nn.Sequential(OrderedDict([ 80 | ('stem', blocks.Conv2dBlock(in_channels, channels[0], 3, FRONT_S)), 81 | ('stage1', nn.MaxPool2d(3, stride=2, padding=1) if not thumbnail else nn.Identity()), 82 | ('stage2', self.make_layers(repeats[0], channels[0], channels[1], dilations[1])), 83 | ('stage3', self.make_layers(repeats[1], channels[1], channels[2], dilations[2])), 84 | ('stage4', self.make_layers(repeats[2], channels[2], channels[3], dilations[3])), 85 | ])) 86 | 87 | self.features[-1].append( 88 | blocks.Conv2d1x1Block(channels[3], channels[4]) 89 | ) 90 | 91 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 92 | self.classifier = nn.Sequential( 93 | nn.Dropout(dropout_rate, inplace=True), 94 | nn.Linear(channels[4], num_classes) 95 | ) 96 | 97 | self.features[-1].out_channels = channels[-1] 98 | self.features[-2].out_channels = channels[-3] 99 | self.features[-3].out_channels = channels[-4] 100 | 101 | def make_layers(self, repeat, inp, oup, dilation): 102 | layers = [self.block(inp, oup, stride=2, dilation=dilation)] 103 | 104 | for _ in range(repeat - 1): 105 | layers.append(self.block(oup, oup, dilation=dilation)) 106 | 107 | return blocks.Stage(layers) 108 | 109 | def forward(self, x): 110 | x = 
self.features(x) 111 | x = self.pool(x) 112 | x = torch.flatten(x, 1) 113 | x = self.classifier(x) 114 | return x 115 | 116 | 117 | def _shufflenet_v2( 118 | repeats: List[int], 119 | channels: List[int], 120 | pretrained: bool = False, 121 | pth: str = None, 122 | progress: bool = True, 123 | **kwargs: Any 124 | ): 125 | model = ShuffleNetV2(repeats=repeats, channels=channels, **kwargs) 126 | 127 | if pretrained: 128 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 129 | return model 130 | 131 | 132 | @export 133 | def shufflenet_v2_x0_5(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 134 | return _shufflenet_v2([4, 8, 4], [24, 48, 96, 192, 1024], pretrained, pth, progress, **kwargs) 135 | 136 | 137 | @export 138 | def shufflenet_v2_x1_0(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 139 | return _shufflenet_v2([4, 8, 4], [24, 116, 232, 464, 1024], pretrained, pth, progress, **kwargs) 140 | 141 | 142 | @export 143 | def shufflenet_v2_x1_5(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 144 | return _shufflenet_v2([4, 8, 4], [24, 176, 352, 704, 1024], pretrained, pth, progress, **kwargs) 145 | 146 | 147 | @export 148 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.0.1-shufflenets-weights/shufflenet_v2_x2_0-35a176a6.pth') 149 | def shufflenet_v2_x2_0(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 150 | return _shufflenet_v2([4, 8, 4], [24, 244, 488, 976, 2048], pretrained, pth, progress, **kwargs) 151 | -------------------------------------------------------------------------------- /cvm/models/squeezenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, load_from_local_or_url 6 | from typing import Any 7 | 8 | 9 | class FireBlock(nn.Module): 10 | def __init__(self, inp, oup): 11 | super().__init__() 12 | 13 | planes = oup // 8 14 | 15 | self.squeeze = blocks.Conv2d1x1(inp, planes) 16 | self.relu1 = nn.ReLU(inplace=True) 17 | self.expand1x1 = blocks.Conv2d1x1(planes, oup // 2, bias=True) 18 | self.expand3x3 = blocks.Conv2d3x3(planes, oup // 2, bias=True) 19 | self.combine = blocks.Combine('CONCAT') 20 | self.relu2 = nn.ReLU(inplace=True) 21 | 22 | def forward(self, x): 23 | x = self.squeeze(x) 24 | x = self.relu1(x) 25 | x = self.combine([self.expand1x1(x), self.expand3x3(x)]) 26 | x = self.relu2(x) 27 | return x 28 | 29 | 30 | @export 31 | class SqueezeNet(nn.Module): 32 | def __init__( 33 | self, 34 | in_channels: int = 3, 35 | num_classes: int = 1000, 36 | dropout_rate: float = 0.5, 37 | thumbnail: bool = False, 38 | **kwargs: Any 39 | ): 40 | super().__init__() 41 | 42 | FRONT_S = 1 if thumbnail else 2 43 | maxpool = nn.Identity() if thumbnail else nn.MaxPool2d(3, 2, ceil_mode=True) 44 | 45 | self.features = nn.Sequential( 46 | nn.Conv2d(in_channels, 96, 7, stride=FRONT_S), 47 | maxpool, 48 | 49 | FireBlock(96, 128), 50 | FireBlock(128, 128), 51 | FireBlock(128, 256), 52 | 53 | nn.MaxPool2d(3, stride=2, ceil_mode=True), 54 | 55 | FireBlock(256, 256), 56 | FireBlock(256, 384), 57 | FireBlock(384, 384), 58 | FireBlock(384, 512), 59 | 60 | nn.MaxPool2d(3, stride=2, ceil_mode=True), 61 | 62 | FireBlock(512, 512) 63 | ) 64 | 65 | self.classifier = nn.Sequential( 66 | nn.Dropout(dropout_rate), 67 | blocks.Conv2d1x1(512, num_classes, bias=True), 68 | 
nn.ReLU(inplace=True), 69 | nn.AdaptiveAvgPool2d((1, 1)) 70 | ) 71 | 72 | def forward(self, x): 73 | x = self.features(x) 74 | x = self.classifier(x) 75 | return x 76 | 77 | 78 | @export 79 | def squeezenet( 80 | pretrained: bool = False, 81 | pth: str = None, 82 | progress: bool = True, 83 | **kwargs: Any 84 | ): 85 | model = SqueezeNet(**kwargs) 86 | 87 | if pretrained: 88 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 89 | return model 90 | -------------------------------------------------------------------------------- /cvm/models/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import functools 4 | import torch 5 | import torch.nn as nn 6 | from .ops.blocks import Stage 7 | 8 | __all__ = ['export', 'config', 'load_from_local_or_url', 'get_out_channels'] 9 | 10 | 11 | def export(obj): 12 | if hasattr(sys.modules[obj.__module__], '__all__'): 13 | assert obj.__name__ not in sys.modules[ 14 | obj.__module__].__all__, f'Duplicate name: {obj.__name__}' 15 | 16 | sys.modules[obj.__module__].__all__.append(obj.__name__) 17 | else: 18 | sys.modules[obj.__module__].__all__ = [obj.__name__] 19 | return obj 20 | 21 | 22 | def config(url='', **settings): 23 | def decorator(func): 24 | @functools.wraps(func) 25 | def wrapper(*args, **kwargs): 26 | kwargs['url'] = url 27 | # kwargs['arch'] = func.__name__ 28 | return func(*args, **{**settings, **kwargs}) 29 | return wrapper 30 | 31 | return decorator 32 | 33 | 34 | def load_from_local_or_url(model, pth=None, url=None, progress=True): 35 | assert pth is not None or url is not None, 'The "pth" and "url" can not both be None.' 36 | 37 | if pth is not None: 38 | state_dict = torch.load(os.path.expanduser(pth)) 39 | else: 40 | state_dict = torch.hub.load_state_dict_from_url(url, progress=progress) 41 | 42 | model.load_state_dict(state_dict) 43 | 44 | 45 | def get_out_channels(module: nn.Module): 46 | # block has out_channels 47 | if isinstance(module, Stage) and hasattr(module, 'out_channels'): 48 | return module.out_channels 49 | 50 | # or get channels of the last Conv2d 51 | out_channels = 0 52 | for m in module.modules(): 53 | if isinstance(m, nn.Conv2d): 54 | out_channels = m.out_channels 55 | 56 | return out_channels 57 | -------------------------------------------------------------------------------- /cvm/models/vae/__init__.py: -------------------------------------------------------------------------------- 1 | from .vae import * 2 | from .cvae import * -------------------------------------------------------------------------------- /cvm/models/vae/cvae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..utils import export, load_from_local_or_url 5 | from typing import Any 6 | 7 | 8 | @export 9 | class ConditionalVAE(nn.Module): 10 | """ 11 | Paper: [Learning Structured Output Representation using Deep Conditional Generative Models](https://papers.nips.cc/paper/2015/hash/8d55a249e6baa5c06772297520da2051-Abstract.html) 12 | """ 13 | def __init__( 14 | self, 15 | image_size, 16 | nz: int = 100, 17 | **kwargs: Any 18 | ): 19 | super().__init__() 20 | 21 | self.image_size = image_size 22 | self.nz = nz 23 | 24 | self.embeds_en = nn.Embedding(10, 200) 25 | 26 | self.embeds_de = nn.Embedding(10, 10) 27 | 28 | # Q(z|X) 29 | self.encoder = nn.Sequential( 30 | nn.Linear(self.image_size ** 2 + 200, 512), 31 | nn.LeakyReLU(0.2, inplace=True), 32 | nn.Linear(512, 512), 
33 | nn.LeakyReLU(0.2, inplace=True), 34 | nn.Linear(512, 256), 35 | nn.LeakyReLU(0.2, inplace=True), 36 | nn.Linear(256, self.nz * 2) 37 | ) 38 | 39 | # P(X|z) 40 | self.decoder = nn.Sequential( 41 | nn.Linear(self.nz + 10, 256), 42 | nn.LeakyReLU(0.2, inplace=True), 43 | nn.Linear(256, 512), 44 | nn.LeakyReLU(0.2, inplace=True), 45 | nn.Linear(512, 512), 46 | nn.LeakyReLU(0.2, inplace=True), 47 | nn.Linear(512, self.image_size ** 2), 48 | nn.Sigmoid(), 49 | nn.Unflatten(1, (1, image_size, image_size)) 50 | ) 51 | 52 | def sample_z(self, mu, logvar, c): 53 | eps = torch.randn_like(logvar) 54 | 55 | return torch.cat([mu + eps * torch.exp(0.5 * logvar), c], dim=1) 56 | 57 | def forward(self, x, c): 58 | x = torch.flatten(x, 1) 59 | 60 | x = torch.cat([x, self.embeds_en(c)], dim=1) 61 | 62 | mu, logvar = torch.chunk(self.encoder(x), 2, dim=1) 63 | 64 | z = self.sample_z(mu, logvar, self.embeds_de(c)) 65 | 66 | x = self.decoder(z) 67 | return x, mu, logvar 68 | 69 | 70 | @export 71 | def cvae( 72 | pretrained: bool = False, 73 | pth: str = None, 74 | progress: bool = True, 75 | **kwargs: Any 76 | ): 77 | model = ConditionalVAE(**kwargs) 78 | 79 | if pretrained: 80 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 81 | return model 82 | -------------------------------------------------------------------------------- /cvm/models/vae/vae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..utils import export, load_from_local_or_url 5 | from typing import Any 6 | 7 | 8 | @export 9 | class VAE(nn.Module): 10 | def __init__( 11 | self, 12 | image_size, 13 | nz: int = 100, 14 | **kwargs: Any 15 | ): 16 | super().__init__() 17 | 18 | self.image_size = image_size 19 | self.nz = nz 20 | 21 | # Q(z|X) 22 | self.encoder = nn.Sequential( 23 | nn.Flatten(1), 24 | nn.Linear(self.image_size ** 2, 512), 25 | nn.LeakyReLU(0.2, inplace=True), 26 | nn.Linear(512, 512), 27 | nn.LeakyReLU(0.2, inplace=True), 28 | nn.Linear(512, 256), 29 | nn.LeakyReLU(0.2, inplace=True), 30 | nn.Linear(256, self.nz * 2) 31 | ) 32 | 33 | # P(X|z) 34 | self.decoder = nn.Sequential( 35 | nn.Linear(self.nz, 256), 36 | nn.LeakyReLU(0.2, inplace=True), 37 | nn.Linear(256, 512), 38 | nn.LeakyReLU(0.2, inplace=True), 39 | nn.Linear(512, 512), 40 | nn.LeakyReLU(0.2, inplace=True), 41 | nn.Linear(512, self.image_size ** 2), 42 | nn.Sigmoid(), 43 | nn.Unflatten(1, (1, image_size, image_size)) 44 | ) 45 | 46 | def sample_z(self, mu, logvar): 47 | eps = torch.randn_like(logvar) 48 | 49 | return mu + eps * torch.exp(0.5 * logvar) 50 | 51 | def forward(self, x): 52 | mu, logvar = torch.chunk(self.encoder(x), 2, dim=1) 53 | 54 | z = self.sample_z(mu, logvar) 55 | 56 | x = self.decoder(z) 57 | return x, mu, logvar 58 | 59 | 60 | @export 61 | def vae( 62 | pretrained: bool = False, 63 | pth: str = None, 64 | progress: bool = True, 65 | **kwargs: Any 66 | ): 67 | model = VAE(**kwargs) 68 | 69 | if pretrained: 70 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 71 | return model 72 | -------------------------------------------------------------------------------- /cvm/models/vggnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, load_from_local_or_url 6 | from typing import Any, List, OrderedDict 7 | 8 | 9 | @export 10 | class VGGNet(nn.Module): 11 | def __init__( 12 | self, 
13 | in_channels: int = 3, 14 | num_classes: int = 1000, 15 | layers: List[int] = [1, 1, 2, 2, 2], 16 | dropout_rate: float = 0.5, 17 | thumbnail: bool = False, 18 | **kwargs: Any 19 | ): 20 | super().__init__() 21 | 22 | maxpool1 = nn.Identity() if thumbnail else nn.MaxPool2d(2, stride=2) 23 | maxpool2 = nn.Identity() if thumbnail else nn.MaxPool2d(2, stride=2) 24 | 25 | self.features = nn.Sequential(OrderedDict([ 26 | ('stem', blocks.Stage( 27 | *self.make_layers(in_channels, 64, layers[0]), 28 | maxpool1 29 | )), 30 | ('stage1', blocks.Stage( 31 | *self.make_layers(64, 128, layers[1]), 32 | maxpool2 33 | )), 34 | ('stage2', blocks.Stage( 35 | *self.make_layers(128, 256, layers[2]), 36 | nn.MaxPool2d(kernel_size=2, stride=2) 37 | )), 38 | ('stage3', blocks.Stage( 39 | *self.make_layers(256, 512, layers[3]), 40 | nn.MaxPool2d(kernel_size=2, stride=2) 41 | )), 42 | ('stage4', blocks.Stage( 43 | *self.make_layers(512, 512, layers[4]), 44 | nn.MaxPool2d(kernel_size=2, stride=2) 45 | )) 46 | ])) 47 | 48 | self.pool = nn.AdaptiveAvgPool2d((7, 7)) 49 | 50 | self.classifier = nn.Sequential( 51 | nn.Linear(512 * 7 * 7, 4096), 52 | nn.ReLU(inplace=True), 53 | nn.Dropout(dropout_rate), 54 | nn.Linear(4096, 4096), 55 | nn.ReLU(inplace=True), 56 | nn.Dropout(dropout_rate), 57 | nn.Linear(4096, num_classes) 58 | ) 59 | 60 | self.reset_parameters() 61 | 62 | def reset_parameters(self) -> None: 63 | for m in self.modules(): 64 | if isinstance(m, nn.Conv2d): 65 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 66 | if m.bias is not None: 67 | nn.init.constant_(m.bias, 0) 68 | elif isinstance(m, nn.BatchNorm2d): 69 | nn.init.constant_(m.weight, 1) 70 | nn.init.constant_(m.bias, 0) 71 | elif isinstance(m, nn.Linear): 72 | nn.init.normal_(m.weight, 0, 0.01) 73 | nn.init.constant_(m.bias, 0) 74 | 75 | def forward(self, x): 76 | x = self.features(x) 77 | x = self.pool(x) 78 | x = torch.flatten(x, 1) 79 | x = self.classifier(x) 80 | return x 81 | 82 | @staticmethod 83 | def make_layers(inp, oup, n): 84 | layers = [blocks.Conv2dBlock(inp, oup, bias=True)] 85 | 86 | for _ in range(n - 1): 87 | layers.append(blocks.Conv2dBlock(oup, oup, bias=True)) 88 | 89 | return layers 90 | 91 | 92 | def _vgg( 93 | layers: List[int], 94 | pretrained: bool = False, 95 | pth: str = None, 96 | progress: bool = True, 97 | **kwargs: Any 98 | ): 99 | model = VGGNet(layers=layers, **kwargs) 100 | 101 | if pretrained: 102 | load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 103 | return model 104 | 105 | 106 | @export 107 | @blocks.normalizer(None) 108 | def vgg11(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 109 | return _vgg([1, 1, 2, 2, 2], pretrained, pth, progress, **kwargs) 110 | 111 | 112 | @export 113 | @blocks.normalizer(None) 114 | def vgg13(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 115 | return _vgg([2, 2, 2, 2, 2], pretrained, pth, progress, **kwargs) 116 | 117 | 118 | @export 119 | @blocks.normalizer(None) 120 | def vgg16(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 121 | return _vgg([2, 2, 3, 3, 3], pretrained, pth, progress, **kwargs) 122 | 123 | 124 | @export 125 | @blocks.normalizer(None) 126 | def vgg19(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 127 | return _vgg([2, 2, 4, 4, 4], pretrained, pth, progress, **kwargs) 128 | 129 | 130 | @export 131 | def vgg11_bn(pretrained: bool = False, pth: str = None, progress: bool = True, 
**kwargs: Any): 132 | return _vgg([1, 1, 2, 2, 2], pretrained, pth, progress, **kwargs) 133 | 134 | 135 | @export 136 | def vgg13_bn(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 137 | return _vgg([2, 2, 2, 2, 2], pretrained, pth, progress, **kwargs) 138 | 139 | 140 | @export 141 | def vgg16_bn(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 142 | return _vgg([2, 2, 3, 3, 3], pretrained, pth, progress, **kwargs) 143 | 144 | 145 | @export 146 | def vgg19_bn(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 147 | return _vgg([2, 2, 4, 4, 4], pretrained, pth, progress, **kwargs) 148 | -------------------------------------------------------------------------------- /cvm/models/vision_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, config, load_from_local_or_url 6 | from typing import Any 7 | from functools import partial 8 | 9 | 10 | class MultiheadSelfAttention(nn.MultiheadAttention): 11 | def forward(self, x): 12 | x, _ = super().forward(x, x, x, need_weights=False) 13 | return x 14 | 15 | 16 | class EncoderBlock(nn.Module): 17 | def __init__( 18 | self, 19 | embed_dim, 20 | num_heads: int = 8, 21 | qkv_bias: bool = False, 22 | mlp_ratio: float = 4.0, 23 | dropout_rate: float = 0., 24 | attn_dropout_rate: float = 0., 25 | drop_path_rate: float = 0., 26 | normalizer_fn: nn.Module = partial(nn.LayerNorm, eps=1e-6), 27 | ): 28 | super().__init__() 29 | 30 | self.msa = nn.Sequential( 31 | normalizer_fn(embed_dim), 32 | MultiheadSelfAttention(embed_dim, num_heads, dropout=attn_dropout_rate, bias=qkv_bias, batch_first=True), 33 | nn.Dropout(dropout_rate), 34 | blocks.StochasticDepth(1 - drop_path_rate) 35 | ) 36 | 37 | self.mlp = nn.Sequential( 38 | normalizer_fn(embed_dim), 39 | blocks.MlpBlock(embed_dim, int(embed_dim * mlp_ratio), dropout_rate=dropout_rate), 40 | blocks.StochasticDepth(1 - drop_path_rate) 41 | ) 42 | 43 | def forward(self, x): 44 | x = x + self.msa(x) 45 | x = x + self.mlp(x) 46 | return x 47 | 48 | 49 | @export 50 | class VisionTransformer(nn.Module): 51 | r""" 52 | Paper: An Image is Worth 16x16 Words. 
Transformers for Image Recognition at Scale, https://arxiv.org/abs/2010.11929 53 |     """ 54 |     def __init__( 55 |         self, 56 |         image_size: int = 224, 57 |         in_channels: int = 3, 58 |         num_classes: int = 1000, 59 |         patch_size: int = 16, 60 |         hidden_dim: int = 768, 61 |         num_blocks: int = 12, 62 |         num_heads: int = 12, 63 |         mlp_ratio: float = 4., 64 |         qkv_bias: bool = True, 65 |         dropout_rate: float = 0., 66 |         attn_dropout_rate: float = 0., 67 |         drop_path_rate: float = 0., 68 |         classifier: str = 'token', 69 |         normalizer_fn: nn.Module = partial(nn.LayerNorm, eps=1e-6), 70 |         **kwargs: Any 71 |     ): 72 |         super().__init__() 73 | 74 |         self.num_patches = (image_size // patch_size) ** 2 75 |         self.classifier = classifier 76 | 77 |         self.class_token = nn.Parameter(torch.zeros(1, 1, hidden_dim)) 78 |         self.positions = nn.Parameter(torch.normal(mean=0.0, std=0.02, size=[1, self.num_patches + 1, hidden_dim])) 79 | 80 |         self.embedding = nn.Conv2d(in_channels, hidden_dim, patch_size, stride=patch_size) 81 | 82 |         self.drop = nn.Dropout(dropout_rate) 83 | 84 |         # encoder 85 |         self.encoder = nn.Sequential(*[ 86 |             EncoderBlock( 87 |                 hidden_dim, num_heads, qkv_bias=qkv_bias, mlp_ratio=mlp_ratio, 88 |                 dropout_rate=dropout_rate, attn_dropout_rate=attn_dropout_rate, 89 |                 drop_path_rate=drop_path_rate, normalizer_fn=normalizer_fn 90 |             ) for _ in range(num_blocks) 91 |         ]) 92 | 93 |         self.norm = normalizer_fn(hidden_dim) 94 | 95 |         self.head = nn.Linear(hidden_dim, num_classes) 96 | 97 |     def forward(self, x): 98 |         # NCHW -> (N, hidden_dim, NP_H, NP_W) 99 |         x = self.embedding(x) 100 |         # (N, hidden_dim, NP_H, NP_W) -> (N, hidden_dim, NP) 101 |         x = torch.flatten(x, start_dim=2) 102 |         # (N, hidden_dim, NP) -> (N, NP, hidden_dim) 103 |         x = x.permute(0, 2, 1) 104 | 105 |         class_tokens = self.class_token.expand(x.shape[0], -1, -1) 106 |         x = torch.cat([class_tokens, x], dim=1) + self.positions 107 | 108 |         x = self.drop(x) 109 |         x = self.encoder(x) 110 |         x = self.norm(x) 111 | 112 |         x = x[:, 0] if self.classifier == 'token' else x.mean(dim=1) 113 |         return self.head(x) 114 | 115 | 116 | def _vit( 117 |     image_size: int = 224, 118 |     patch_size: int = 32, 119 |     hidden_dim: int = 768, 120 |     num_blocks: int = 12, 121 |     num_heads: int = 12, 122 |     pretrained: bool = False, 123 |     pth: str = None, 124 |     progress: bool = True, 125 |     **kwargs: Any 126 | ): 127 |     model = VisionTransformer(image_size, patch_size=patch_size, hidden_dim=hidden_dim, 128 |                               num_blocks=num_blocks, num_heads=num_heads, 129 |                               normalizer_fn=partial(nn.LayerNorm, eps=1e-6), **kwargs) 130 | 131 |     if pretrained: 132 |         load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 133 |     return model 134 | 135 | 136 | @export 137 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.1.1-vit-weights/torch-vit_b_32-f0b6fb13.pth') 138 | def vit_b_32(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 139 |     return _vit(224, 32, 768, 12, 12, pretrained, pth, progress, **kwargs) 140 | 141 | 142 | @export 143 | @config(url='https://github.com/ffiirree/cv-models/releases/download/v0.1.1-vit-weights/torch-vit_b_16-1d93d631.pth') 144 | def vit_b_16(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 145 |     return _vit(224, 16, 768, 12, 12, pretrained, pth, progress, **kwargs) 146 | 147 | 148 | @export 149 | def vit_l_32(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 150 |     return _vit(224, 32, 1024, 24, 16, pretrained, pth, progress, **kwargs) 151 | 152 | 153 | @export 154 | def 
vit_l_16(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 155 | return _vit(224, 16, 1024, 24, 16, pretrained, pth, progress, **kwargs) 156 | 157 | 158 | @export 159 | def vit_h_32(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 160 | return _vit(224, 32, 1280, 32, 16, pretrained, pth, progress, **kwargs) 161 | 162 | 163 | @export 164 | def vit_h_16(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 165 | return _vit(224, 16, 1280, 32, 16, pretrained, pth, progress, **kwargs) 166 | -------------------------------------------------------------------------------- /cvm/models/xception.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .ops import blocks 5 | from .utils import export, load_from_local_or_url 6 | from typing import OrderedDict, Any 7 | 8 | 9 | class SeparableConv2d(nn.Sequential): 10 | def __init__(self, inplanes, planes): 11 | super().__init__( 12 | blocks.DepthwiseConv2d(inplanes, inplanes), 13 | blocks.PointwiseConv2d(inplanes, planes), 14 | nn.BatchNorm2d(planes) 15 | ) 16 | 17 | 18 | class XceptionBlock(nn.Module): 19 | def __init__( 20 | self, 21 | inp, 22 | oup, 23 | stride: int = 1, 24 | expand_first: bool = True, 25 | first_relu: bool = True 26 | ): 27 | super().__init__() 28 | 29 | layers = OrderedDict([]) 30 | if first_relu: 31 | layers['relu1'] = nn.ReLU(inplace=True) 32 | 33 | planes = oup if expand_first else inp 34 | 35 | layers['conv1'] = SeparableConv2d(inp, planes) 36 | layers['relu2'] = nn.ReLU(inplace=True) 37 | layers['conv2'] = SeparableConv2d(planes, oup) 38 | 39 | self.branch1 = nn.Sequential(layers) 40 | 41 | self.branch2 = nn.Identity() 42 | 43 | if stride != 1: 44 | self.branch1.add_module('maxpool', nn.MaxPool2d(3, 2, padding=1)) 45 | self.branch2 = nn.Sequential( 46 | blocks.PointwiseConv2d(inp, oup, stride=2), 47 | nn.BatchNorm2d(oup) 48 | ) 49 | else: 50 | self.branch1.add_module('relu3', nn.ReLU(inplace=True)) 51 | self.branch1.add_module('conv3', SeparableConv2d(oup, oup)) 52 | 53 | self.combine = blocks.Combine('ADD') 54 | 55 | def forward(self, x): 56 | return self.combine([self.branch1(x), self.branch2(x)]) 57 | 58 | 59 | @export 60 | class Xception(nn.Module): 61 | def __init__( 62 | self, 63 | in_channels: int = 3, 64 | num_classes: int = 1000, 65 | thumbnail: bool = False, 66 | **kwargs: Any 67 | ): 68 | super().__init__() 69 | 70 | FRONT_S = 1 if thumbnail else 2 71 | 72 | self.features = nn.Sequential( 73 | blocks.Conv2dBlock(in_channels, 32, stride=FRONT_S, padding=0), 74 | blocks.Conv2dBlock(32, 64, padding=0), 75 | 76 | XceptionBlock(64, 128, stride=FRONT_S, first_relu=False), 77 | XceptionBlock(128, 256, stride=2), 78 | XceptionBlock(256, 728, stride=2), 79 | 80 | *[XceptionBlock(728, 728) for _ in range(8)], 81 | 82 | XceptionBlock(728, 1024, stride=2, expand_first=False), 83 | 84 | SeparableConv2d(1024, 1536), 85 | nn.ReLU(inplace=True), 86 | SeparableConv2d(1536, 2048), 87 | nn.ReLU(inplace=True) 88 | ) 89 | 90 | self.avg = nn.AdaptiveAvgPool2d((1, 1)) 91 | 92 | self.classifier = nn.Linear(2048, num_classes) 93 | 94 | def forward(self, x): 95 | x = self.features(x) 96 | x = self.avg(x) 97 | x = torch.flatten(x, 1) 98 | x = self.classifier(x) 99 | return x 100 | 101 | 102 | @export 103 | def xception(pretrained: bool = False, pth: str = None, progress: bool = True, **kwargs: Any): 104 | model = Xception(**kwargs) 105 | 106 | if pretrained: 107 | 
load_from_local_or_url(model, pth, kwargs.get('url', None), progress) 108 | return model 109 | -------------------------------------------------------------------------------- /cvm/scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | from .cosine_lr import * 2 | from .step_lr import * -------------------------------------------------------------------------------- /cvm/scheduler/cosine_lr.py: -------------------------------------------------------------------------------- 1 | import math 2 | import warnings 3 | import torch.optim as optim 4 | 5 | 6 | __all__ = ['WarmUpCosineLR'] 7 | 8 | 9 | class WarmUpCosineLR(optim.lr_scheduler._LRScheduler): 10 | def __init__(self, optimizer, warmup_steps, steps, min_lr=0.1, last_epoch=-1, verbose=False): 11 | self.warmup_steps = warmup_steps 12 | self.steps = steps - self.warmup_steps 13 | self.min_lr = min_lr 14 | super().__init__(optimizer, last_epoch, verbose) 15 | 16 | def get_lr(self): 17 | if not self._get_lr_called_within_step: 18 | warnings.warn("To get the last learning rate computed by the scheduler, " 19 | "please use `get_last_lr()`.", UserWarning) 20 | 21 | if self.last_epoch < self.warmup_steps: 22 | return [base_lr * (float(1 + self.last_epoch) / self.warmup_steps) for base_lr in self.base_lrs] 23 | 24 | return [self.min_lr + (base_lr - self.min_lr) * (1 + math.cos(math.pi * (1 + self.last_epoch - self.warmup_steps) / self.steps)) / 2 25 | for base_lr in self.base_lrs] 26 | 27 | def __repr__(self) -> str: 28 | return f'WarmUpCosineLR(warmup_steps={self.warmup_steps}, steps={self.steps}, min_lr={self.min_lr})' 29 | -------------------------------------------------------------------------------- /cvm/scheduler/step_lr.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import torch.optim as optim 3 | 4 | 5 | __all__ = ['WarmUpStepLR'] 6 | 7 | 8 | class WarmUpStepLR(optim.lr_scheduler._LRScheduler): 9 | def __init__(self, optimizer, warmup_steps, step_size, gamma=0.1, last_epoch=-1, verbose=False): 10 | self.warmup_steps = warmup_steps 11 | self.step_size = step_size 12 | self.gamma = gamma 13 | super().__init__(optimizer, last_epoch, verbose) 14 | 15 | def get_lr(self): 16 | if not self._get_lr_called_within_step: 17 | warnings.warn("To get the last learning rate computed by the scheduler, " 18 | "please use `get_last_lr()`.", UserWarning) 19 | 20 | if self.last_epoch < self.warmup_steps: 21 | return [base_lr * (float(1 + self.last_epoch) / self.warmup_steps) for base_lr in self.base_lrs] 22 | 23 | milestone = ((self.last_epoch - self.warmup_steps) // self.step_size) 24 | return [base_lr * self.gamma ** milestone for base_lr in self.base_lrs] 25 | 26 | def __repr__(self) -> str: 27 | return f'WarmUpStepLR(warmup_steps={self.warmup_steps}, step_size={self.step_size}, gamma={self.gamma})' 28 | -------------------------------------------------------------------------------- /cvm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logger import * 2 | from .utils import * 3 | from .augment import * 4 | from .ema import * 5 | from .factory import * 6 | from .seg_transforms import * 7 | from .metrics import * -------------------------------------------------------------------------------- /cvm/utils/coco.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | 4 | import torch 5 | import torch.utils.data 6 | import 
torchvision 7 | from PIL import Image 8 | from pycocotools import mask as coco_mask 9 | from .seg_transforms import Compose 10 | 11 | 12 | class FilterAndRemapCocoCategories: 13 | def __init__(self, categories, remap=True): 14 | self.categories = categories 15 | self.remap = remap 16 | 17 | def __call__(self, image, anno): 18 | anno = [obj for obj in anno if obj["category_id"] in self.categories] 19 | if not self.remap: 20 | return image, anno 21 | anno = copy.deepcopy(anno) 22 | for obj in anno: 23 | obj["category_id"] = self.categories.index(obj["category_id"]) 24 | return image, anno 25 | 26 | 27 | def convert_coco_poly_to_mask(segmentations, height, width): 28 | masks = [] 29 | for polygons in segmentations: 30 | rles = coco_mask.frPyObjects(polygons, height, width) 31 | mask = coco_mask.decode(rles) 32 | if len(mask.shape) < 3: 33 | mask = mask[..., None] 34 | mask = torch.as_tensor(mask, dtype=torch.uint8) 35 | mask = mask.any(dim=2) 36 | masks.append(mask) 37 | if masks: 38 | masks = torch.stack(masks, dim=0) 39 | else: 40 | masks = torch.zeros((0, height, width), dtype=torch.uint8) 41 | return masks 42 | 43 | 44 | class ConvertCocoPolysToMask: 45 | def __call__(self, image, anno): 46 | w, h = image.size 47 | segmentations = [obj["segmentation"] for obj in anno] 48 | cats = [obj["category_id"] for obj in anno] 49 | if segmentations: 50 | masks = convert_coco_poly_to_mask(segmentations, h, w) 51 | cats = torch.as_tensor(cats, dtype=masks.dtype) 52 | # merge all instance masks into a single segmentation map 53 | # with its corresponding categories 54 | target, _ = (masks * cats[:, None, None]).max(dim=0) 55 | # discard overlapping instances 56 | target[masks.sum(0) > 1] = 255 57 | else: 58 | target = torch.zeros((h, w), dtype=torch.uint8) 59 | target = Image.fromarray(target.numpy()) 60 | return image, target 61 | 62 | 63 | def _coco_remove_images_without_annotations(dataset, cat_list=None): 64 | def _has_valid_annotation(anno): 65 | # if it's empty, there is no annotation 66 | if len(anno) == 0: 67 | return False 68 | # if more than 1k pixels occupied in the image 69 | return sum(obj["area"] for obj in anno) > 1000 70 | 71 | if not isinstance(dataset, torchvision.datasets.CocoDetection): 72 | raise TypeError( 73 | f"This function expects dataset of type torchvision.datasets.CocoDetection, instead got {type(dataset)}" 74 | ) 75 | 76 | ids = [] 77 | for ds_idx, img_id in enumerate(dataset.ids): 78 | ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) 79 | anno = dataset.coco.loadAnns(ann_ids) 80 | if cat_list: 81 | anno = [obj for obj in anno if obj["category_id"] in cat_list] 82 | if _has_valid_annotation(anno): 83 | ids.append(ds_idx) 84 | 85 | dataset = torch.utils.data.Subset(dataset, ids) 86 | return dataset 87 | 88 | 89 | def get_coco(root, image_set, transforms): 90 | PATHS = { 91 | "train": ("train2017", os.path.join("annotations", "instances_train2017.json")), 92 | "val": ("val2017", os.path.join("annotations", "instances_val2017.json")), 93 | # "train": ("val2017", os.path.join("annotations", "instances_val2017.json")) 94 | } 95 | CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72] 96 | 97 | transforms = Compose([FilterAndRemapCocoCategories(CAT_LIST, remap=True), ConvertCocoPolysToMask(), transforms]) 98 | 99 | img_folder, ann_file = PATHS[image_set] 100 | img_folder = os.path.join(root, img_folder) 101 | ann_file = os.path.join(root, ann_file) 102 | 103 | dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, 
transforms=transforms) 104 | 105 | if image_set == "train": 106 | dataset = _coco_remove_images_without_annotations(dataset, CAT_LIST) 107 | 108 | return dataset 109 | -------------------------------------------------------------------------------- /cvm/utils/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class ExponentialMovingAverage(torch.optim.swa_utils.AveragedModel): 5 | """Maintains moving averages of model parameters using an exponential decay. 6 | ``ema_avg = decay * avg_model_param + (1 - decay) * model_param`` 7 | ``torch.optim.swa_utils.AveragedModel`` 8 | is used to compute the EMA. 9 | """ 10 | 11 | def __init__(self, model, decay, device="cpu"): 12 | def ema_avg(avg_model_param, model_param, num_averaged): 13 | return decay * avg_model_param + (1 - decay) * model_param 14 | 15 | super().__init__(model, device, ema_avg) 16 | 17 | def update_parameters(self, model): 18 | for p_swa, p_model in zip(self.module.state_dict().values(), model.state_dict().values()): 19 | device = p_swa.device 20 | p_model_ = p_model.detach().to(device) 21 | if self.n_averaged == 0: 22 | p_swa.detach().copy_(p_model_) 23 | else: 24 | p_swa.detach().copy_(self.avg_fn(p_swa.detach(), p_model_, self.n_averaged.to(device))) 25 | self.n_averaged += 1 26 | -------------------------------------------------------------------------------- /cvm/utils/logger.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | import logging 4 | from os.path import dirname, abspath, exists, join 5 | import torch.distributed as dist 6 | from .utils import is_dist_avail_and_initialized 7 | 8 | __all__ = ['make_logger'] 9 | 10 | 11 | def make_logger(run_name, log_dir='logs', rank: int = 0): 12 | logger = logging.getLogger(run_name) 13 | logger.propagate = False 14 | 15 | log_filepath = join(log_dir, f'{run_name}_{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.log') 16 | 17 | log_dir = dirname(abspath(log_filepath)) 18 | if not exists(log_dir) and rank == 0: 19 | os.makedirs(log_dir) 20 | 21 | if is_dist_avail_and_initialized(): 22 | dist.barrier() 23 | 24 | if not logger.handlers and rank == 0: # execute only if logger doesn't already exist 25 | file_handler = logging.FileHandler(log_filepath, mode='a', encoding='utf-8') 26 | stream_handler = logging.StreamHandler(os.sys.stdout) 27 | 28 | formatter = logging.Formatter( 29 | '%(asctime)s - %(filename)s:%(lineno)d[%(levelname)s]: %(message)s', 30 | datefmt='%H:%M:%S' 31 | ) 32 | 33 | file_handler.setFormatter(formatter) 34 | stream_handler.setFormatter(formatter) 35 | 36 | logger.addHandler(file_handler) 37 | logger.addHandler(stream_handler) 38 | logger.setLevel(logging.INFO) 39 | return logger 40 | -------------------------------------------------------------------------------- /cvm/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | __all__ = ['accuracy', 'accuracy_k', 'ConfusionMatrix'] 4 | 5 | 6 | def accuracy(output, target, topk=(1,)): 7 | """Computes the accuracy over the k top predictions for the specified values of k""" 8 | with torch.inference_mode(): 9 | maxk = max(topk) 10 | batch_size = target.size(0) 11 | if target.ndim == 2: 12 | target = target.max(dim=1)[1] 13 | 14 | _, pred = output.topk(maxk, 1, True, True) 15 | pred = pred.t() 16 | correct = pred.eq(target[None]) 17 | 18 | res = [] 19 | for k in topk: 20 | correct_k = 
correct[:k].flatten().sum(dtype=torch.float32) 21 | res.append(correct_k * (100.0 / batch_size)) 22 | return res 23 | 24 | 25 | def accuracy_k(output: torch.Tensor, target): 26 | 27 | with torch.inference_mode(): 28 | output = output.max(dim=1)[1] 29 | if target.ndim == 2: 30 | target = target.max(dim=1)[1] 31 | 32 | mask = output.eq(target) 33 | 34 | return target[mask] 35 | 36 | 37 | class ConfusionMatrix: 38 | def __init__(self, num_classes, eps=1e-6): 39 | self.n = num_classes 40 | self.mat = None 41 | self.eps = eps 42 | 43 | def update(self, pr, gt): 44 | if self.mat is None: 45 | self.mat = torch.zeros( 46 | (self.n, self.n), dtype=torch.int64, device=pr.device) 47 | 48 | with torch.inference_mode(): 49 | k = (gt >= 0) & (gt < self.n) 50 | inds = self.n * gt[k].to(torch.int64) + pr[k] 51 | self.mat += torch.bincount(inds, minlength=self.n ** 2).reshape(self.n, self.n) 52 | 53 | def all_reduce(self): 54 | if not torch.distributed.is_available(): 55 | return 56 | if not torch.distributed.is_initialized(): 57 | return 58 | 59 | torch.distributed.barrier() 60 | torch.distributed.all_reduce(self.mat) 61 | 62 | @property 63 | def intersection(self): 64 | return torch.diag(self.mat) 65 | 66 | @property 67 | def union(self): 68 | return self.mat.sum(0) + self.mat.sum(1) 69 | 70 | @property 71 | def iou(self): 72 | return (self.intersection / (self.union - self.intersection + self.eps)).tolist() 73 | 74 | @property 75 | def mean_iou(self): 76 | return (self.intersection / (self.union - self.intersection + self.eps)).mean().item() 77 | 78 | @property 79 | def pa(self): 80 | return (self.intersection.sum() / self.mat.sum()).item() 81 | 82 | @property 83 | def mean_pa(self): 84 | return (self.intersection / self.mat.sum(1)).tolist() 85 | -------------------------------------------------------------------------------- /cvm/utils/seg_transforms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torchvision.transforms.v2 as T 4 | from torchvision.transforms.v2 import functional as TF 5 | 6 | 7 | class Compose(T.Compose): 8 | def __init__(self, transforms): 9 | super().__init__(transforms) 10 | 11 | def __call__(self, images, targets): 12 | for t in self.transforms: 13 | images, targets = t(images, targets) 14 | return images, targets 15 | 16 | 17 | class ToImage: 18 | def __call__(self, images, targets): 19 | return TF.to_image(images), torch.as_tensor(np.array(targets), dtype=torch.int64) 20 | 21 | def __repr__(self): 22 | return self.__class__.__name__ + '()' 23 | 24 | 25 | class RandomHorizontalFlip(torch.nn.Module): 26 | def __init__(self, p=0.5): 27 | super().__init__() 28 | self.p = p 29 | 30 | def forward(self, images, targets): 31 | if torch.rand(1) < self.p: 32 | return TF.hflip(images), TF.hflip(targets) 33 | return images, targets 34 | 35 | def __repr__(self): 36 | return self.__class__.__name__ + '(p={})'.format(self.p) 37 | 38 | 39 | class RandomVerticalFlip(torch.nn.Module): 40 | def __init__(self, p=0.5): 41 | super().__init__() 42 | self.p = p 43 | 44 | def forward(self, images, targets): 45 | if torch.rand(1) < self.p: 46 | return TF.vflip(images), TF.vflip(targets) 47 | return images, targets 48 | 49 | def __repr__(self): 50 | return self.__class__.__name__ + '(p={})'.format(self.p) 51 | 52 | 53 | class ToDtype(T.ToDtype): 54 | def __init__(self, dtype, scale: bool = False) -> None: 55 | super().__init__(dtype=dtype, scale=scale) 56 | 57 | def forward(self, images, targets): 58 | return 
super().forward(images), targets 59 | 60 | 61 | class Normalize(T.Normalize): 62 | def __init__(self, mean, std, inplace=False): 63 | super().__init__(mean, std, inplace) 64 | 65 | def forward(self, images, targets): 66 | return super().forward(images), targets 67 | 68 | 69 | class Resize(T.Resize): 70 | def __init__(self, size, interpolation=TF.InterpolationMode.BILINEAR): 71 | super().__init__(size, interpolation=interpolation) 72 | 73 | def forward(self, images, targets): 74 | images = TF.resize(images, self.size, self.interpolation) 75 | targets = TF.resize(targets, self.size, TF.InterpolationMode.NEAREST) 76 | 77 | return images, targets 78 | 79 | 80 | class RandomCrop(T.RandomCrop): 81 | def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode="constant"): 82 | super().__init__( 83 | size, 84 | padding=padding, 85 | pad_if_needed=pad_if_needed, 86 | fill=fill, 87 | padding_mode=padding_mode 88 | ) 89 | 90 | def forward(self, images, targets): 91 | img = images 92 | if self.padding is not None: 93 | img = TF.pad(img, self.padding, self.fill, self.padding_mode) 94 | width, height = TF.get_image_size(img) 95 | # pad the width if needed 96 | if self.pad_if_needed and width < self.size[1]: 97 | padding = [self.size[1] - width, 0] 98 | img = TF.pad(img, padding, self.fill, self.padding_mode) 99 | # pad the height if needed 100 | if self.pad_if_needed and height < self.size[0]: 101 | padding = [0, self.size[0] - height] 102 | img = TF.pad(img, padding, self.fill, self.padding_mode) 103 | 104 | i, j, h, w = self.get_params(img, self.size) 105 | 106 | return TF.crop(img, i, j, h, w), TF.crop(targets, i, j, h, w) 107 | 108 | 109 | class RandomResizedCrop(T.RandomResizedCrop): 110 | def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. 
/ 3.), interpolation=TF.InterpolationMode.BILINEAR): 111 | super().__init__(size, scale=scale, ratio=ratio, interpolation=interpolation) 112 | 113 | def forward(self, images, targets): 114 | i, j, h, w = self.get_params(images, self.scale, self.ratio) 115 | images = TF.resized_crop(images, i, j, h, w, self.size, self.interpolation) 116 | targets = TF.resized_crop(targets, i, j, h, w, self.size, TF.InterpolationMode.NEAREST) 117 | return images, targets 118 | -------------------------------------------------------------------------------- /cvm/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.22' -------------------------------------------------------------------------------- /flops.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import torch 4 | from cvm.utils import list_models, create_model 5 | from fvcore.nn import FlopCountAnalysis, flop_count_str, flop_count_table 6 | 7 | 8 | def print_model(model, str: bool = False, max_depth: int = 3): 9 | model.eval() 10 | flops = FlopCountAnalysis(model, input) 11 | 12 | print(flop_count_str(flops) if str else flop_count_table(flops, max_depth=max_depth)) 13 | 14 | 15 | if __name__ == '__main__': 16 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 17 | parser.add_argument('--model', '-m', type=str) 18 | parser.add_argument('--str', action='store_true') 19 | parser.add_argument('--list-models', type=str, default=None) 20 | parser.add_argument('--in-channels', type=int, default=3) 21 | parser.add_argument('--num-classes', type=int, default=1000) 22 | parser.add_argument('--image-size', type=int, default=224) 23 | parser.add_argument('--max-depth', type=int, default=3) 24 | 25 | args = parser.parse_args() 26 | 27 | input = torch.randn(1, args.in_channels, args.image_size, args.image_size) 28 | 29 | thumbnail = True if args.image_size < 100 else False 30 | 31 | if args.list_models: 32 | print(json.dumps(list_models(args.list_models), indent=4)) 33 | else: 34 | print_model( 35 | create_model( 36 | args.model, 37 | thumbnail=thumbnail, 38 | in_channels=args.in_channels, 39 | num_classes=args.num_classes, 40 | cuda=False, 41 | ), 42 | args.str, 43 | args.max_depth 44 | ) 45 | -------------------------------------------------------------------------------- /info.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from torchinfo import summary 3 | from cvm.utils import create_model 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 7 | parser.add_argument('--model', '-m', type=str) 8 | 9 | args = parser.parse_args() 10 | 11 | model = create_model(args.model, cuda=False) 12 | 13 | summary( 14 | model, 15 | input_size=(1, 3, 224, 224), 16 | col_names=("output_size", "num_params", 'mult_adds') 17 | ) 18 | -------------------------------------------------------------------------------- /profiler.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | from tqdm import tqdm 4 | from torch.profiler.profiler import tensorboard_trace_handler 5 | from cvm.utils import create_model 6 | 7 | if __name__ == '__main__': 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--model', type=str, default='micronet_b1_0') 10 | parser.add_argument('--batch-size', type=int, default=64, metavar='N') 11 | 
parser.add_argument('--amp', action='store_true') 12 | parser.add_argument('--torch', action='store_true', help='suffix the trace dir with "_torch".') 13 | args = parser.parse_args() 14 | model = create_model(args.model) 15 | model.eval() 16 | 17 | images = torch.randn([args.batch_size, 3, 224, 224]).cuda() 18 | 19 | suffix = '_torch' if args.torch else '' 20 | with torch.profiler.profile( 21 | schedule=torch.profiler.schedule( 22 | wait=1, 23 | warmup=2, 24 | active=2, 25 | repeat=1 26 | ), 27 | profile_memory=True, 28 | on_trace_ready=tensorboard_trace_handler( 29 | f'logs/profiles/{args.model}{suffix}' 30 | ), 31 | with_stack=True, 32 | record_shapes=True, 33 | with_flops=True, 34 | activities=[ 35 | torch.profiler.ProfilerActivity.CPU, 36 | torch.profiler.ProfilerActivity.CUDA 37 | ] 38 | ) as prof, tqdm(total=5) as pbar: 39 | for _ in range(5): 40 | with torch.amp.autocast(device_type='cuda', enabled=args.amp): 41 | output = model(images) 42 | 43 | prof.step() 44 | pbar.update() 45 | 46 | print('>>>>>>>> DONE!!!') 47 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.10.0 2 | torchvision>=0.11.1 3 | fvcore 4 | torchinfo 5 | tqdm 6 | nvidia-dali-cuda110>=1.7.0 -------------------------------------------------------------------------------- /resize_imagenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import shutil 4 | from cvm.utils import * 5 | import cv2 6 | from tqdm import tqdm 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='ImageNet Resizing') 11 | parser.add_argument('--src', type=str, default='/datasets/ILSVRC2012') 12 | parser.add_argument('--dst', type=str, default='/datasets/ILSVRC2012_R') 13 | parser.add_argument('--max-size', type=int, default=256) 14 | 15 | return parser.parse_args() 16 | 17 | 18 | if __name__ == '__main__': 19 | args = parse_args() 20 | print(args) 21 | 22 | dirs = os.listdir(os.path.join(args.src, 'train')) 23 | dirs.sort() 24 | for i, cls in enumerate(dirs): 25 | files = os.listdir(os.path.join(args.src, 'train', cls)) 26 | 27 | if not os.path.exists(os.path.join(args.dst, 'train', cls)): 28 | os.makedirs(os.path.join(args.dst, 'train', cls)) 29 | 30 | for f in tqdm(files, desc=f'Resizing [{i:>3}/{len(dirs)}]', unit='images', leave=False, ascii=True): 31 | src_file, dst_file = os.path.join(args.src, 'train', cls, f), os.path.join(args.dst, 'train', cls, f) 32 | image = cv2.imread(src_file) 33 | 34 | if min(image.shape[0], image.shape[1]) <= args.max_size: 35 | shutil.copyfile(src_file, dst_file) 36 | else: 37 | if image.shape[0] < image.shape[1]: 38 | size = (int((image.shape[1] / image.shape[0]) * args.max_size), args.max_size) 39 | else: 40 | size = (args.max_size, int((image.shape[0] / image.shape[1]) * args.max_size)) 41 | 42 | image = cv2.resize(image, size, interpolation=cv2.INTER_AREA) 43 | cv2.imwrite(dst_file, image) 44 | -------------------------------------------------------------------------------- /results.md: -------------------------------------------------------------------------------- 1 | ## Classification 2 | 3 | ### ImageNet-1K 4 | 5 | | Model | Year | Params | FLOPS | Loader | Resize | Crop | Top@1 | Top@5 | Real Top@1 | Real Top@5 | 6 | | -------------------------- | ---- | ------: | ------: | -----: | :----: | :---: | :----: | :----: | :--------: | :--------: | 7 | | ResNet18 | 2015 | 11.69M | 1.819G | DALI | 256 | 224 | 71.060 | 89.922 | | | 8 | | MobileNet V1 $\times$ 0.35 
| 2017 | 0.766M | 0.079G | DALI | 232 | 224 | 58.842 | 81.974 | | | 9 | | MobileNet V1 $\times$ 0.5 | 2017 | 1.332M | 0.155G | DALI | 232 | 224 | 65.126 | 86.190 | | | 10 | | MobileNet V1 $\times$ 0.75 | 2017 | 2.586M | 0.333G | DALI | 232 | 224 | 69.688 | 89.474 | | | 11 | | MobileNet V1 $\times$ 1.0 | 2017 | 4.232M | 0.579G | DALI | 232 | 224 | 73.264 | 91.222 | 80.357 | 94.310 | 12 | | MobileNet V2 $\times$ 0.35 | 2018 | 1.677M | 0.065G | DALI | 232 | 224 | 58.804 | 81.160 | | | 13 | | MobileNet V2 $\times$ 0.5 | 2018 | 1.969M | 0.104G | DALI | 232 | 224 | 63.458 | 84.890 | | | 14 | | MobileNet V2 $\times$ 0.75 | 2018 | 2.636M | 0.221G | DALI | 232 | 224 | 68.448 | 88.298 | | | 15 | | MobileNet V2 $\times$ 1.0 | 2018 | 3.505M | 0.314G | DALI | 232 | 224 | 72.154 | 90.736 | | | 16 | | ShuffleNet V2 $\times$ 2.0 | 2018 | 7.394M | 0.591G | DALI | 232 | 224 | 74.368 | 92.050 | | | 17 | | ViT-B/32 | 2020 | 88.224M | 4.414G | DALI | 232 | 224 | 75.438 | 92.264 | | | 18 | | ViT-B/16 | 2020 | 86.568M | 17.583G | DALI | 232 | 224 | 80.972 | 95.290 | | | 19 | | VGNetG $\times$ 1.0MP | 2022 | 1.000M | 0.144G | DALI | 232 | 224 | 68.128 | 88.312 | | | 20 | | VGNetG $\times$ 1.0MP+SE | 2022 | 1.146M | 0.145G | DALI | 232 | 224 | 70.122 | 89.524 | | | 21 | | VGNetG $\times$ 1.5MP | 2022 | 1.506M | 0.191G | DALI | 232 | 224 | 70.494 | 89.684 | | | 22 | | VGNetG $\times$ 1.5MP+SE | 2022 | 1.706M | 0.192G | DALI | 232 | 224 | 72.422 | 90.664 | | | 23 | | VGNetG $\times$ 2.0MP | 2022 | 2.01M | 0.304G | DALI | 232 | 224 | 72.314 | 90.730 | | | 24 | | VGNetG $\times$ 2.0MP+SE | 2022 | 2.349M | 0.306G | DALI | 232 | 224 | 74.324 | 91.788 | | | 25 | | VGNetG $\times$ 2.5MP | 2022 | 2.497M | 0.403G | DALI | 232 | 224 | 73.740 | 91.516 | | | 26 | | VGNetG $\times$ 2.5MP+SE | 2022 | 2.927M | 0.405G | DALI | 232 | 224 | 75.590 | 92.568 | | | 27 | | RegNetX-400MF | 2020 | 5.496M | 0.420G | DALI | 256 | 224 | 73.156 | 91.320 | | | 28 | | ConvNeXt-Tiny | 2022 | 28.589M | 4.470G | DALI | 236 | 224 | 82.428 | 96.132 | | | 29 | | ConvNeXt-Small | 2022 | 50.224M | 8.705G | DALI | 230 | 224 | 83.544 | 96.640 | | | 30 | | ConvNeXt-Base | 2022 | 88.591M | 15.384G | DALI | 232 | 224 | 83.936 | 96.886 | | | 31 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | 5 | 6 | def run_script(script: str, args: str = ''): 7 | cmd = f'torchrun --standalone --nnodes=1 --nproc_per_node={torch.cuda.device_count()} {script} {args}' 8 | print(f'\n====\n > {cmd}\n====\n') 9 | os.system(cmd) 10 | time.sleep(1) 11 | 12 | 13 | if __name__ == '__main__': 14 | # ImageNet-1K 15 | imagenet = f'train.py '\ 16 | '--data-dir "/datasets/ILSVRC2012" '\ 17 | '--crop-size 192 --val-resize-size 232 --val-crop-size 224 ' \ 18 | '--workers 16 '\ 19 | '--amp '\ 20 | '--dali --dali-cpu '\ 21 | '--lr 0.2 --lr-sched cosine --momentum 0.9 --wd 0.0001 --no-bias-bn-wd '\ 22 | '--batch-size 512 '\ 23 | '--epochs 100 --warmup-epochs 5 '\ 24 | '--print-freq 250 ' \ 25 | '--label-smoothing 0.1 ' 26 | # '--mixup-alpha 0.8 --cutmix-alpha 1.0 ' \ 27 | # '--color-jitter 0.4 --random-erasing 0.25 '\ 28 | # '--augment rand-m9-mstd0.5 '\ 29 | # '--model-ema --model-ema-decay 0.9999 ' 30 | 31 | # ImageNet-398 32 | tiny_imagenet = f'train.py '\ 33 | '--data-dir "/datasets/TINY_ILSVRC2012" '\ 34 | '--crop-size 176 --val-resize-size 232 --val-crop-size 224 --num-classes 398 ' \ 35 | '--workers 8 '\ 36 | 
'--amp '\ 37 | '--lr 0.4 --lr-sched cosine --momentum 0.9 --no-bias-bn-wd '\ 38 | '--batch-size 1024 '\ 39 | '--warmup-epochs 5 '\ 40 | '--print-freq 90 ' \ 41 | '--label-smoothing 0.1 ' 42 | 43 | mnist = f'train.py '\ 44 | '--dataset MNIST --data-dir "/datasets/MNIST" --in-channels 1 --hflip 0.0 '\ 45 | '--crop-size 28 --val-resize-size 28 --val-crop-size 28 --crop-padding 4 --num-classes 10 ' \ 46 | '--workers 8 '\ 47 | '--amp '\ 48 | '--lr 0.4 --lr-sched cosine --momentum 0.9 --wd 0.001 --no-bias-bn-wd '\ 49 | '--batch-size 2048 --epochs 30 '\ 50 | '--warmup-epochs 3 '\ 51 | '--print-freq 10 ' \ 52 | '--label-smoothing 0.1 ' 53 | 54 | # CIFAR10/100 55 | cifar = f'train.py '\ 56 | '--dataset CIFAR100 --data-dir "/datasets/CIFAR100" '\ 57 | '--crop-size 32 --val-resize-size 32 --val-crop-size 32 ' \ 58 | '--workers 8 '\ 59 | '--amp '\ 60 | '--lr 0.4 --lr-sched cosine --momentum 0.9 --wd 0.0005 --no-bias-bn-wd '\ 61 | '--batch-size 1024 '\ 62 | '--epochs 100 --warmup-epochs 5 '\ 63 | '--print-freq 15 ' \ 64 | '--label-smoothing 0.1 '\ 65 | '--random-erasing 0.25 --dropout-rate 0.25 --augment torch/autoaug-cifar10 ' 66 | 67 | # VOC segmentation 68 | voc = f'train_seg.py '\ 69 | '--dataset VOCSegmentation --data-dir "/datasets/PASCAL_VOC" '\ 70 | '--workers 8 '\ 71 | '--amp '\ 72 | '--lr 0.01 --lr-sched cosine --momentum 0.9 --wd 0.0001 --no-bias-bn-wd '\ 73 | '--batch-size 16 '\ 74 | '--print-freq 30 ' \ 75 | '--epochs 100 --aux-loss' 76 | 77 | run_script(imagenet, '--model mobilenet_v1_x1_0') 78 | # run_script(voc, '--pretrained-backbone --model seg/fcn_regnet_x_400mf') 79 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from codecs import open 3 | from setuptools import find_packages, setup 4 | 5 | exec(open('cvm/version.py').read()) 6 | setup( 7 | name='cvm', 8 | version=__version__, 9 | description='Computer Vision Models', 10 | url='https://github.com/ffiirree/cv-models', 11 | author='Liangqi Zhang', 12 | author_email='zhliangqi@gmail.com', 13 | python_requires='>=3.8', 14 | install_requires=[ 15 | 'torch >= 1.12', 16 | 'torchvision', 17 | 'fvcore', 18 | 'torchinfo', 19 | 'tqdm', 20 | 'pycocotools', 21 | 'nvidia-dali-cuda110 >= 1.16' 22 | ], 23 | packages=find_packages(exclude=['tests']) 24 | ) -------------------------------------------------------------------------------- /tests/test_blocks.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import pytest 3 | import torch 4 | import torch.nn as nn 5 | from cvm.models.ops import blocks 6 | 7 | 8 | def test_se_block_forward(): 9 | inputs = torch.randn(16, 3, 56, 56) 10 | 11 | se = blocks.SEBlock(3, 0.25) 12 | 13 | outputs = se(inputs) 14 | assert outputs.shape == inputs.shape 15 | assert isinstance(se.act, nn.ReLU) 16 | assert isinstance(se.gate, nn.Sigmoid) 17 | 18 | 19 | def test_se_block_decorator(): 20 | with blocks.se(inner_nonlinear=nn.SiLU, gating_fn=nn.Hardsigmoid): 21 | se = blocks.SEBlock(3, 0.25) 22 | 23 | assert isinstance(se.act, nn.SiLU) 24 | assert isinstance(se.gate, nn.Hardsigmoid) 25 | 26 | 27 | def test_normalizer_decorator(): 28 | with blocks.normalizer(None): 29 | layers = blocks.norm_activation(3) 30 | 31 | assert len(layers) == 1 32 | assert isinstance(layers[0], nn.ReLU) 33 | 34 | with blocks.normalizer(nn.LayerNorm, position='before'): 35 | layers = blocks.norm_activation(3) 36 | 37 | assert 
len(layers) == 2 38 | assert isinstance(layers[0], nn.LayerNorm) 39 | assert isinstance(layers[1], nn.ReLU) 40 | 41 | with blocks.normalizer(partial(nn.BatchNorm2d, eps=0.1), position='after'): 42 | layers = blocks.norm_activation(3) 43 | 44 | assert len(layers) == 2 45 | assert isinstance(layers[0], nn.ReLU) 46 | assert isinstance(layers[1], nn.BatchNorm2d) 47 | assert layers[1].eps == 0.1 48 | 49 | 50 | def test_nonlinear_decorator(): 51 | with blocks.nonlinear(None): 52 | layers = blocks.norm_activation(3) 53 | 54 | assert len(layers) == 1 55 | assert isinstance(layers[0], nn.BatchNorm2d) 56 | 57 | with blocks.nonlinear(nn.SiLU): 58 | layers = blocks.norm_activation(3) 59 | 60 | assert len(layers) == 2 61 | assert isinstance(layers[0], nn.BatchNorm2d) 62 | assert isinstance(layers[1], nn.SiLU) 63 | -------------------------------------------------------------------------------- /tests/test_models.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import torch 3 | from cvm.models.core import SegmentationModel 4 | from cvm.utils import list_models, create_model 5 | 6 | 7 | @pytest.mark.parametrize('name', list_models('cvm')) 8 | def test_model_forward(name): 9 | model = create_model( 10 | name, 11 | dropout_rate=0., 12 | drop_path_rate=0., 13 | num_classes=10, 14 | cuda=False 15 | ) 16 | 17 | model.eval() 18 | 19 | inputs = torch.randn((1, 3, 224, 224)) 20 | outputs = model(inputs) 21 | 22 | if name in ['unet', 'vae', 'dcgan']: 23 | ... 24 | elif isinstance(model, SegmentationModel): 25 | assert outputs[0].shape == torch.Size([1, 10, 224, 224]) 26 | assert not torch.isnan(outputs[0]).any(), 'Output included NaNs' 27 | else: 28 | assert outputs.shape == torch.Size([1, 10]) 29 | assert not torch.isnan(outputs).any(), 'Output included NaNs' 30 | -------------------------------------------------------------------------------- /validate.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import torch 4 | from tqdm import tqdm 5 | from cvm.utils import accuracy, AverageMeter, create_loader, create_model, list_models, list_datasets 6 | from cvm.data import ImageNet1KRealLabelsEvaluator 7 | from cvm.models.ops.functional import * 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation') 12 | parser.add_argument('--dataset', type=str, default='ImageNet', choices=list_datasets() + ['ImageNet'], 13 | help='name of the dataset. (default: ImageNet)') 14 | parser.add_argument('--data-dir', type=str, default='/datasets/ILSVRC2012', 15 | help='path to the ImageNet dataset.') 16 | parser.add_argument('--model', '-m', type=str, default='mobilenet_v1_x1_0', choices=list_models(), 17 | help='type of model to use. (default: mobilenet_v1_x1_0)') 18 | parser.add_argument('--real-labels', type=str, default=None) 19 | parser.add_argument('--model-path', type=str, default=None) 20 | parser.add_argument('--model-weights', type=str, default='DEFAULT') 21 | parser.add_argument('--workers', '-j', type=int, default=8, metavar='N', 22 | help='number of data loading workers per GPU. (default: 8)') 23 | parser.add_argument('--batch-size', type=int, default=256, metavar='N', 24 | help='mini-batch size, this is the total batch size of all GPUs. 
(default: 256)') 25 | parser.add_argument('--num-classes', type=int, default=1000, metavar='N', 26 | help='number of label classes') 27 | parser.add_argument('--in-channels', type=int, default=3, metavar='N') 28 | parser.add_argument('--crop-size', type=int, default=224) 29 | parser.add_argument('--resize-size', type=int, default=232) 30 | parser.add_argument('--dali', action='store_true', help='use nvidia dali.') 31 | parser.add_argument('--dali-cpu', action='store_true', 32 | help='runs CPU based version of DALI pipeline. (default: false)') 33 | parser.add_argument('--bandpass', type=int, nargs='+', default=None) 34 | parser.add_argument('--bandreject', type=int, nargs='+', default=None) 35 | parser.add_argument('--filter-type', type=str, default="ideal", choices=['ideal', 'gaussian']) 36 | return parser.parse_args() 37 | 38 | 39 | def validate(val_loader, model, real_evaluator, args): 40 | top1 = AverageMeter() 41 | top5 = AverageMeter() 42 | 43 | mask = get_distance_grid(args.crop_size) 44 | 45 | model.eval() 46 | for (images, target) in tqdm(val_loader, desc='validating', unit='batch'): 47 | if args.bandpass is not None: 48 | assert len(args.bandpass) == 2, '--bandpass : [min, max]' 49 | if args.filter_type == 'ideal': 50 | kernel = (mask < args.bandpass[0]) | (mask > args.bandpass[1]) 51 | images = spectral_filter(images, lambda fr: torch.masked_fill(fr, kernel.to(fr.device), 0.0)) 52 | elif args.filter_type == 'gaussian': 53 | kernel = get_gaussian_bandpass_kernel2d( 54 | images.size()[-1], 55 | (args.bandpass[0] + args.bandpass[1]) / 2, 56 | args.bandpass[1] - args.bandpass[0] 57 | ) 58 | images = spectral_filter(images, lambda fr: fr * kernel.to(fr.device)) 59 | 60 | if args.bandreject is not None: 61 | assert len(args.bandreject) == 2, '--bandreject : [min, max]' 62 | if args.filter_type == 'ideal': 63 | kernel = (mask > args.bandreject[0]) & (mask < args.bandreject[1]) 64 | images = spectral_filter(images, lambda fr: torch.masked_fill(fr, kernel.to(fr.device), 0.0)) 65 | elif args.filter_type == 'gaussian': 66 | kernel = get_gaussian_bandpass_kernel2d( 67 | images.size()[-1], 68 | (args.bandreject[0] + args.bandreject[1]) / 2, 69 | args.bandreject[1] - args.bandreject[0] 70 | ) 71 | images = spectral_filter(images, lambda fr: fr * (1.0 - kernel.to(fr.device))) 72 | 73 | with torch.inference_mode(): 74 | output = model(images) 75 | 76 | if real_evaluator: 77 | real_evaluator.put(output) 78 | 79 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 80 | 81 | top1.update(acc1.item(), images.size(0)) 82 | top5.update(acc5.item(), images.size(0)) 83 | 84 | print( 85 | f' ================\n - top1: {top1.avg:6.3f}\n - top5: {top5.avg:6.3f}\n ================' 86 | ) 87 | if real_evaluator: 88 | print( 89 | f'Real Labels: \n ================\n - top1: {real_evaluator.accuracy[1]:6.3f}\n - top5: {real_evaluator.accuracy[5]:6.3f}\n ================' 90 | ) 91 | 92 | 93 | if __name__ == '__main__': 94 | assert torch.cuda.is_available(), 'CUDA IS NOT AVAILABLE!!' 
95 | torch.backends.cudnn.benchmark = True 96 | 97 | args = parse_args() 98 | print(json.dumps(vars(args), indent=4)) 99 | 100 | assert not (args.real_labels and args.dali), 'real-labels evaluation is not supported with the DALI loader.' 101 | 102 | model = create_model( 103 | args.model, 104 | in_channels=args.in_channels, 105 | num_classes=args.num_classes, 106 | thumbnail=(args.crop_size < 128), 107 | pretrained=True, 108 | pth=args.model_path, 109 | weights=args.model_weights 110 | ) 111 | 112 | val_loader = create_loader( 113 | args.dataset, 114 | root=args.data_dir, 115 | is_training=False, 116 | batch_size=args.batch_size, 117 | val_resize_size=args.resize_size, 118 | val_crop_size=args.crop_size, 119 | workers=args.workers, 120 | dali=args.dali, 121 | dali_cpu=args.dali_cpu 122 | ) 123 | 124 | real_evaluator = ImageNet1KRealLabelsEvaluator( 125 | val_loader.dataset.samples, 126 | args.real_labels 127 | ) if args.real_labels else None 128 | 129 | validate(val_loader, model, real_evaluator, args) 130 | --------------------------------------------------------------------------------