├── .gitignore
├── LICENSE
├── README.md
├── convert.py
└── examples
    ├── barlowtwins
    │   ├── BarlowTwins.res50.imagenet.256bs.224size.300e.lin_cls
    │   │   ├── README.md
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   └── net.py
    │   └── BarlowTwins.res50.imagenet.256bs.224size.300e
    │       ├── barlow_twins.py
    │       ├── config.py
    │       └── net.py
    ├── byol
    │   ├── BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars.lin_cls
    │   │   ├── README.md
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   └── net.py
    │   ├── BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars.online_linear
    │   │   ├── byol.py
    │   │   ├── config.py
    │   │   └── net.py
    │   └── BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars
    │       ├── byol.py
    │       ├── config.py
    │       └── net.py
    ├── classification
    │   ├── res34.scratch.stl10.96size.150e.new.v2
    │   │   ├── README.md
    │   │   ├── SVHN.py
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   ├── net.py
    │   │   └── stl10.py
    │   ├── res50.scratch.cifar10.32size.200e.v3.epoch_wise
    │   │   ├── README.md
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   └── net.py
    │   ├── res50.scratch.imagenet.224size.100e
    │   │   ├── README.md
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   └── net.py
    │   └── svhn.scratch.stl10.96size.150e.v3
    │       ├── README.md
    │       ├── SVHN.py
    │       ├── config.py
    │       ├── imagenet.py
    │       ├── net.py
    │       └── stl10.py
    ├── downstream
    │   ├── faster_rcnn
    │   │   └── faster_rcnn.res50.fpn.coco.multiscale.1x.syncbn
    │   │       ├── README.md
    │   │       ├── config.py
    │   │       └── net.py
    │   └── mask_rcnn
    │       └── mask_rcnn.res50.fpn.coco.multiscale.1x.syncbn
    │           ├── README.md
    │           ├── config.py
    │           └── net.py
    ├── moco
    │   ├── moco.res50.scratch.imagenet.224size.256bs.200e.lin_clsv2
    │   │   ├── README.md
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   └── net.py
    │   ├── moco.res50.scratch.imagenet.224size.256bs.200e
    │   │   ├── config.py
    │   │   ├── moco.py
    │   │   └── net.py
    │   ├── mocov2.res50.scratch.imagenet.224size.256bs.200e.lin_clsv2
    │   │   ├── README.md
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   └── net.py
    │   └── mocov2.res50.scratch.imagenet.224size.256bs.200e
    │       ├── config.py
    │       ├── moco.py
    │       └── net.py
    ├── momentum2teacher
    │   ├── m2t.imagenet.mom0.99.224size.100e.lin_cls
    │   │   ├── README.md
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   └── net.py
    │   └── m2t.imagenet.mom0.99.224size.100e
    │       ├── config.py
    │       ├── m2_teacher.py
    │       ├── net.py
    │       ├── resnet_mbn.py
    │       └── transforms.py
    ├── simclr
    │   ├── simclr.res50.scratch.imagenet.224size.256bs.200e.lin_cls
    │   │   ├── README.md
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   └── net.py
    │   ├── simclr.res50.scratch.imagenet.224size.256bs.200e.moco_setting.lin_clsv2
    │   │   ├── README.md
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   └── net.py
    │   ├── simclr.res50.scratch.imagenet.224size.256bs.200e.moco_setting
    │   │   ├── config.py
    │   │   ├── loader.py
    │   │   ├── net.py
    │   │   ├── nt_xent2.py
    │   │   └── simclr.py
    │   └── simclr.res50.scratch.imagenet.224size.256bs.200e
    │       ├── config.py
    │       ├── net.py
    │       ├── nt_xent2.py
    │       └── simclr.py
    ├── simo
    │   ├── simo.res50.scratch.imagenet.224size.256bs.200e.lin_cls
    │   │   ├── README.md
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   └── net.py
    │   └── simo.res50.scratch.imagenet.224size.256bs.200e
    │       ├── config.py
    │       ├── net.py
    │       └── simo.py
    ├── simsiam
    │   ├── SimSiam.res18.cifar10.512bs.32size.800e.lin_cls
    │   │   ├── README.md
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   └── net.py
    │   ├── SimSiam.res18.cifar10.512bs.32size.800e
    │   │   ├── config.py
    │   │   ├── net.py
    │   │   └── simsiam.py
    │   ├── SimSiam.res50.imagenet.256bs.224size.100e.lin_cls
    │   │   ├── README.md
    │   │   ├── config.py
    │   │   ├── imagenet.py
    │   │   └── net.py
    │   └── SimSiam.res50.imagenet.256bs.224size.100e
    │       ├── config.py
    │       ├── net.py
    │       └── simsiam.py
    └── swav
        ├── swav.res50.imagenet.256bs.2x224_6x96.200e.lars.lin_cls
        │   ├── README.md
        │   ├── config.py
        │   ├── imagenet.py
        │   ├── net.py
        │   └── swav_resnet.py
        ├── swav.res50.imagenet.256bs.2x224_6x96.200e.lars
        │   ├── config.py
        │   ├── net.py
        │   ├── swav.py
        │   ├── swav_resnet.py
        │   └── swav_trainer.py
        ├── swav.res50.imagenet.256bs.2x224_6x96.200e.lin_cls
        │   ├── README.md
        │   ├── config.py
        │   ├── imagenet.py
        │   ├── net.py
        │   └── swav_resnet.py
        └── swav.res50.imagenet.256bs.2x224_6x96.200e
            ├── config.py
            ├── net.py
            ├── swav.py
            ├── swav_resnet.py
            └── swav_trainer.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# output dir
.vscode/

*.jpg
*.png
*.txt

# compilation and distribution
__pycache__
_ext
*.pyc
*.so

# pytorch/python/numpy formats
*.pth
*.pkl
*.npy

# ipython/jupyter notebooks
*.ipynb
**/.ipynb_checkpoints/

# Editor temporaries
*.swn
*.swo
*.swp
*~

# Pycharm editor settings
.idea

# soft link
**/log

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# SelfSup

A collection of self-supervised methods (MoCo series, SimCLR, **SiMo**, BYOL, SimSiam, SwAV, PointContrast, etc.).


## Get Started

### Install cvpods

Install cvpods from https://github.com/Megvii-BaseDetection/cvpods.git following its instructions.

### Prepare Datasets

```shell
cd cvpods
ln -s /path/to/your/ImageNet datasets/imagenet
```

### Train your own models

```shell
cd /path/to/your/SelfSup/examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e
# pre-train
pods_train --num-gpus 8
# convert the checkpoint to pretrained weights
python convert.py simclr.res50.scratch.imagenet.224size.256bs.200e/log/model_final.pth weights.pkl
# downstream evaluation
cd /path/to/your/simclr.res50.scratch.imagenet.224size.256bs.200e.lin_cls
pods_train --num-gpus 8 MODEL.WEIGHTS /path/to/your/weights.pkl
```
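Before launching the downstream run, it can help to sanity-check the converted checkpoint. A minimal sketch, assuming the `weights.pkl` produced above (`convert.py` appears later in this listing):

```python
import pickle

with open("weights.pkl", "rb") as f:
    ckpt = pickle.load(f)

# convert.py writes the renamed weights as numpy arrays under "model",
# plus two bookkeeping fields used when the weights are loaded.
print(ckpt["__author__"], ckpt["matching_heuristics"])
for name, array in list(ckpt["model"].items())[:5]:
    print(name, array.shape)
```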
## Model Zoo

### Supervised Classification

#### ImageNet

| Methods | Training Schedule | Top 1 Acc |
| ------- | ----------------- | --------- |
| Res50   | 100e              | 76.4      |

#### CIFAR 10

| Methods | Training Schedule | Top 1 Acc |
| ------- | ----------------- | --------- |
| Res50   | 200e              | 95.4      |

#### STL 10

| Methods | Training Schedule | Top 1 Acc |
| ------- | ----------------- | --------- |
| Res50   | 150e              | 86.1      |


### Self-Supervised Learning - Classification

> All results in the table below are trained with a ResNet-50 backbone and reported on the ILSVRC2012 dataset.

| Methods     | Training Schedule | Batch Size | Our Acc@1   | Official Acc@1 |
| ----------- | ----------------- | ---------- | ----------- | -------------- |
| MoCo        | 200e | 256  | 60.5        | 60.5 |
| MoCov2      | 200e | 256  | **67.6**    | 67.5 |
| SimCLR      | 200e | 256  | **63.2**    | 61.9 |
| **SimCLR*** | 200e | 256  | **67.3**    | **Ours** |
| **SiMo**    | 200e | 256  | **68.1**    | **Ours** |
| SimSiam     | 100e | 256  | 67.6        | 67.7 |
| SwAV        | 200e | 256  | **73.0**    | 72.7 |
| BYOL        | 100e | 2048 | **69.8**    | 66.5 (bs 4096, from the SimSiam paper) |
| BarlowTwins | 300e | 1024 | Coming soon | 71.7 |

### Self-Supervised Learning - Detection (2D)

> All results reported below are pretrained on ILSVRC2012 and evaluated on MS COCO with a Faster-RCNN-FPN detector and a ResNet-50 backbone.

| Methods | Training Schedule | Batch Size | Box AP |
| ------- | ----------------- | ---------- | ------ |
| SCRL    | 200e | 4096 | 39.9 (official: 40.5 with bs 8192) |
| DetCon  | 200e | 256  | Coming soon |

### Self-Supervised Learning - 3D Scene Understanding

| Methods       | Training Schedule | Downstream task |
| ------------- | ----------------- | --------------- |
| PointContrast | -                 | Coming soon     |


## Citation

SelfSup is part of [cvpods](https://github.com/Megvii-BaseDetection/cvpods), so if you find this repo useful in your research, or if you want to refer to the implementations in this repo, please consider citing:

```BibTeX
@article{zhu2020eqco,
  title={EqCo: Equivalent Rules for Self-supervised Contrastive Learning},
  author={Zhu, Benjin and Huang, Junqiang and Li, Zeming and Zhang, Xiangyu and Sun, Jian},
  journal={arXiv preprint arXiv:2010.01929},
  year={2020}
}

@misc{zhu2020cvpods,
  title={cvpods: All-in-one Toolbox for Computer Vision Research},
  author={Zhu*, Benjin and Wang*, Feng and Wang, Jianfeng and Yang, Siwei and Chen, Jianhu and Li, Zeming},
  year={2020}
}
```

--------------------------------------------------------------------------------
/convert.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# Usage: python convert.py <model_final.pth> <output_weights.pkl>

import pickle as pkl
import sys

import torch

if __name__ == "__main__":
    input = sys.argv[1]

    obj = torch.load(input, map_location="cpu")
    obj = obj["model"]

    # Tag the source of the weights from the raw keys: MoCo-style checkpoints
    # prefix the query encoder with "encoder_q.", plain classification
    # backbones with "network.". This must be checked before the prefixes are
    # stripped in the loop below.
    is_moco = any(k.startswith("encoder_q.") for k in obj)

    newmodel = {}
    for k, v in obj.items():
        if not k.startswith("encoder_q.") and not k.startswith("network"):
            continue
        old_k = k
        if k.startswith("encoder_q."):
            k = k.replace("encoder_q.", "")
        elif k.startswith("network"):
            k = k.replace("network.", "")
        print(old_k, "->", k)
        newmodel[k] = v.numpy()

    res = {
        "model": newmodel,
        "__author__": "MOCO" if is_moco else "CLS",
        "matching_heuristics": True
    }

    with open(sys.argv[2], "wb") as f:
        pkl.dump(res, f)

--------------------------------------------------------------------------------
/examples/barlowtwins/BarlowTwins.res50.imagenet.256bs.224size.300e.lin_cls/README.md:
--------------------------------------------------------------------------------
# BarlowTwins.res50.imagenet.256bs.224size.300e.lin_cls

## Evaluation results for classification:

| Top_1 Acc | Top_5 Acc |
|:---------:|:---------:|
|  65.310   |  86.670   |

--------------------------------------------------------------------------------
/examples/barlowtwins/BarlowTwins.res50.imagenet.256bs.224size.300e.lin_cls/config.py:
--------------------------------------------------------------------------------
import os.path as osp
import torchvision.transforms as transforms

from cvpods.configs.base_classification_config import BaseClassificationConfig

_config_dict = dict(
    MODEL=dict(
        WEIGHTS="../BarlowTwins.res50.imagenet.256bs.224size.300e/log/model_final.pkl",
        BACKBONE=dict(FREEZE_AT=0, ),  # freeze all parameters manually in imagenet.py
        RESNETS=dict(
            DEPTH=50,
            NUM_CLASSES=1000,
            NORM="BN",
            OUT_FEATURES=["res5", "linear"],
            STRIDE_IN_1X1=False,
        ),
    ),
    DATASETS=dict(
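        # ImageNet-1k splits for the linear evaluation protocol: the pretrained
        # backbone is kept frozen (see freeze() in imagenet.py) and only the
        # linear classifier on top is trained.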
| TRAIN=("imagenet_train", ), 21 | TEST=("imagenet_val", ), 22 | ), 23 | DATALOADER=dict( 24 | NUM_WORKERS=4, 25 | ), 26 | SOLVER=dict( 27 | LR_SCHEDULER=dict( 28 | NAME="WarmupCosineLR", 29 | MAX_EPOCH=100, 30 | WARMUP_ITERS=0, 31 | ), 32 | OPTIMIZER=dict( 33 | NAME="SGD", 34 | BASE_LR=0.3, 35 | MOMENTUM=0.9, 36 | WEIGHT_DECAY=1e-6, 37 | ), 38 | CHECKPOINT_PERIOD=10, 39 | IMS_PER_BATCH=256, 40 | IMS_PER_DEVICE=32, 41 | ), 42 | INPUT=dict( 43 | FORMAT="RGB", 44 | AUG=dict( 45 | TRAIN_PIPELINES=[ 46 | ("Torch_Compose", transforms.Compose([ 47 | transforms.RandomResizedCrop(224), 48 | transforms.RandomHorizontalFlip(), 49 | transforms.ToTensor(), 50 | transforms.Normalize( 51 | mean=[0.485, 0.456, 0.406], 52 | std=[0.229, 0.224, 0.225]), 53 | ])), 54 | ], 55 | TEST_PIPELINES=[ 56 | ("Torch_Compose", transforms.Compose([ 57 | transforms.Resize(256), 58 | transforms.CenterCrop(224), 59 | transforms.ToTensor(), 60 | transforms.Normalize( 61 | mean=[0.485, 0.456, 0.406], 62 | std=[0.229, 0.224, 0.225]), 63 | ])) 64 | ], 65 | ) 66 | ), 67 | TEST=dict( 68 | EVAL_PERIOD=10, 69 | ), 70 | OUTPUT_DIR=osp.join( 71 | '/data/Outputs/model_logs/cvpods_playground/self_supervised', 72 | osp.split(osp.realpath(__file__))[0].split("self_supervised/")[-1] 73 | ) 74 | ) 75 | 76 | 77 | class ClassificationConfig(BaseClassificationConfig): 78 | def __init__(self): 79 | super(ClassificationConfig, self).__init__() 80 | self._register_configuration(_config_dict) 81 | 82 | 83 | config = ClassificationConfig() 84 | -------------------------------------------------------------------------------- /examples/barlowtwins/BarlowTwins.res50.imagenet.256bs.224size.300e.lin_cls/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec 6 | 7 | 8 | def accuracy(output, target, topk=(1,)): 9 | """Computes the accuracy over the k top predictions for the specified values of k""" 10 | with torch.no_grad(): 11 | maxk = max(topk) 12 | batch_size = target.size(0) 13 | 14 | _, pred = output.topk(maxk, 1, True, True) 15 | pred = pred.t() 16 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 17 | 18 | res = [] 19 | for k in topk: 20 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 21 | res.append(correct_k.mul_(100.0 / batch_size)) 22 | return res 23 | 24 | 25 | class Classification(nn.Module): 26 | def __init__(self, cfg): 27 | super(Classification, self).__init__() 28 | 29 | self.device = torch.device(cfg.MODEL.DEVICE) 30 | 31 | self.network = cfg.build_backbone( 32 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 33 | 34 | self.freeze() 35 | self.network.eval() 36 | 37 | # init the fc layer 38 | self.network.linear.weight.data.normal_(mean=0.0, std=0.01) 39 | self.network.linear.bias.data.zero_() 40 | 41 | self.loss_evaluator = nn.CrossEntropyLoss() 42 | 43 | self.to(self.device) 44 | 45 | def freeze(self): 46 | for name, param in self.network.named_parameters(): 47 | if name not in ['linear.weight', 'linear.bias']: 48 | param.requires_grad = False 49 | 50 | def forward(self, batched_inputs): 51 | self.network.eval() 52 | images = torch.stack([x["image"] for x in batched_inputs]).to(self.device) 53 | outputs = self.network(images) 54 | preds = outputs["linear"] 55 | 56 | if self.training: 57 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 58 | losses = self.loss_evaluator(preds, labels) 59 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 60 
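            # accuracy() above runs under torch.no_grad() and returns
            # percentages; acc1/acc5 are packed into the output dict alongside
            # the loss so they can be monitored during linear evaluation, and
            # they carry no gradient.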
| 61 | return { 62 | "loss_cls": losses, 63 | "top1_acc": acc1, 64 | "top5_acc": acc5, 65 | } 66 | else: 67 | return preds 68 | -------------------------------------------------------------------------------- /examples/barlowtwins/BarlowTwins.res50.imagenet.256bs.224size.300e.lin_cls/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from imagenet import Classification 8 | 9 | def build_backbone(cfg, input_shape=None): 10 | """ 11 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 12 | 13 | Returns: 14 | an instance of :class:`Backbone` 15 | """ 16 | if input_shape is None: 17 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 18 | 19 | backbone = build_resnet_backbone(cfg, input_shape) 20 | assert isinstance(backbone, Backbone) 21 | return backbone 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = Classification(cfg) 28 | 29 | logger = logging.getLogger(__name__) 30 | logger.info("Model:\n{}".format(model)) 31 | return model 32 | -------------------------------------------------------------------------------- /examples/barlowtwins/BarlowTwins.res50.imagenet.256bs.224size.300e/barlow_twins.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from cvpods.utils import comm 5 | 6 | from cvpods.layers import ShapeSpec 7 | 8 | 9 | def off_diagonal(x): 10 | # return a flattened view of the off-diagonal elements of a square matrix 11 | n, m = x.shape 12 | assert n == m 13 | return x.flatten()[:-1].view(n - 1, n + 1)[:, 1:].flatten() 14 | 15 | 16 | class BarlowTwins(nn.Module): 17 | def __init__(self, cfg): 18 | super().__init__() 19 | 20 | self.device = torch.device(cfg.MODEL.DEVICE) 21 | 22 | self.backbone = cfg.build_backbone( 23 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 24 | self.backbone.linear = nn.Identity() 25 | 26 | self.lambd = cfg.MODEL.BT.LAMBD 27 | self.scale_loss = cfg.MODEL.BT.SCALE_LOSS 28 | 29 | # projector 30 | sizes = [2048] + list(map(int, cfg.MODEL.BT.PROJECTOR.split('-'))) 31 | layers = [] 32 | for i in range(len(sizes) - 2): 33 | layers.append(nn.Linear(sizes[i], sizes[i + 1], bias=False)) 34 | layers.append(nn.BatchNorm1d(sizes[i + 1])) 35 | layers.append(nn.ReLU(inplace=True)) 36 | layers.append(nn.Linear(sizes[-2], sizes[-1], bias=False)) 37 | self.projector = nn.Sequential(*layers) 38 | 39 | # normalization layer for the representations z1 and z2 40 | self.bn = nn.BatchNorm1d(sizes[-1], affine=False) 41 | 42 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1) 43 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1) 44 | self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std 45 | 46 | self.to(self.device) 47 | 48 | def forward(self, batched_inputs): 49 | 50 | cur_bs = len(batched_inputs) 51 | 52 | t_inputs = [bi["t"] for bi in batched_inputs] 53 | p_inputs = [bi["t_prime"] for bi in batched_inputs] 54 | 55 | y1 = self.preprocess_image([bi["image"][0] for bi in t_inputs]) 56 | y2 = self.preprocess_image([bi["image"][0] for bi in p_inputs]) 57 | 58 | z1 = self.projector(self.backbone(y1)["linear"]) 59 | z2 = self.projector(self.backbone(y2)["linear"]) 60 | 61 | # empirical cross-correlation matrix 62 | c = self.bn(z1).T @ self.bn(z2) 
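        # `c` is the D x D empirical cross-correlation matrix of the two
        # batch-normalized embeddings (D = projector output dim, 8192 here).
        # The Barlow Twins objective drives its diagonal toward 1 (invariance
        # to augmentation) and its off-diagonal toward 0 (redundancy
        # reduction): loss = sum_i (1 - c_ii)^2 + lambd * sum_{i!=j} c_ij^2.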
63 | 64 | # sum the cross-correlation matrix between all gpus 65 | c.div_(cur_bs * comm.get_world_size()) 66 | torch.distributed.all_reduce(c) 67 | 68 | # use --scale-loss to multiply the loss by a constant factor 69 | # see the Issues section of the readme 70 | on_diag = torch.diagonal(c).add_(-1).pow_(2).sum().mul(self.scale_loss) 71 | off_diag = off_diagonal(c).pow_(2).sum().mul(self.scale_loss) 72 | loss = on_diag + self.lambd * off_diag 73 | return dict(loss=loss) 74 | 75 | def preprocess_image(self, batched_inputs): 76 | """ 77 | Normalize, pad and batch the input images. 78 | """ 79 | # images = [x["image"].float().to(self.device) for x in batched_inputs] 80 | images = [x.float().to(self.device) for x in batched_inputs] 81 | images = torch.stack([self.normalizer(x) for x in images]) 82 | 83 | return images 84 | -------------------------------------------------------------------------------- /examples/barlowtwins/BarlowTwins.res50.imagenet.256bs.224size.300e/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from PIL import Image 5 | 6 | from cvpods.configs.base_classification_config import BaseClassificationConfig 7 | 8 | _config_dict = dict( 9 | MODEL=dict( 10 | WEIGHTS="", 11 | AS_PRETRAIN=True, 12 | RESNETS=dict( 13 | DEPTH=50, 14 | NUM_CLASSES=1000, 15 | NORM="BN", 16 | OUT_FEATURES=["linear"], 17 | STRIDE_IN_1X1=False, # default true for msra models 18 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 19 | ), 20 | BT=dict( 21 | PROJECTOR="8192-8192-8192", 22 | LAMBD=3.9e-3, 23 | SCALE_LOSS=1 / 32, 24 | # LAMBD=0.0051, 25 | # SCALE_LOSS=0.024, 26 | ), 27 | ), 28 | DATASETS=dict( 29 | TRAIN=("imagenet_train", ), 30 | TEST=("imagenet_val", ), 31 | ), 32 | DATALOADER=dict(NUM_WORKERS=6, ), 33 | SOLVER=dict( 34 | LR_SCHEDULER=dict( 35 | NAME="WarmupCosineLR", 36 | MAX_EPOCH=300, 37 | WARMUP_ITERS=10, 38 | ), 39 | OPTIMIZER=dict( 40 | NAME="LARS_SGD", 41 | EPS=1e-8, 42 | TRUST_COEF=1e-3, 43 | CLIP=False, 44 | BASE_LR=0.2, # 0.2 for bs 256 => 4.8 for 4096 45 | MOMENTUM=0.9, 46 | WEIGHT_DECAY=1e-6, 47 | EXCLUDE_BIAS_AND_BN=True, 48 | ), 49 | CHECKPOINT_PERIOD=10, 50 | IMS_PER_BATCH=256, 51 | IMS_PER_DEVICE=32, # 8 gpus per node 52 | BATCH_SUBDIVISIONS=1, # Simulate Batch Size 4096 53 | ), 54 | INPUT=dict( 55 | AUG=dict( 56 | TRAIN_PIPELINES=dict( 57 | t=[ 58 | ("RepeatList", dict(transforms=[ 59 | ("Torch_Compose", transforms.Compose([ 60 | transforms.RandomResizedCrop(224, interpolation=Image.BICUBIC), 61 | transforms.RandomHorizontalFlip(p=0.5), 62 | transforms.RandomApply([ 63 | transforms.ColorJitter(0.4, 0.4, 0.2, 0.1)], p=0.8), 64 | transforms.RandomGrayscale(p=0.2), 65 | ])), 66 | ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=1.0)), 67 | ("RandomSolarization", dict(p=0.0)), 68 | ], repeat_times=1)), 69 | ], 70 | t_prime=[ 71 | ("RepeatList", dict(transforms=[ 72 | ("Torch_Compose", transforms.Compose([ 73 | transforms.RandomResizedCrop(224, interpolation=Image.BICUBIC), 74 | transforms.RandomHorizontalFlip(p=0.5), 75 | transforms.RandomApply([ 76 | transforms.ColorJitter(0.4, 0.4, 0.2, 0.1)], p=0.8), 77 | transforms.RandomGrayscale(p=0.2), 78 | ])), 79 | ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=0.1)), 80 | ("RandomSolarization", dict(p=0.2)), 81 | ], repeat_times=1)), 82 | ], 83 | ) 84 | )), 85 | TRAINER=dict(FP16=dict(ENABLED=False, OPTS=dict(OPT_LEVEL="O1"))), 86 | OUTPUT_DIR=osp.join( 87 | 
'/data/Outputs/model_logs/cvpods_playground/self_supervised', 88 | osp.split(osp.realpath(__file__))[0].split("self_supervised/")[-1])) 89 | 90 | 91 | class MoCoV2Config(BaseClassificationConfig): 92 | def __init__(self): 93 | super(MoCoV2Config, self).__init__() 94 | self._register_configuration(_config_dict) 95 | 96 | 97 | config = MoCoV2Config() 98 | -------------------------------------------------------------------------------- /examples/barlowtwins/BarlowTwins.res50.imagenet.256bs.224size.300e/net.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from cvpods.utils import comm 8 | 9 | from barlow_twins import BarlowTwins 10 | 11 | 12 | def build_backbone(cfg, input_shape=None): 13 | """ 14 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 15 | 16 | Returns: 17 | an instance of :class:`Backbone` 18 | """ 19 | if input_shape is None: 20 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 21 | 22 | backbone = build_resnet_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_model(cfg): 28 | 29 | cfg.build_backbone = build_backbone 30 | 31 | model = BarlowTwins(cfg) 32 | if comm.get_world_size() > 1: 33 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model) 34 | 35 | return model 36 | -------------------------------------------------------------------------------- /examples/byol/BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars.lin_cls/README.md: -------------------------------------------------------------------------------- 1 | # BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars.lin_cls 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 69.782 | 89.434 | 8 | -------------------------------------------------------------------------------- /examples/byol/BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars.lin_cls/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="../BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars/log/model_final.pkl", 9 | BACKBONE=dict(FREEZE_AT=0, ), # freeze all parameters manually in imagenet.py 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["res5", "linear"], 15 | STRIDE_IN_1X1=False, 16 | ), 17 | ), 18 | DATASETS=dict( 19 | TRAIN=("imagenet_nori_train", ), 20 | TEST=("imagenet_nori_val", ), 21 | ), 22 | DATALOADER=dict( 23 | NUM_WORKERS=6, 24 | ), 25 | SOLVER=dict( 26 | LR_SCHEDULER=dict( 27 | NAME="WarmupCosineLR", 28 | MAX_EPOCH=80, 29 | WARMUP_ITERS=0, 30 | ), 31 | OPTIMIZER=dict( 32 | NAME="SGD", 33 | BASE_LR=0.4, 34 | MOMENTUM=0.9, 35 | WEIGHT_DECAY=0.0, 36 | NESTEROV=True, 37 | ), 38 | CHECKPOINT_PERIOD=10, 39 | IMS_PER_BATCH=256, 40 | ), 41 | INPUT=dict( 42 | AUG=dict( 43 | TRAIN_PIPELINES=[ 44 | ("Torch_RRC", transforms.RandomResizedCrop(224)), 45 | ("Torch_RHF", transforms.RandomHorizontalFlip()), 46 | ], 47 | TEST_PIPELINES=[ 48 | ("Torch_R", transforms.Resize(256)), 49 | ("Torch_CC", transforms.CenterCrop(224)), 50 | ] 51 | ) 52 | ), 53 | TEST=dict( 54 | EVAL_PERIOD=10, 55 | ), 56 | 
OUTPUT_DIR=osp.join( 57 | '/data/Outputs/model_logs/cvpods_playground', 58 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 59 | ) 60 | ) 61 | 62 | 63 | class ClassificationConfig(BaseClassificationConfig): 64 | def __init__(self): 65 | super(ClassificationConfig, self).__init__() 66 | self._register_configuration(_config_dict) 67 | 68 | 69 | config = ClassificationConfig() 70 | -------------------------------------------------------------------------------- /examples/byol/BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars.lin_cls/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec 6 | from cvpods.structures import ImageList 7 | 8 | 9 | def accuracy(output, target, topk=(1,)): 10 | """Computes the accuracy over the k top predictions for the specified values of k""" 11 | with torch.no_grad(): 12 | maxk = max(topk) 13 | batch_size = target.size(0) 14 | 15 | _, pred = output.topk(maxk, 1, True, True) 16 | pred = pred.t() 17 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 18 | 19 | res = [] 20 | for k in topk: 21 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 22 | res.append(correct_k.mul_(100.0 / batch_size)) 23 | return res 24 | 25 | 26 | class Classification(nn.Module): 27 | def __init__(self, cfg): 28 | super(Classification, self).__init__() 29 | 30 | self.device = torch.device(cfg.MODEL.DEVICE) 31 | 32 | self.network = cfg.build_backbone( 33 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 34 | 35 | self.freeze() 36 | self.network.eval() 37 | 38 | # init the fc layer 39 | self.network.linear.weight.data.normal_(mean=0.0, std=0.01) 40 | self.network.linear.bias.data.zero_() 41 | 42 | self.loss_evaluator = nn.CrossEntropyLoss() 43 | 44 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1) 45 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1) 46 | self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std 47 | 48 | self.to(self.device) 49 | 50 | def freeze(self): 51 | for name, param in self.network.named_parameters(): 52 | if name not in ['linear.weight', 'linear.bias']: 53 | param.requires_grad = False 54 | 55 | def forward(self, batched_inputs): 56 | self.network.eval() 57 | images = self.preprocess_image(batched_inputs) 58 | 59 | outputs = self.network(images.tensor) 60 | preds = outputs["linear"] 61 | 62 | if self.training: 63 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 64 | losses = self.loss_evaluator(preds, labels) 65 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 66 | 67 | return { 68 | "loss_cls": losses, 69 | "top1_acc": acc1, 70 | "top5_acc": acc5, 71 | } 72 | else: 73 | return preds 74 | 75 | def preprocess_image(self, batched_inputs): 76 | """ 77 | Normalize, pad and batch the input images. 
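        Each image is scaled to [0, 1], normalized with the configured pixel
        mean/std, and padded into an ImageList that respects the backbone's
        size divisibility.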
78 | """ 79 | images = [x["image"].float().to(self.device) for x in batched_inputs] 80 | images = [self.normalizer(x) for x in images] 81 | images = ImageList.from_tensors(images, self.network.size_divisibility) 82 | return images 83 | -------------------------------------------------------------------------------- /examples/byol/BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars.lin_cls/net.py: -------------------------------------------------------------------------------- 1 | from cvpods.layers import ShapeSpec 2 | from cvpods.modeling.backbone import Backbone 3 | from cvpods.modeling.backbone import build_resnet_backbone 4 | 5 | from imagenet import Classification 6 | 7 | def build_backbone(cfg, input_shape=None): 8 | """ 9 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 10 | 11 | Returns: 12 | an instance of :class:`Backbone` 13 | """ 14 | if input_shape is None: 15 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 16 | 17 | backbone = build_resnet_backbone(cfg, input_shape) 18 | assert isinstance(backbone, Backbone) 19 | return backbone 20 | 21 | def build_model(cfg): 22 | 23 | cfg.build_backbone = build_backbone 24 | 25 | model = Classification(cfg) 26 | 27 | return model 28 | -------------------------------------------------------------------------------- /examples/byol/BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars.online_linear/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="", 9 | AS_PRETRAIN=True, 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["res5"], 15 | STRIDE_IN_1X1=False, # default true for msra models 16 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 17 | ), 18 | BYOL=dict( 19 | BASE_MOMENTUM=0.99, 20 | PROJ_DEPTH=2, 21 | PROJ_DIM=4096, 22 | OUT_DIM=256, 23 | ), 24 | ), 25 | DATASETS=dict( 26 | TRAIN=("imagenet_nori_train", ), 27 | TEST=("imagenet_nori_val", ), 28 | ), 29 | DATALOADER=dict(NUM_WORKERS=6, ), 30 | SOLVER=dict( 31 | LR_SCHEDULER=dict( 32 | NAME="WarmupCosineLR", 33 | MAX_EPOCH=100, 34 | WARMUP_ITERS=5, 35 | ), 36 | OPTIMIZER=dict( 37 | NAME="LARS_SGD", 38 | EPS=1e-8, 39 | TRUST_COEF=1e-3, 40 | CLIP=False, 41 | # _LR_PRESETS = {40: 0.45, 100: 0.45, 300: 0.3, 1000: 0.2} 42 | # _WD_PRESETS = {40: 1e-6, 100: 1e-6, 300: 1e-6, 1000: 1.5e-6} 43 | # _EMA_PRESETS = {40: 0.97, 100: 0.99, 300: 0.99, 1000: 0.996} 44 | BASE_LR=0.45 * 4, # 0.3 for bs 256 => 4.8 for 4096 45 | MOMENTUM=0.9, 46 | WEIGHT_DECAY=1e-6, 47 | WD_EXCLUDE_BN_BIAS=True, 48 | ), 49 | CHECKPOINT_PERIOD=10, 50 | IMS_PER_BATCH=1024, 51 | IMS_PER_DEVICE=128, # 8 gpus per node 52 | BATCH_SUBDIVISIONS=1, # Simulate Batch Size 4096 53 | ), 54 | INPUT=dict( 55 | AUG=dict( 56 | TRAIN_PIPELINES=dict( 57 | q=[ 58 | ("RepeatList", dict(transforms=[ 59 | ("Torch_Compose", transforms.Compose([ 60 | transforms.RandomResizedCrop(224), 61 | transforms.RandomHorizontalFlip(), 62 | transforms.RandomApply([ 63 | transforms.ColorJitter(0.4, 0.4, 0.2, 0.1)], p=0.8), 64 | transforms.RandomGrayscale(p=0.2), 65 | ])), 66 | ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=1.0)), 67 | ("RandomSolarization", dict(p=0.0)), 68 | ("Torch_Compose", transforms.Compose([ 69 | transforms.ToTensor(), 70 | transforms.Normalize( 71 | mean=[0.485, 0.456, 0.406], 72 | std=[0.229, 0.224, 
0.225]) 73 | ])), 74 | ], repeat_times=1)), 75 | ], 76 | k=[ 77 | ("RepeatList", dict(transforms=[ 78 | ("Torch_Compose", transforms.Compose([ 79 | transforms.RandomResizedCrop(224, scale=(0.08, 1.)), 80 | transforms.RandomHorizontalFlip(), 81 | transforms.RandomApply([ 82 | transforms.ColorJitter(0.4, 0.4, 0.2, 0.1)], p=0.8), 83 | transforms.RandomGrayscale(p=0.2), 84 | ])), 85 | ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=0.1)), 86 | ("RandomSolarization", dict(p=0.2)), 87 | ("Torch_Compose", transforms.Compose([ 88 | transforms.ToTensor(), 89 | transforms.Normalize( 90 | mean=[0.485, 0.456, 0.406], 91 | std=[0.229, 0.224, 0.225]) 92 | ])), 93 | ], repeat_times=1)), 94 | ], 95 | linear=[ 96 | ("Torch_Compose", transforms.Compose([ 97 | transforms.RandomResizedCrop(224), 98 | transforms.RandomHorizontalFlip(), 99 | transforms.ToTensor(), 100 | transforms.Normalize( 101 | mean=[0.485, 0.456, 0.406], 102 | std=[0.229, 0.224, 0.225] 103 | ), 104 | ])), 105 | ], 106 | ) 107 | )), 108 | TRAINER=dict(FP16=dict(ENABLED=False, OPTS=dict(OPT_LEVEL="O1"))), 109 | OUTPUT_DIR=osp.join( 110 | '/data/Outputs/model_logs/cvpods_playground', 111 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1])) 112 | 113 | 114 | class MoCoV2Config(BaseClassificationConfig): 115 | def __init__(self): 116 | super(MoCoV2Config, self).__init__() 117 | self._register_configuration(_config_dict) 118 | 119 | 120 | config = MoCoV2Config() 121 | -------------------------------------------------------------------------------- /examples/byol/BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars.online_linear/net.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from byol import BYOL 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
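    If `input_shape` is None, the channel count is inferred from
    `cfg.MODEL.PIXEL_MEAN` (3 channels for RGB).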
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_resnet_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_model(cfg): 26 | 27 | cfg.build_backbone = build_backbone 28 | 29 | model = BYOL(cfg) 30 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model) 31 | 32 | return model 33 | -------------------------------------------------------------------------------- /examples/byol/BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars/byol.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | from torch.nn import functional as F 6 | 7 | from cvpods.layers import ShapeSpec 8 | 9 | 10 | class EncoderWithProjection(nn.Module): 11 | def __init__(self, cfg): 12 | super(EncoderWithProjection, self).__init__() 13 | self.proj_dim = cfg.MODEL.BYOL.PROJ_DIM 14 | self.out_dim = cfg.MODEL.BYOL.OUT_DIM 15 | 16 | self.encoder = cfg.build_backbone( 17 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 18 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 19 | 20 | self.projector = nn.Sequential( 21 | nn.Linear(2048, self.proj_dim), 22 | nn.BatchNorm1d(self.proj_dim), 23 | nn.ReLU(), 24 | nn.Linear(self.proj_dim, self.out_dim, bias=False), 25 | ) 26 | 27 | def forward(self, x): 28 | embedding = torch.flatten(self.avgpool(self.encoder(x)["res5"]), 1) 29 | return self.projector(embedding) 30 | 31 | 32 | class BYOL(nn.Module): 33 | def __init__(self, cfg): 34 | super(BYOL, self).__init__() 35 | 36 | self.device = torch.device(cfg.MODEL.DEVICE) 37 | self.base_mom = cfg.MODEL.BYOL.BASE_MOMENTUM 38 | self.total_steps = cfg.SOLVER.LR_SCHEDULER.MAX_ITER * cfg.SOLVER.BATCH_SUBDIVISIONS 39 | 40 | self.online_network = EncoderWithProjection(cfg) 41 | self.target_network = EncoderWithProjection(cfg) 42 | 43 | self.size_divisibility = self.online_network.encoder.size_divisibility 44 | 45 | self.predictor = nn.Sequential( 46 | nn.Linear(self.online_network.out_dim, self.online_network.proj_dim), 47 | nn.BatchNorm1d(self.online_network.proj_dim), 48 | nn.ReLU(), 49 | nn.Linear(self.online_network.proj_dim, self.online_network.out_dim, bias=False), 50 | ) 51 | 52 | for param_q, param_k in zip(self.online_network.parameters(), self.target_network.parameters()): 53 | param_k.data.copy_(param_q.data) # initialize 54 | param_k.requires_grad = False # not update by gradient 55 | 56 | self.register_parameter("step", nn.Parameter(torch.zeros(1), requires_grad=False)) 57 | self.register_parameter("mom", nn.Parameter(torch.zeros(1), requires_grad=False)) 58 | 59 | self.to(self.device) 60 | 61 | def losses(self, preds, targets): 62 | bz = preds.size(0) 63 | preds_norm = F.normalize(preds, dim=1) 64 | targets_norm = F.normalize(targets, dim=1) 65 | loss = 2 - 2 * (preds_norm * targets_norm).sum() / bz 66 | return loss 67 | 68 | def update_mom(self): 69 | mom = 1 - (1 - self.base_mom) * (math.cos(math.pi * self.step.item() / self.total_steps) + 1) / 2. 70 | self.step += 1 71 | return mom 72 | 73 | @torch.no_grad() 74 | def _momentum_update_key_encoder(self): 75 | """ 76 | Momentum update of the key encoder 77 | """ 78 | mom = self.update_mom() 79 | self.mom[0] = mom 80 | for param_q, param_k in zip(self.online_network.parameters(), self.target_network.parameters()): 81 | param_k.data = param_k.data * mom + param_q.data * (1. 
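            # EMA update: `mom` ramps from BASE_MOMENTUM (0.99) toward 1.0 on a
            # cosine schedule (see update_mom above), so the target network
            # changes ever more slowly as training progresses.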
- mom) 82 | 83 | def forward(self, batched_inputs): 84 | """ 85 | Input: 86 | im_q: a batch of query images 87 | im_k: a batch of key images 88 | Output: 89 | logits, targets 90 | """ 91 | q_inputs = [bi["q"] for bi in batched_inputs] 92 | k_inputs = [bi["k"] for bi in batched_inputs] 93 | 94 | x_i = torch.stack([bi["image"][0] for bi in q_inputs]).to(self.device) 95 | x_j = torch.stack([bi["image"][0] for bi in k_inputs]).to(self.device) 96 | 97 | online_out_1 = self.predictor(self.online_network(x_i)) 98 | online_out_2 = self.predictor(self.online_network(x_j)) 99 | 100 | with torch.no_grad(): 101 | self._momentum_update_key_encoder() 102 | 103 | target_out_1 = self.target_network(x_i) 104 | target_out_2 = self.target_network(x_j) 105 | 106 | loss_i = self.losses(online_out_1, target_out_2) 107 | loss_j = self.losses(online_out_2, target_out_1) 108 | 109 | return { 110 | "loss_i": loss_i, 111 | "loss_j": loss_j, 112 | "mom": self.mom, 113 | } 114 | -------------------------------------------------------------------------------- /examples/byol/BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="", 9 | AS_PRETRAIN=True, 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["res5"], 15 | STRIDE_IN_1X1=False, # default true for msra models 16 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 17 | ), 18 | BYOL=dict( 19 | BASE_MOMENTUM=0.99, 20 | PROJ_DEPTH=2, 21 | PROJ_DIM=4096, 22 | OUT_DIM=256, 23 | ), 24 | ), 25 | DATASETS=dict( 26 | TRAIN=("imagenet_nori_train", ), 27 | TEST=("imagenet_nori_val", ), 28 | ), 29 | DATALOADER=dict(NUM_WORKERS=6, ), 30 | SOLVER=dict( 31 | LR_SCHEDULER=dict( 32 | NAME="WarmupCosineLR", 33 | MAX_EPOCH=100, 34 | WARMUP_ITERS=5, 35 | ), 36 | OPTIMIZER=dict( 37 | NAME="LARS_SGD", 38 | EPS=1e-8, 39 | TRUST_COEF=1e-3, 40 | CLIP=False, 41 | # _LR_PRESETS = {40: 0.45, 100: 0.45, 300: 0.3, 1000: 0.2} 42 | # _WD_PRESETS = {40: 1e-6, 100: 1e-6, 300: 1e-6, 1000: 1.5e-6} 43 | # _EMA_PRESETS = {40: 0.97, 100: 0.99, 300: 0.99, 1000: 0.996} 44 | BASE_LR=0.45 * 4, # 0.3 for bs 256 => 4.8 for 4096 45 | MOMENTUM=0.9, 46 | WEIGHT_DECAY=1e-6, 47 | WD_EXCLUDE_BN_BIAS=True, 48 | ), 49 | CHECKPOINT_PERIOD=10, 50 | IMS_PER_BATCH=1024, 51 | IMS_PER_DEVICE=128, # 8 gpus per node 52 | BATCH_SUBDIVISIONS=1, # Simulate Batch Size 4096 53 | ), 54 | INPUT=dict( 55 | AUG=dict( 56 | TRAIN_PIPELINES=dict( 57 | q=[ 58 | ("RepeatList", dict(transforms=[ 59 | ("Torch_Compose", transforms.Compose([ 60 | transforms.RandomResizedCrop(224), 61 | transforms.RandomHorizontalFlip(), 62 | transforms.RandomApply([ 63 | transforms.ColorJitter(0.4, 0.4, 0.2, 0.1)], p=0.8), 64 | transforms.RandomGrayscale(p=0.2), 65 | ])), 66 | ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=1.0)), 67 | ("RandomSolarization", dict(p=0.0)), 68 | ("Torch_Compose", transforms.Compose([ 69 | transforms.ToTensor(), 70 | transforms.Normalize( 71 | mean=[0.485, 0.456, 0.406], 72 | std=[0.229, 0.224, 0.225]) 73 | ])), 74 | ], repeat_times=1)), 75 | ], 76 | k=[ 77 | ("RepeatList", dict(transforms=[ 78 | ("Torch_Compose", transforms.Compose([ 79 | transforms.RandomResizedCrop(224, scale=(0.08, 1.)), 80 | transforms.RandomHorizontalFlip(), 81 | 
transforms.RandomApply([ 82 | transforms.ColorJitter(0.4, 0.4, 0.2, 0.1)], p=0.8), 83 | transforms.RandomGrayscale(p=0.2), 84 | ])), 85 | ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=0.1)), 86 | ("RandomSolarization", dict(p=0.2)), 87 | ("Torch_Compose", transforms.Compose([ 88 | transforms.ToTensor(), 89 | transforms.Normalize( 90 | mean=[0.485, 0.456, 0.406], 91 | std=[0.229, 0.224, 0.225]) 92 | ])), 93 | ], repeat_times=1)), 94 | ], 95 | ) 96 | )), 97 | TRAINER=dict(FP16=dict(ENABLED=False, OPTS=dict(OPT_LEVEL="O1"))), 98 | OUTPUT_DIR=osp.join( 99 | '/data/Outputs/model_logs/cvpods_playground', 100 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1])) 101 | 102 | 103 | class MoCoV2Config(BaseClassificationConfig): 104 | def __init__(self): 105 | super(MoCoV2Config, self).__init__() 106 | self._register_configuration(_config_dict) 107 | 108 | 109 | config = MoCoV2Config() 110 | -------------------------------------------------------------------------------- /examples/byol/BYOL.res50.imagenet.1024bsx2nodes.224size.100e.lars/net.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from byol import BYOL 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_resnet_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_model(cfg): 26 | 27 | cfg.build_backbone = build_backbone 28 | 29 | model = BYOL(cfg) 30 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model) 31 | 32 | return model 33 | -------------------------------------------------------------------------------- /examples/classification/res34.scratch.stl10.96size.150e.new.v2/README.md: -------------------------------------------------------------------------------- 1 | # res34.scratch.stl10.96size.150e.new.v2 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 87.175 | 99.162 | 8 | -------------------------------------------------------------------------------- /examples/classification/res34.scratch.stl10.96size.150e.new.v2/SVHN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | import os 5 | from collections import OrderedDict 6 | 7 | model_urls = { 8 | 'stl10': 'http://ml.cs.tsinghua.edu.cn/~chenxi/pytorch-models/stl10-866321e9.pth', 9 | } 10 | 11 | class SVHN(nn.Module): 12 | def __init__(self, features, n_channel, num_classes): 13 | super(SVHN, self).__init__() 14 | assert isinstance(features, nn.Sequential), type(features) 15 | self.features = features 16 | self.classifier = nn.Sequential( 17 | nn.Linear(n_channel, num_classes) 18 | ) 19 | 20 | def forward(self, x): 21 | x = self.features(x) 22 | x = x.view(x.size(0), -1) 23 | x = self.classifier(x) 24 | return x 25 | 26 | def make_layers(cfg, batch_norm=False): 27 | layers = [] 28 | in_channels = 3 29 | for i, v in enumerate(cfg): 30 | if v == 'M': 31 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 32 | else: 33 | padding = v[1] if 
isinstance(v, tuple) else 1 34 | out_channels = v[0] if isinstance(v, tuple) else v 35 | conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=padding) 36 | if batch_norm: 37 | layers += [conv2d, nn.BatchNorm2d(out_channels, affine=False), nn.ReLU()] 38 | else: 39 | layers += [conv2d, nn.ReLU()] 40 | in_channels = out_channels 41 | return nn.Sequential(*layers) 42 | 43 | def stl10(n_channel, pretrained=None): 44 | cfg = [ 45 | n_channel, 'M', 46 | 2*n_channel, 'M', 47 | 4*n_channel, 'M', 48 | 4*n_channel, 'M', 49 | (8*n_channel, 0), (8*n_channel, 0), 'M' 50 | ] 51 | layers = make_layers(cfg, batch_norm=True) 52 | model = SVHN(layers, n_channel=8*n_channel, num_classes=10) 53 | if pretrained is not None: 54 | m = model_zoo.load_url(model_urls['stl10']) 55 | state_dict = m.state_dict() if isinstance(m, nn.Module) else m 56 | assert isinstance(state_dict, (dict, OrderedDict)), type(state_dict) 57 | model.load_state_dict(state_dict) 58 | return model 59 | -------------------------------------------------------------------------------- /examples/classification/res34.scratch.stl10.96size.150e.new.v2/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os.path as osp 3 | import torchvision.transforms as transforms 4 | 5 | from cvpods.configs.base_classification_config import BaseClassificationConfig 6 | 7 | _config_dict = dict( 8 | MODEL=dict( 9 | WEIGHTS="", 10 | PIXEL_MEAN=[x/255.0 for x in [113.9, 123.0, 125.3]], # BGR 11 | PIXEL_STD=[x/255.0 for x in [66.7, 62.1, 63.0]], 12 | AS_PRETRAIN=True, # Automatically convert ckpt to pretrain pkl 13 | RESNETS=dict( 14 | DEPTH=34, 15 | RES2_OUT_CHANNELS=64, 16 | NUM_CLASSES=10, 17 | STRIDE_IN_1X1=False, # default true for msra models 18 | NORM="BN", 19 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 20 | OUT_FEATURES=["linear"], 21 | ), 22 | ), 23 | DATASETS=dict( 24 | TRAIN=("stl10_train", ), 25 | TEST=("stl10_test", ), 26 | ), 27 | DATALOADER=dict( 28 | NUM_WORKERS=2, 29 | ), 30 | SOLVER=dict( 31 | LR_SCHEDULER=dict( 32 | NAME="WarmupMultiStepLR", 33 | STEPS=(100, 125), 34 | MAX_EPOCH=150, 35 | WARMUP_ITERS=0, 36 | # EPOCH_WISE=False, 37 | ), 38 | OPTIMIZER=dict( 39 | NAME="SGD", 40 | BASE_LR=0.1, 41 | MOMENTUM=0.9, 42 | WEIGHT_DECAY=5e-4, 43 | WEIGHT_DECAY_NORM=5e-4, 44 | NESTEROV=True, 45 | ), 46 | CHECKPOINT_PERIOD=50, 47 | IMS_PER_BATCH=128, 48 | IMS_PER_DEVICE=16, 49 | ), 50 | INPUT=dict( 51 | AUG=dict( 52 | TRAIN_PIPELINES=[ 53 | ("Torch_RRC", transforms.RandomCrop(96, padding=4)), 54 | ("Torch_RHF", transforms.RandomHorizontalFlip()), 55 | ], 56 | TEST_PIPELINES=[ 57 | ] 58 | ) 59 | ), 60 | TEST=dict( 61 | EVAL_PERIOD=50, 62 | ), 63 | OUTPUT_DIR=osp.join( 64 | '/data/Outputs/model_logs/cvpods_playground', 65 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 66 | ) 67 | 68 | 69 | class ClassificationConfig(BaseClassificationConfig): 70 | def __init__(self): 71 | super(ClassificationConfig, self).__init__() 72 | self._register_configuration(_config_dict) 73 | 74 | 75 | config = ClassificationConfig() 76 | -------------------------------------------------------------------------------- /examples/classification/res34.scratch.stl10.96size.150e.new.v2/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec, Conv2d, get_norm 6 | from cvpods.structures import ImageList 7 | 8 | from SVHN import 
stl10 9 | 10 | 11 | def accuracy(output, target, topk=(1,)): 12 | """Computes the accuracy over the k top predictions for the specified values of k""" 13 | with torch.no_grad(): 14 | maxk = max(topk) 15 | batch_size = target.size(0) 16 | 17 | _, pred = output.topk(maxk, 1, True, True) 18 | pred = pred.t() 19 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 20 | 21 | res = [] 22 | for k in topk: 23 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 24 | res.append(correct_k.mul_(100.0 / batch_size)) 25 | return res 26 | 27 | 28 | class Classification(nn.Module): 29 | def __init__(self, cfg): 30 | super(Classification, self).__init__() 31 | 32 | self.device = torch.device(cfg.MODEL.DEVICE) 33 | 34 | # self.network = stl10(n_channel=32) 35 | # self.network.size_divisibility = 1 36 | 37 | self.network = cfg.build_backbone( 38 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 39 | 40 | self.network.stem = nn.Sequential( 41 | Conv2d( 42 | 3, 43 | 64, 44 | kernel_size=3, 45 | stride=1, 46 | padding=1, 47 | bias=False, 48 | norm=get_norm("BN", 64) 49 | ), 50 | nn.ReLU(), 51 | ) 52 | 53 | self.loss_evaluator = nn.CrossEntropyLoss() 54 | 55 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view( 56 | 3, 1, 1) 57 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view( 58 | 3, 1, 1) 59 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 60 | 61 | self.to(self.device) 62 | 63 | def forward(self, batched_inputs): 64 | images = self.preprocess_image(batched_inputs) 65 | 66 | preds = self.network(images.tensor)["linear"] 67 | 68 | if self.training: 69 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 70 | losses = self.loss_evaluator(preds, labels) 71 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 72 | 73 | return { 74 | "loss_cls": losses, 75 | "Acc@1": acc1, 76 | "Acc@5": acc5, 77 | } 78 | else: 79 | return preds 80 | 81 | def preprocess_image(self, batched_inputs): 82 | """ 83 | Normalize, pad and batch the input images. 84 | """ 85 | images = [x["image"].float().to(self.device) for x in batched_inputs] 86 | images = [self.normalizer(x.div(255)) for x in images] 87 | images = ImageList.from_tensors(images, self.network.size_divisibility) 88 | return images 89 | -------------------------------------------------------------------------------- /examples/classification/res34.scratch.stl10.96size.150e.new.v2/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from stl10 import * 3 | 4 | from cvpods.layers import ShapeSpec 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone import build_resnet_backbone 7 | 8 | from imagenet import Classification 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_resnet_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_model(cfg): 27 | 28 | cfg.build_backbone = build_backbone 29 | 30 | model = Classification(cfg) 31 | 32 | logger = logging.getLogger(__name__) 33 | logger.info("Model:\n{}".format(model)) 34 | return model 35 | -------------------------------------------------------------------------------- /examples/classification/res34.scratch.stl10.96size.150e.new.v2/stl10.py: -------------------------------------------------------------------------------- 1 | import math 2 | from copy import deepcopy 3 | import torch 4 | import torch.nn as nn 5 | 6 | from torch.nn import functional as F 7 | 8 | import cvpods 9 | from cvpods.data.registry import DATASETS, PATH_ROUTES 10 | from torchvision import datasets 11 | 12 | import os.path as osp 13 | import numpy as np 14 | 15 | 16 | _PREDEFINED_SPLITS_STL10 = { 17 | "dataset_type": "STLDatasets", 18 | "evaluator_type": {"stl10": "classification"}, 19 | "stl10": { 20 | "stl10_train": ("stl10", "train"), 21 | "stl10_unlabeled": ("stl10", "unlabeled"), 22 | "stl10_test": ("stl10", "test"), 23 | }, 24 | } 25 | PATH_ROUTES.register(_PREDEFINED_SPLITS_STL10, "STL10") 26 | 27 | 28 | @DATASETS.register() 29 | class STLDatasets(datasets.STL10): 30 | def __init__(self, cfg, dataset_name, transforms=[], is_train=True): 31 | 32 | self.meta = {"evaluator_type": "classification"} 33 | image_root, split = _PREDEFINED_SPLITS_STL10["stl10"][dataset_name] 34 | self.data_root = osp.join(osp.split(osp.split(cvpods.__file__)[0])[0], "datasets") 35 | self.image_root = osp.join(self.data_root, image_root) 36 | super(STLDatasets, self).__init__(self.image_root, split=split, download=True, transform=None) 37 | self.aspect_ratios = np.zeros(len(self), dtype=np.uint8) 38 | self.transforms = transforms 39 | self.is_train = is_train 40 | 41 | def _apply_transforms(self, image, annotations=None): 42 | 43 | if isinstance(self.transforms, dict): 44 | dataset_dict = {} 45 | for key, tfms in self.transforms.items(): 46 | img = deepcopy(image) 47 | annos = deepcopy(annotations) 48 | for tfm in tfms: 49 | img, annos = tfm(img) 50 | dataset_dict[key] = (img, annos) 51 | return dataset_dict, None 52 | else: 53 | for tfm in self.transforms: 54 | image, annos = tfm(image) 55 | 56 | return image, annotations 57 | 58 | def __getitem__(self, index): 59 | image, annotations = super().__getitem__(index) 60 | dataset_dict = {"image_id": index, "category_id": annotations} 61 | 62 | image = image.convert("RGB") 63 | image = np.asarray(image) 64 | image = image[:, :, ::-1] 65 | images, anno = self._apply_transforms(image, annotations) 66 | 67 | def process(dd, img): 68 | 69 | if len(img.shape) == 3: 70 | image_shape = img.shape[:2] # h, w 71 | dd["image"] = torch.as_tensor(np.ascontiguousarray(img.transpose(2, 0, 1))) 72 | elif len(img.shape) == 4: 73 | image_shape = img.shape[1:3] 74 | # NHWC -> NCHW 75 | dd["image"] = torch.as_tensor(np.ascontiguousarray(img.transpose(0, 3, 1, 2))) 76 | 77 | return dd 78 | 79 | if isinstance(images, dict): 80 | ret = {} 81 | # multiple input pipelines 82 | for desc, item in images.items(): 83 | img, anno = item 84 | ret[desc] = process(deepcopy(dataset_dict), img) 85 | return ret 86 | else: 87 | return process(dataset_dict, images) 88 | 
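The `STLDatasets` wrapper above accepts either a plain list of transforms or a dict of named pipelines (the mechanism the BYOL/BarlowTwins configs use for their `q`/`k` and `t`/`t_prime` views). A minimal sketch of that dispatch, ignoring annotations for brevity; `run`, `apply_transforms`, and the toy transforms are illustrative stand-ins, not repo code:

```python
import numpy as np
from copy import deepcopy

def run(image, tfms):
    # cvpods-style transforms return (image, annotations) when called
    for tfm in tfms:
        image, _ = tfm(image)
    return image

def apply_transforms(image, transforms):
    # Mirrors STLDatasets._apply_transforms: a dict of named pipelines yields
    # one independently augmented view per key; a plain list yields one view.
    if isinstance(transforms, dict):
        return {key: run(deepcopy(image), tfms) for key, tfms in transforms.items()}
    return run(image, transforms)

# Toy transforms standing in for the real cvpods ops:
flip = lambda img: (img[:, ::-1], None)
identity = lambda img: (img, None)

image = np.arange(6).reshape(2, 3)
views = apply_transforms(image, {"q": [flip], "k": [identity]})
print(sorted(views))  # ['k', 'q'] -> two differently augmented views of one sample
```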
-------------------------------------------------------------------------------- /examples/classification/res50.scratch.cifar10.32size.200e.v3.epoch_wise/README.md: -------------------------------------------------------------------------------- 1 | # res50.scratch.cifar10.32size.200e.v3.epoch_wise 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 95.260 | 99.890 | 8 | -------------------------------------------------------------------------------- /examples/classification/res50.scratch.cifar10.32size.200e.v3.epoch_wise/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os.path as osp 3 | import torchvision.transforms as transforms 4 | 5 | from cvpods.configs.base_classification_config import BaseClassificationConfig 6 | 7 | _config_dict = dict( 8 | MODEL=dict( 9 | WEIGHTS="", 10 | PIXEL_MEAN=[0.4465, 0.4822, 0.4914], # BGR 11 | PIXEL_STD=[0.2010, 0.1994, 0.2023], 12 | AS_PRETRAIN=True, # Automatically convert ckpt to pretrain pkl 13 | RESNETS=dict( 14 | DEPTH=50, 15 | NUM_CLASSES=10, 16 | STRIDE_IN_1X1=False, # default true for msra models 17 | NORM="BN", 18 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 19 | OUT_FEATURES=["linear"], 20 | ), 21 | ), 22 | DATASETS=dict( 23 | TRAIN=("cifar10_train", ), 24 | TEST=("cifar10_test", ), 25 | ), 26 | DATALOADER=dict( 27 | NUM_WORKERS=6, 28 | ), 29 | SOLVER=dict( 30 | LR_SCHEDULER=dict( 31 | # NAME="CosineAnnealingLR", 32 | NAME="WarmupCosineLR", 33 | MAX_EPOCH=200, 34 | WARMUP_ITERS=0, 35 | EPOCH_WISE=True, 36 | ), 37 | OPTIMIZER=dict( 38 | NAME="SGD", 39 | BASE_LR=0.1, 40 | MOMENTUM=0.9, 41 | WEIGHT_DECAY=1e-4, 42 | ), 43 | CHECKPOINT_PERIOD=50, 44 | IMS_PER_BATCH=128, 45 | IMS_PER_DEVICE=16, 46 | ), 47 | INPUT=dict( 48 | AUG=dict( 49 | TRAIN_PIPELINES=[ 50 | ("Torch_RRC", transforms.RandomCrop(32, padding=4)), 51 | ("Torch_RHF", transforms.RandomHorizontalFlip()), 52 | ], 53 | TEST_PIPELINES=[ 54 | ] 55 | ) 56 | ), 57 | TEST=dict( 58 | EVAL_PERIOD=50, 59 | ), 60 | OUTPUT_DIR=osp.join( 61 | '/data/Outputs/model_logs/cvpods_playground', 62 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 63 | ) 64 | 65 | 66 | class ClassificationConfig(BaseClassificationConfig): 67 | def __init__(self): 68 | super(ClassificationConfig, self).__init__() 69 | self._register_configuration(_config_dict) 70 | 71 | 72 | config = ClassificationConfig() 73 | -------------------------------------------------------------------------------- /examples/classification/res50.scratch.cifar10.32size.200e.v3.epoch_wise/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec, Conv2d, get_norm 6 | from cvpods.structures import ImageList 7 | 8 | 9 | def accuracy(output, target, topk=(1,)): 10 | """Computes the accuracy over the k top predictions for the specified values of k""" 11 | with torch.no_grad(): 12 | maxk = max(topk) 13 | batch_size = target.size(0) 14 | 15 | _, pred = output.topk(maxk, 1, True, True) 16 | pred = pred.t() 17 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 18 | 19 | res = [] 20 | for k in topk: 21 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 22 | res.append(correct_k.mul_(100.0 / batch_size)) 23 | return res 24 | 25 | 26 | class Classification(nn.Module): 27 | def __init__(self, cfg): 28 | super(Classification, 
self).__init__() 29 | 30 | self.device = torch.device(cfg.MODEL.DEVICE) 31 | 32 | self.network = cfg.build_backbone( 33 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 34 | 35 | self.network.stem = nn.Sequential( 36 | Conv2d( 37 | 3, 38 | 64, 39 | kernel_size=3, 40 | stride=1, 41 | padding=1, 42 | bias=False, 43 | norm=get_norm("BN", 64) 44 | ), 45 | nn.ReLU(), 46 | ) 47 | 48 | self.loss_evaluator = nn.CrossEntropyLoss() 49 | 50 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view( 51 | 3, 1, 1) 52 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view( 53 | 3, 1, 1) 54 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 55 | 56 | self.to(self.device) 57 | 58 | def forward(self, batched_inputs): 59 | images = self.preprocess_image(batched_inputs) 60 | 61 | preds = self.network(images.tensor)["linear"] 62 | 63 | if self.training: 64 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 65 | losses = self.loss_evaluator(preds, labels) 66 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 67 | 68 | return { 69 | "loss_cls": losses, 70 | "Acc@1": acc1, 71 | "Acc@5": acc5, 72 | } 73 | else: 74 | return preds 75 | 76 | def preprocess_image(self, batched_inputs): 77 | """ 78 | Normalize, pad and batch the input images. 79 | """ 80 | images = [x["image"].float().to(self.device) for x in batched_inputs] 81 | images = [self.normalizer(x.div(255)) for x in images] 82 | images = ImageList.from_tensors(images, self.network.size_divisibility) 83 | return images 84 | -------------------------------------------------------------------------------- /examples/classification/res50.scratch.cifar10.32size.200e.v3.epoch_wise/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from imagenet import Classification 8 | 9 | def build_backbone(cfg, input_shape=None): 10 | """ 11 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
12 | 13 | Returns: 14 | an instance of :class:`Backbone` 15 | """ 16 | if input_shape is None: 17 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 18 | 19 | backbone = build_resnet_backbone(cfg, input_shape) 20 | assert isinstance(backbone, Backbone) 21 | return backbone 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = Classification(cfg) 28 | 29 | logger = logging.getLogger(__name__) 30 | logger.info("Model:\n{}".format(model)) 31 | return model 32 | -------------------------------------------------------------------------------- /examples/classification/res50.scratch.imagenet.224size.100e/README.md: -------------------------------------------------------------------------------- 1 | # res50.scratch.imagenet.224size.100e 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 76.352 | 93.050 | 8 | -------------------------------------------------------------------------------- /examples/classification/res50.scratch.imagenet.224size.100e/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os.path as osp 3 | import torchvision.transforms as transforms 4 | 5 | from cvpods.configs.base_classification_config import BaseClassificationConfig 6 | 7 | _config_dict = dict( 8 | MODEL=dict( 9 | WEIGHTS="", 10 | AS_PRETRAIN=True, # Automatically convert ckpt to pretrain pkl 11 | RESNETS=dict( 12 | DEPTH=50, 13 | NUM_CLASSES=1000, 14 | STRIDE_IN_1X1=False, # default true for msra models 15 | NORM="BN", 16 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 17 | OUT_FEATURES=["linear"], 18 | ), 19 | ), 20 | DATASETS=dict( 21 | TRAIN=("imagenet_train", ), 22 | TEST=("imagenet_val", ), 23 | ), 24 | DATALOADER=dict( 25 | NUM_WORKERS=6, 26 | ), 27 | SOLVER=dict( 28 | LR_SCHEDULER=dict( 29 | STEPS=(30, 60, 90), 30 | MAX_EPOCH=100, 31 | WARMUP_ITERS=10, 32 | ), 33 | OPTIMIZER=dict( 34 | BASE_LR=0.1, 35 | WEIGHT_DECAY=0.0001, 36 | WEIGHT_DECAY_NORM=0.0, 37 | ), 38 | CHECKPOINT_PERIOD=10, 39 | IMS_PER_BATCH=256, 40 | ), 41 | INPUT=dict( 42 | AUG=dict( 43 | TRAIN_PIPELINES=[ 44 | ("Torch_Compose", transforms.Compose([ 45 | transforms.RandomResizedCrop(224), 46 | transforms.RandomHorizontalFlip(), 47 | transforms.ToTensor(), 48 | transforms.Normalize( 49 | mean=[0.485, 0.456, 0.406], 50 | std=[0.229, 0.224, 0.225]) 51 | ]) 52 | ), 53 | ], 54 | TEST_PIPELINES=[ 55 | ("Torch_Compose", transforms.Compose([ 56 | transforms.Resize(256), 57 | transforms.CenterCrop(224), 58 | transforms.ToTensor(), 59 | transforms.Normalize( 60 | mean=[0.485, 0.456, 0.406], 61 | std=[0.229, 0.224, 0.225]) 62 | ]) 63 | ), 64 | ], 65 | ) 66 | ), 67 | TEST=dict( 68 | EVAL_PERIOD=10, 69 | ), 70 | OUTPUT_DIR=osp.join( 71 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 72 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1]), 73 | ) 74 | 75 | 76 | class ClassificationConfig(BaseClassificationConfig): 77 | def __init__(self): 78 | super(ClassificationConfig, self).__init__() 79 | self._register_configuration(_config_dict) 80 | 81 | 82 | config = ClassificationConfig() 83 | -------------------------------------------------------------------------------- /examples/classification/res50.scratch.imagenet.224size.100e/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec 6 | from 
cvpods.structures import ImageList 7 | 8 | 9 | def accuracy(output, target, topk=(1,)): 10 | """Computes the accuracy over the k top predictions for the specified values of k""" 11 | with torch.no_grad(): 12 | maxk = max(topk) 13 | batch_size = target.size(0) 14 | 15 | _, pred = output.topk(maxk, 1, True, True) 16 | pred = pred.t() 17 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 18 | 19 | res = [] 20 | for k in topk: 21 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 22 | res.append(correct_k.mul_(100.0 / batch_size)) 23 | return res 24 | 25 | 26 | class Classification(nn.Module): 27 | def __init__(self, cfg): 28 | super(Classification, self).__init__() 29 | 30 | self.device = torch.device(cfg.MODEL.DEVICE) 31 | 32 | self.network = cfg.build_backbone( 33 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 34 | self.loss_evaluator = nn.CrossEntropyLoss() 35 | 36 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view( 37 | 1, 3, 1, 1) 38 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view( 39 | 1, 3, 1, 1) 40 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 41 | 42 | self.to(self.device) 43 | 44 | def forward(self, batched_inputs): 45 | images = self.preprocess_image(batched_inputs) 46 | preds = self.network(images)["linear"] 47 | 48 | if self.training: 49 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 50 | losses = self.loss_evaluator(preds, labels) 51 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 52 | 53 | return { 54 | "loss_cls": losses, 55 | "Acc@1": acc1, 56 | "Acc@5": acc5, 57 | } 58 | else: 59 | return preds 60 | 61 | def preprocess_image(self, batched_inputs): 62 | """ 63 | Normalize, pad and batch the input images. 64 | """ 65 | images = torch.stack([x["image"] for x in batched_inputs]).to(self.device) 66 | return images 67 | -------------------------------------------------------------------------------- /examples/classification/res50.scratch.imagenet.224size.100e/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from imagenet import Classification 8 | 9 | def build_backbone(cfg, input_shape=None): 10 | """ 11 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
12 | 13 | Returns: 14 | an instance of :class:`Backbone` 15 | """ 16 | if input_shape is None: 17 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 18 | 19 | backbone = build_resnet_backbone(cfg, input_shape) 20 | assert isinstance(backbone, Backbone) 21 | return backbone 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = Classification(cfg) 28 | 29 | logger = logging.getLogger(__name__) 30 | logger.info("Model:\n{}".format(model)) 31 | return model 32 | -------------------------------------------------------------------------------- /examples/classification/svhn.scratch.stl10.96size.150e.v3/README.md: -------------------------------------------------------------------------------- 1 | # svhn.scratch.stl10.96size.150e.v3 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 75.013 | 97.363 | 8 | -------------------------------------------------------------------------------- /examples/classification/svhn.scratch.stl10.96size.150e.v3/SVHN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | import os 5 | from collections import OrderedDict 6 | 7 | model_urls = { 8 | 'stl10': 'http://ml.cs.tsinghua.edu.cn/~chenxi/pytorch-models/stl10-866321e9.pth', 9 | } 10 | 11 | class SVHN(nn.Module): 12 | def __init__(self, features, n_channel, num_classes): 13 | super(SVHN, self).__init__() 14 | assert isinstance(features, nn.Sequential), type(features) 15 | self.features = features 16 | self.classifier = nn.Sequential( 17 | nn.Linear(n_channel, num_classes) 18 | ) 19 | 20 | def forward(self, x): 21 | x = self.features(x) 22 | x = x.view(x.size(0), -1) 23 | x = self.classifier(x) 24 | return x 25 | 26 | def make_layers(cfg, batch_norm=False): 27 | layers = [] 28 | in_channels = 3 29 | for i, v in enumerate(cfg): 30 | if v == 'M': 31 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 32 | else: 33 | padding = v[1] if isinstance(v, tuple) else 1 34 | out_channels = v[0] if isinstance(v, tuple) else v 35 | conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=padding) 36 | if batch_norm: 37 | layers += [conv2d, nn.BatchNorm2d(out_channels, affine=False), nn.ReLU()] 38 | else: 39 | layers += [conv2d, nn.ReLU()] 40 | in_channels = out_channels 41 | return nn.Sequential(*layers) 42 | 43 | def stl10(n_channel, pretrained=None): 44 | cfg = [ 45 | n_channel, 'M', 46 | 2*n_channel, 'M', 47 | 4*n_channel, 'M', 48 | 4*n_channel, 'M', 49 | (8*n_channel, 0), (8*n_channel, 0), 'M' 50 | ] 51 | layers = make_layers(cfg, batch_norm=True) 52 | model = SVHN(layers, n_channel=8*n_channel, num_classes=10) 53 | if pretrained is not None: 54 | m = model_zoo.load_url(model_urls['stl10']) 55 | state_dict = m.state_dict() if isinstance(m, nn.Module) else m 56 | assert isinstance(state_dict, (dict, OrderedDict)), type(state_dict) 57 | model.load_state_dict(state_dict) 58 | return model 59 | -------------------------------------------------------------------------------- /examples/classification/svhn.scratch.stl10.96size.150e.v3/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os.path as osp 3 | import torchvision.transforms as transforms 4 | 5 | from cvpods.configs.base_classification_config import BaseClassificationConfig 6 | 7 | _config_dict = dict( 8 | MODEL=dict( 9 | WEIGHTS="", 10 | 
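        # stl10.py converts images to BGR and imagenet.py divides by 255 before
        # applying this normalization, so with symmetric 0.5 mean/std values the
        # BGR-vs-RGB channel order is immaterial here.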
PIXEL_MEAN=[0.5, 0.5, 0.5], # BGR 11 | PIXEL_STD=[0.5, 0.5, 0.5], 12 | AS_PRETRAIN=True, # Automatically convert ckpt to pretrain pkl 13 | RESNETS=dict( 14 | DEPTH=50, 15 | NUM_CLASSES=10, 16 | STRIDE_IN_1X1=False, # default true for msra models 17 | NORM="BN", 18 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 19 | OUT_FEATURES=["linear"], 20 | ), 21 | ), 22 | DATASETS=dict( 23 | TRAIN=("stl10_train", ), 24 | TEST=("stl10_test", ), 25 | ), 26 | DATALOADER=dict( 27 | NUM_WORKERS=2, 28 | ), 29 | SOLVER=dict( 30 | LR_SCHEDULER=dict( 31 | # NAME="CosineAnnealingLR", 32 | NAME="WarmupMultiStepLR", 33 | STEPS=(80, 120), 34 | MAX_EPOCH=150, 35 | WARMUP_ITERS=0, 36 | EPOCH_WISE=False, 37 | ), 38 | OPTIMIZER=dict( 39 | NAME="Adam", 40 | BASE_LR=0.001, 41 | WEIGHT_DECAY=0.0, 42 | ), 43 | CHECKPOINT_PERIOD=50, 44 | IMS_PER_BATCH=1600, 45 | IMS_PER_DEVICE=200, 46 | ), 47 | INPUT=dict( 48 | AUG=dict( 49 | TRAIN_PIPELINES=[ 50 | ("Torch_P", transforms.Pad(4)), 51 | ("Torch_RRC", transforms.RandomCrop(96)), 52 | ("Torch_RHF", transforms.RandomHorizontalFlip()), 53 | ], 54 | TEST_PIPELINES=[ 55 | ] 56 | ) 57 | ), 58 | TEST=dict( 59 | EVAL_PERIOD=50, 60 | ), 61 | OUTPUT_DIR=osp.join( 62 | '/data/Outputs/model_logs/cvpods_playground', 63 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 64 | ) 65 | 66 | 67 | class ClassificationConfig(BaseClassificationConfig): 68 | def __init__(self): 69 | super(ClassificationConfig, self).__init__() 70 | self._register_configuration(_config_dict) 71 | 72 | 73 | config = ClassificationConfig() 74 | -------------------------------------------------------------------------------- /examples/classification/svhn.scratch.stl10.96size.150e.v3/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec, Conv2d, get_norm 6 | from cvpods.structures import ImageList 7 | 8 | from SVHN import stl10 9 | 10 | 11 | def accuracy(output, target, topk=(1,)): 12 | """Computes the accuracy over the k top predictions for the specified values of k""" 13 | with torch.no_grad(): 14 | maxk = max(topk) 15 | batch_size = target.size(0) 16 | 17 | _, pred = output.topk(maxk, 1, True, True) 18 | pred = pred.t() 19 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 20 | 21 | res = [] 22 | for k in topk: 23 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 24 | res.append(correct_k.mul_(100.0 / batch_size)) 25 | return res 26 | 27 | 28 | class Classification(nn.Module): 29 | def __init__(self, cfg): 30 | super(Classification, self).__init__() 31 | 32 | self.device = torch.device(cfg.MODEL.DEVICE) 33 | 34 | self.network = stl10(n_channel=32) 35 | self.network.size_divisibility = 1 36 | 37 | self.loss_evaluator = nn.CrossEntropyLoss() 38 | 39 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view( 40 | 3, 1, 1) 41 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view( 42 | 3, 1, 1) 43 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 44 | 45 | self.to(self.device) 46 | 47 | def forward(self, batched_inputs): 48 | images = self.preprocess_image(batched_inputs) 49 | 50 | preds = self.network(images.tensor) 51 | 52 | if self.training: 53 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 54 | losses = self.loss_evaluator(preds, labels) 55 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 56 | 57 | return { 58 | "loss_cls": losses, 59 | "Acc@1": acc1, 60 | 
"Acc@5": acc5, 61 | } 62 | else: 63 | return preds 64 | 65 | def preprocess_image(self, batched_inputs): 66 | """ 67 | Normalize, pad and batch the input images. 68 | """ 69 | images = [x["image"].float().to(self.device) for x in batched_inputs] 70 | images = [self.normalizer(x.div(255)) for x in images] 71 | images = ImageList.from_tensors(images, self.network.size_divisibility) 72 | return images 73 | -------------------------------------------------------------------------------- /examples/classification/svhn.scratch.stl10.96size.150e.v3/net.py: -------------------------------------------------------------------------------- 1 | from stl10 import * 2 | from imagenet import Classification 3 | 4 | 5 | def build_model(cfg): 6 | 7 | model = Classification(cfg) 8 | 9 | return model 10 | -------------------------------------------------------------------------------- /examples/classification/svhn.scratch.stl10.96size.150e.v3/stl10.py: -------------------------------------------------------------------------------- 1 | import math 2 | from copy import deepcopy 3 | import torch 4 | import torch.nn as nn 5 | 6 | from torch.nn import functional as F 7 | 8 | import cvpods 9 | from cvpods.data.registry import DATASETS, PATH_ROUTES 10 | from torchvision import datasets 11 | 12 | import os.path as osp 13 | import numpy as np 14 | 15 | 16 | _PREDEFINED_SPLITS_STL10 = { 17 | "dataset_type": "STLDatasets", 18 | "evaluator_type": {"stl10": "classification"}, 19 | "stl10": { 20 | "stl10_train": ("stl10", "train"), 21 | "stl10_unlabeled": ("stl10", "unlabeled"), 22 | "stl10_test": ("stl10", "test"), 23 | }, 24 | } 25 | PATH_ROUTES.register(_PREDEFINED_SPLITS_STL10, "STL10") 26 | 27 | 28 | @DATASETS.register() 29 | class STLDatasets(datasets.STL10): 30 | def __init__(self, cfg, dataset_name, transforms=[], is_train=True): 31 | 32 | self.meta = {"evaluator_type": "classification"} 33 | image_root, split = _PREDEFINED_SPLITS_STL10["stl10"][dataset_name] 34 | self.data_root = osp.join(osp.split(osp.split(cvpods.__file__)[0])[0], "datasets") 35 | self.image_root = osp.join(self.data_root, image_root) 36 | super(STLDatasets, self).__init__(self.image_root, split=split, download=True, transform=None) 37 | self.aspect_ratios = np.zeros(len(self), dtype=np.uint8) 38 | self.transforms = transforms 39 | self.is_train = is_train 40 | 41 | def _apply_transforms(self, image, annotations=None): 42 | 43 | if isinstance(self.transforms, dict): 44 | dataset_dict = {} 45 | for key, tfms in self.transforms.items(): 46 | img = deepcopy(image) 47 | annos = deepcopy(annotations) 48 | for tfm in tfms: 49 | img, annos = tfm(img) 50 | dataset_dict[key] = (img, annos) 51 | return dataset_dict, None 52 | else: 53 | for tfm in self.transforms: 54 | image, annos = tfm(image) 55 | 56 | return image, annotations 57 | 58 | def __getitem__(self, index): 59 | image, annotations = super().__getitem__(index) 60 | dataset_dict = {"image_id": index, "category_id": annotations} 61 | 62 | image = image.convert("RGB") 63 | image = np.asarray(image) 64 | image = image[:, :, ::-1] 65 | images, anno = self._apply_transforms(image, annotations) 66 | 67 | def process(dd, img): 68 | 69 | if len(img.shape) == 3: 70 | image_shape = img.shape[:2] # h, w 71 | dd["image"] = torch.as_tensor(np.ascontiguousarray(img.transpose(2, 0, 1))) 72 | elif len(img.shape) == 4: 73 | image_shape = img.shape[1:3] 74 | # NHWC -> NCHW 75 | dd["image"] = torch.as_tensor(np.ascontiguousarray(img.transpose(0, 3, 1, 2))) 76 | 77 | return dd 78 | 79 | if isinstance(images, dict): 
80 | ret = {} 81 | # multiple input pipelines 82 | for desc, item in images.items(): 83 | img, anno = item 84 | ret[desc] = process(deepcopy(dataset_dict), img) 85 | return ret 86 | else: 87 | return process(dataset_dict, images) 88 | -------------------------------------------------------------------------------- /examples/downstream/faster_rcnn/faster_rcnn.res50.fpn.coco.multiscale.1x.syncbn/README.md: -------------------------------------------------------------------------------- 1 | # faster_rcnn.res50.fpn.coco.multiscale.1x.syncbn 2 | 3 | seed: 50837708 4 | 5 | ## Evaluation results for bbox: 6 | 7 | ``` 8 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.386 9 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.598 10 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.419 11 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.238 12 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.419 13 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.488 14 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.314 15 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.501 16 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.527 17 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.353 18 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.562 19 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.650 20 | ``` 21 | | AP | AP50 | AP75 | APs | APm | APl | 22 | |:------:|:------:|:------:|:------:|:------:|:------:| 23 | | 38.550 | 59.809 | 41.855 | 23.774 | 41.945 | 48.761 | 24 | 25 | ### Per-category bbox AP: 26 | 27 | | category | AP | category | AP | category | AP | 28 | |:--------------|:-------|:-------------|:-------|:---------------|:-------| 29 | | person | 53.008 | bicycle | 29.739 | car | 42.657 | 30 | | motorcycle | 40.859 | airplane | 58.369 | bus | 60.521 | 31 | | train | 58.182 | truck | 33.538 | boat | 25.290 | 32 | | traffic light | 27.818 | fire hydrant | 64.357 | stop sign | 63.783 | 33 | | parking meter | 46.235 | bench | 21.461 | bird | 34.857 | 34 | | cat | 59.967 | dog | 55.376 | horse | 52.850 | 35 | | sheep | 49.031 | cow | 53.114 | elephant | 57.785 | 36 | | bear | 62.888 | zebra | 63.047 | giraffe | 63.762 | 37 | | backpack | 12.711 | umbrella | 36.155 | handbag | 13.284 | 38 | | tie | 30.258 | suitcase | 35.907 | frisbee | 63.544 | 39 | | skis | 21.467 | snowboard | 33.252 | sports ball | 48.123 | 40 | | kite | 41.124 | baseball bat | 27.473 | baseball glove | 35.415 | 41 | | skateboard | 49.033 | surfboard | 33.083 | tennis racket | 43.608 | 42 | | bottle | 39.109 | wine glass | 34.184 | cup | 41.301 | 43 | | fork | 29.817 | knife | 14.860 | spoon | 13.945 | 44 | | bowl | 40.682 | banana | 23.017 | apple | 18.021 | 45 | | sandwich | 30.446 | orange | 29.719 | broccoli | 19.988 | 46 | | carrot | 18.481 | hot dog | 29.861 | pizza | 48.975 | 47 | | donut | 40.916 | cake | 33.945 | chair | 24.699 | 48 | | couch | 39.177 | potted plant | 22.986 | bed | 39.977 | 49 | | dining table | 25.003 | toilet | 55.545 | tv | 51.425 | 50 | | laptop | 56.210 | mouse | 58.329 | remote | 27.846 | 51 | | keyboard | 47.537 | cell phone | 33.824 | microwave | 52.382 | 52 | | oven | 28.069 | toaster | 44.780 | sink | 33.973 | 53 | | refrigerator | 50.924 | book | 14.558 | clock | 49.649 | 54 | | vase | 36.613 | scissors | 18.512 | teddy bear | 
40.968 |
55 | | hair drier | 0.000 | toothbrush | 20.829 | | |
56 |
--------------------------------------------------------------------------------
/examples/downstream/faster_rcnn/faster_rcnn.res50.fpn.coco.multiscale.1x.syncbn/config.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 |
3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig
4 |
5 | _config_dict = dict(
6 |     MODEL=dict(
7 |         PIXEL_MEAN=[0.485 * 255, 0.456 * 255, 0.406 * 255],  # RGB
8 |         PIXEL_STD=[0.229 * 255, 0.224 * 255, 0.225 * 255],
9 |         # WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl",
10 |         WEIGHTS="../../classification/resnet/res50.scratch.imagenet.224size.100e/log/model_final_pretrain_weight.pkl",
11 |         MASK_ON=False,
12 |         BACKBONE=dict(
13 |             FREEZE_AT=0,
14 |         ),
15 |         RESNETS=dict(
16 |             DEPTH=50,
17 |             NORM="nnSyncBN",
18 |             STRIDE_IN_1X1=False,  # default true only for msra models
19 |         ),
20 |         FPN=dict(
21 |             NORM="nnSyncBN",
22 |         ),
23 |         ROI_BOX_HEAD=dict(
24 |             NORM="nnSyncBN",
25 |         ),
26 |     ),
27 |     DATASETS=dict(
28 |         TRAIN=("coco_2017_train",),
29 |         TEST=("coco_2017_val",),
30 |     ),
31 |     SOLVER=dict(
32 |         LR_SCHEDULER=dict(
33 |             STEPS=(60000, 80000),
34 |             MAX_ITER=90000,
35 |         ),
36 |         OPTIMIZER=dict(
37 |             BASE_LR=0.02,
38 |         ),
39 |         IMS_PER_BATCH=16,
40 |     ),
41 |     INPUT=dict(
42 |         FORMAT="RGB",
43 |         AUG=dict(
44 |             TRAIN_PIPELINES=[
45 |                 ("ResizeShortestEdge",
46 |                  dict(short_edge_length=(640, 672, 704, 736, 768, 800),
47 |                       max_size=1333, sample_style="choice")),
48 |                 ("RandomFlip", dict()),
49 |             ],
50 |             TEST_PIPELINES=[
51 |                 ("ResizeShortestEdge",
52 |                  dict(short_edge_length=800, max_size=1333, sample_style="choice")),
53 |             ],
54 |         )
55 |     ),
56 |     TEST=dict(
57 |         EVAL_PERIOD=10000,
58 |         PRECISE_BN=dict(
59 |             ENABLED=True,
60 |         ),
61 |     ),
62 |     OUTPUT_DIR=osp.join(
63 |         '/data/Outputs/model_logs/cvpods_playground',
64 |         osp.split(osp.realpath(__file__))[0].split("playground/")[-1]),
65 | )
66 |
67 |
68 | class FasterRCNNConfig(RCNNFPNConfig):
69 |     def __init__(self):
70 |         super(FasterRCNNConfig, self).__init__()
71 |         self._register_configuration(_config_dict)
72 |
73 |
74 | config = FasterRCNNConfig()
75 |
--------------------------------------------------------------------------------
/examples/downstream/faster_rcnn/faster_rcnn.res50.fpn.coco.multiscale.1x.syncbn/net.py:
--------------------------------------------------------------------------------
1 | from cvpods.layers import ShapeSpec
2 | from cvpods.modeling.backbone import Backbone
3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
4 | from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
5 | from cvpods.modeling.proposal_generator import RPN
6 | from cvpods.modeling.roi_heads import StandardROIHeads
7 | from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
8 |
9 |
10 | def build_backbone(cfg, input_shape=None):
11 |     if input_shape is None:
12 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
13 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
14 |     assert isinstance(backbone, Backbone)
15 |     return backbone
16 |
17 |
18 | def build_proposal_generator(cfg, input_shape):
19 |     return RPN(cfg, input_shape)
20 |
21 |
22 | def build_roi_heads(cfg, input_shape):
23 |     return StandardROIHeads(cfg, input_shape)
24 |
25 |
26 | def build_box_head(cfg, input_shape):
27 |     return FastRCNNConvFCHead(cfg, input_shape)
28 |
29 |
30 | def build_model(cfg):
31 |     cfg.build_backbone = build_backbone
32 |     cfg.build_proposal_generator = build_proposal_generator
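    # cvpods convention: builder callables are attached to the config object so
    # that GeneralizedRCNN can construct its sub-modules from them, letting each
    # playground experiment swap components without touching framework code.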
33 | cfg.build_roi_heads = build_roi_heads 34 | cfg.build_box_head = build_box_head 35 | 36 | model = GeneralizedRCNN(cfg) 37 | return model 38 | -------------------------------------------------------------------------------- /examples/downstream/mask_rcnn/mask_rcnn.res50.fpn.coco.multiscale.1x.syncbn/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | PIXEL_MEAN=[0.485 * 255, 0.456 * 255, 0.406 * 255], # RGB 8 | PIXEL_STD=[0.229 * 255, 0.224 * 255, 0.225 * 255], 9 | WEIGHTS="/data/repos/cvpods/playground/self_supervised/classification/resnet/res50.scratch.imagenet.224size.100e/log/model_final_pretrain_weight.pkl", 10 | MASK_ON=True, 11 | BACKBONE=dict( 12 | FREEZE_AT=0, 13 | ), 14 | RESNETS=dict( 15 | DEPTH=50, 16 | NORM="nnSyncBN", 17 | STRIDE_IN_1X1=False, # True only for msra weights 18 | ), 19 | FPN=dict( 20 | NORM="nnSyncBN", 21 | ), 22 | ROI_BOX_HEAD=dict( 23 | NORM="nnSyncBN", 24 | NUM_CONV=4, 25 | NUM_FC=1, 26 | ), 27 | ROI_MASK_HEAD=dict( 28 | NORM="nnSyncBN", 29 | ), 30 | ), 31 | DATASETS=dict( 32 | TRAIN=("coco_2017_train",), 33 | TEST=("coco_2017_val",), 34 | ), 35 | SOLVER=dict( 36 | LR_SCHEDULER=dict( 37 | STEPS=(60000, 80000), 38 | MAX_ITER=90000, 39 | ), 40 | OPTIMIZER=dict( 41 | BASE_LR=0.02, 42 | ), 43 | IMS_PER_BATCH=16, 44 | CHECKPOINT_PERIOD=30000, 45 | ), 46 | INPUT=dict( 47 | FORMAT="RGB", 48 | AUG=dict( 49 | TRAIN_PIPELINES=[ 50 | ("ResizeShortestEdge", 51 | dict(short_edge_length=(640, 672, 704, 736, 768, 800), 52 | max_size=1333, sample_style="choice")), 53 | ("RandomFlip", dict()), 54 | ], 55 | TEST_PIPELINES=[ 56 | ("ResizeShortestEdge", 57 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 58 | ], 59 | ), 60 | ), 61 | TEST=dict( 62 | # EVAL_PEROID=10000, 63 | PRECISE_BN=dict( 64 | ENABLED=True, 65 | ), 66 | ), 67 | OUTPUT_DIR=osp.join( 68 | '/data/Outputs/model_logs/cvpods_playground', 69 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1]), 70 | ) 71 | 72 | 73 | class MaskRCNNConfig(RCNNFPNConfig): 74 | def __init__(self): 75 | super(MaskRCNNConfig, self).__init__() 76 | self._register_configuration(_config_dict) 77 | 78 | 79 | config = MaskRCNNConfig() 80 | -------------------------------------------------------------------------------- /examples/downstream/mask_rcnn/mask_rcnn.res50.fpn.coco.multiscale.1x.syncbn/net.py: -------------------------------------------------------------------------------- 1 | from cvpods.layers import ShapeSpec 2 | from cvpods.modeling.backbone import Backbone 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone 4 | from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN 5 | from cvpods.modeling.proposal_generator import RPN 6 | from cvpods.modeling.roi_heads import StandardROIHeads 7 | from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead 8 | from cvpods.modeling.roi_heads.mask_head import MaskRCNNConvUpsampleHead 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | if input_shape is None: 13 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 14 | backbone = build_resnet_fpn_backbone(cfg, input_shape) 15 | assert isinstance(backbone, Backbone) 16 | return backbone 17 | 18 | 19 | def build_proposal_generator(cfg, input_shape): 20 | return RPN(cfg, input_shape) 21 | 22 | 23 | def build_roi_heads(cfg, input_shape): 24 | return StandardROIHeads(cfg, input_shape) 25 | 26 | 27 | 
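# Relative to the Faster R-CNN net.py above, the only additions below are the
# mask head builder and its registration in build_model, matching MASK_ON=True
# in this experiment's config.py.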
def build_box_head(cfg, input_shape): 28 | return FastRCNNConvFCHead(cfg, input_shape) 29 | 30 | 31 | def build_mask_head(cfg, input_shape): 32 | return MaskRCNNConvUpsampleHead(cfg, input_shape) 33 | 34 | 35 | def build_model(cfg): 36 | cfg.build_backbone = build_backbone 37 | cfg.build_proposal_generator = build_proposal_generator 38 | cfg.build_roi_heads = build_roi_heads 39 | cfg.build_box_head = build_box_head 40 | cfg.build_mask_head = build_mask_head 41 | 42 | model = GeneralizedRCNN(cfg) 43 | return model 44 | -------------------------------------------------------------------------------- /examples/moco/moco.res50.scratch.imagenet.224size.256bs.200e.lin_clsv2/README.md: -------------------------------------------------------------------------------- 1 | # simclr.moco_setting.lin_clsv2 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 67.286 | 87.992 | 8 | -------------------------------------------------------------------------------- /examples/moco/moco.res50.scratch.imagenet.224size.256bs.200e.lin_clsv2/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="../simclr.moco_setting/log/model_final_pretrain_weight.pkl", 9 | BACKBONE=dict(FREEZE_AT=0, ), # freeze all parameters manually in imagenet.py 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["res5", "linear"], 15 | STRIDE_IN_1X1=False, 16 | ), 17 | ), 18 | DATASETS=dict( 19 | TRAIN=("imagenet_train", ), 20 | TEST=("imagenet_val", ), 21 | ), 22 | DATALOADER=dict( 23 | NUM_WORKERS=6, 24 | ), 25 | SOLVER=dict( 26 | LR_SCHEDULER=dict( 27 | STEPS=(60, 80), 28 | MAX_EPOCH=90, 29 | WARMUP_ITERS=0, 30 | ), 31 | OPTIMIZER=dict( 32 | BASE_LR=30, 33 | MOMENTUM=0.9, 34 | WEIGHT_DECAY=0.0, 35 | WEIGHT_DECAY_NORM=0.0, 36 | ), 37 | CHECKPOINT_PERIOD=10, 38 | IMS_PER_BATCH=256, 39 | ), 40 | INPUT=dict( 41 | AUG=dict( 42 | TRAIN_PIPELINES=[ 43 | ("Torch_Compose", transforms.Compose([ 44 | transforms.RandomResizedCrop(224), 45 | transforms.RandomHorizontalFlip(), 46 | ])), 47 | ], 48 | TEST_PIPELINES=[ 49 | ("Torch_Compose", transforms.Compose([ 50 | transforms.Resize(256), 51 | transforms.CenterCrop(224), 52 | ])), 53 | ] 54 | ) 55 | ), 56 | TEST=dict( 57 | EVAL_PERIOD=10, 58 | ), 59 | OUTPUT_DIR=osp.join( 60 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 61 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1] 62 | ) 63 | ) 64 | 65 | 66 | class ClassificationConfig(BaseClassificationConfig): 67 | def __init__(self): 68 | super(ClassificationConfig, self).__init__() 69 | self._register_configuration(_config_dict) 70 | 71 | 72 | config = ClassificationConfig() 73 | -------------------------------------------------------------------------------- /examples/moco/moco.res50.scratch.imagenet.224size.256bs.200e.lin_clsv2/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec 6 | from cvpods.structures import ImageList 7 | 8 | 9 | def accuracy(output, target, topk=(1,)): 10 | """Computes the accuracy over the k top predictions for the specified values of k""" 11 | with torch.no_grad(): 12 | maxk = max(topk) 13 | batch_size = target.size(0) 
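        # The code below computes every requested top-k in one pass: topk()
        # yields the indices of the maxk highest logits per sample; after the
        # transpose, row i of `correct` marks whether the (i+1)-th best guess
        # hit the label, so summing the first k rows gives the top-k hit count.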
14 | 15 | _, pred = output.topk(maxk, 1, True, True) 16 | pred = pred.t() 17 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 18 | 19 | res = [] 20 | for k in topk: 21 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 22 | res.append(correct_k.mul_(100.0 / batch_size)) 23 | return res 24 | 25 | 26 | class Classification(nn.Module): 27 | def __init__(self, cfg): 28 | super(Classification, self).__init__() 29 | 30 | self.device = torch.device(cfg.MODEL.DEVICE) 31 | 32 | self.network = cfg.build_backbone( 33 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 34 | 35 | self.freeze() 36 | self.network.eval() 37 | 38 | # init the fc layer 39 | self.network.linear.weight.data.normal_(mean=0.0, std=0.01) 40 | self.network.linear.bias.data.zero_() 41 | 42 | self.loss_evaluator = nn.CrossEntropyLoss() 43 | 44 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(1, 3, 1, 1) 45 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(1, 3, 1, 1) 46 | self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std 47 | 48 | self.to(self.device) 49 | 50 | def freeze(self): 51 | for name, param in self.network.named_parameters(): 52 | if name not in ['linear.weight', 'linear.bias']: 53 | param.requires_grad = False 54 | 55 | def forward(self, batched_inputs): 56 | self.network.eval() 57 | images = self.preprocess_image(batched_inputs) 58 | 59 | outputs = self.network(images) 60 | preds = outputs["linear"] 61 | 62 | if self.training: 63 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 64 | losses = self.loss_evaluator(preds, labels) 65 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 66 | 67 | return { 68 | "loss_cls": losses, 69 | "top1_acc": acc1, 70 | "top5_acc": acc5, 71 | } 72 | else: 73 | return preds 74 | 75 | def preprocess_image(self, batched_inputs): 76 | """ 77 | Normalize, pad and batch the input images. 78 | """ 79 | images = torch.stack([x["image"] for x in batched_inputs]).to(self.device) 80 | images = self.normalizer(images) 81 | return images 82 | -------------------------------------------------------------------------------- /examples/moco/moco.res50.scratch.imagenet.224size.256bs.200e.lin_clsv2/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from imagenet import Classification 8 | 9 | def build_backbone(cfg, input_shape=None): 10 | """ 11 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
12 | 13 | Returns: 14 | an instance of :class:`Backbone` 15 | """ 16 | if input_shape is None: 17 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 18 | 19 | backbone = build_resnet_backbone(cfg, input_shape) 20 | assert isinstance(backbone, Backbone) 21 | return backbone 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = Classification(cfg) 28 | 29 | logger = logging.getLogger(__name__) 30 | logger.info("Model:\n{}".format(model)) 31 | return model 32 | -------------------------------------------------------------------------------- /examples/moco/moco.res50.scratch.imagenet.224size.256bs.200e/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="", 9 | AS_PRETRAIN=True, 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["linear"], 15 | STRIDE_IN_1X1=False, # default true for msra models 16 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 17 | ), 18 | MOCO=dict( 19 | DIM=128, 20 | K=65536, 21 | MOMENTUM=0.999, 22 | TAU=0.07, 23 | MLP=False, 24 | ), 25 | ), 26 | DATASETS=dict( 27 | TRAIN=("imagenet_train", ), 28 | TEST=("imagenet_val", ), 29 | ), 30 | DATALOADER=dict(NUM_WORKERS=6, ), 31 | SOLVER=dict( 32 | LR_SCHEDULER=dict( 33 | STEPS=(120, 160), 34 | MAX_EPOCH=200, 35 | WARMUP_ITERS=5, 36 | ), 37 | OPTIMIZER=dict( 38 | BASE_LR=0.03, 39 | MOMENTUM=0.9, 40 | WEIGHT_DECAY=1e-4, 41 | WEIGHT_DECAY_NORM=1e-4, 42 | ), 43 | CHECKPOINT_PERIOD=10, 44 | IMS_PER_BATCH=256, 45 | ), 46 | INPUT=dict( 47 | AUG=dict( 48 | TRAIN_PIPELINES=[ 49 | ("RepeatList", dict(transforms=[ 50 | ("Torch_Compose", transforms.Compose([ 51 | transforms.RandomResizedCrop(224, scale=(0.2, 1.)), 52 | transforms.RandomGrayscale(p=0.2), 53 | transforms.ColorJitter(0.4, 0.4, 0.4, 0.4), 54 | transforms.RandomHorizontalFlip(), 55 | ])) 56 | ], repeat_times=2)), 57 | ], 58 | ) 59 | ), 60 | OUTPUT_DIR=osp.join( 61 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 62 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1]), 63 | ) 64 | 65 | 66 | class MoCoConfig(BaseClassificationConfig): 67 | def __init__(self): 68 | super(MoCoConfig, self).__init__() 69 | self._register_configuration(_config_dict) 70 | 71 | 72 | config = MoCoConfig() 73 | -------------------------------------------------------------------------------- /examples/moco/moco.res50.scratch.imagenet.224size.256bs.200e/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from moco import MoCo 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_resnet_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_model(cfg): 26 | 27 | cfg.build_backbone = build_backbone 28 | 29 | model = MoCo(cfg) 30 | 31 | logger = logging.getLogger(__name__) 32 | logger.info("Model:\n{}".format(model)) 33 | return model 34 | -------------------------------------------------------------------------------- /examples/moco/mocov2.res50.scratch.imagenet.224size.256bs.200e.lin_clsv2/README.md: -------------------------------------------------------------------------------- 1 | # simclr.moco_setting.lin_clsv2 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 67.286 | 87.992 | 8 | -------------------------------------------------------------------------------- /examples/moco/mocov2.res50.scratch.imagenet.224size.256bs.200e.lin_clsv2/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="../simclr.moco_setting/log/model_final_pretrain_weight.pkl", 9 | BACKBONE=dict(FREEZE_AT=0, ), # freeze all parameters manually in imagenet.py 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["res5", "linear"], 15 | STRIDE_IN_1X1=False, 16 | ), 17 | ), 18 | DATASETS=dict( 19 | TRAIN=("imagenet_train", ), 20 | TEST=("imagenet_val", ), 21 | ), 22 | DATALOADER=dict( 23 | NUM_WORKERS=6, 24 | ), 25 | SOLVER=dict( 26 | LR_SCHEDULER=dict( 27 | STEPS=(60, 80), 28 | MAX_EPOCH=90, 29 | WARMUP_ITERS=0, 30 | ), 31 | OPTIMIZER=dict( 32 | BASE_LR=30, 33 | MOMENTUM=0.9, 34 | WEIGHT_DECAY=0.0, 35 | WEIGHT_DECAY_NORM=0.0, 36 | ), 37 | CHECKPOINT_PERIOD=10, 38 | IMS_PER_BATCH=256, 39 | ), 40 | INPUT=dict( 41 | AUG=dict( 42 | TRAIN_PIPELINES=[ 43 | ("Torch_Compose", transforms.Compose([ 44 | transforms.RandomResizedCrop(224), 45 | transforms.RandomHorizontalFlip(), 46 | ])) 47 | ], 48 | TEST_PIPELINES=[ 49 | ("Torch_Compose", transforms.Compose([ 50 | transforms.Resize(256), 51 | transforms.CenterCrop(224), 52 | ])) 53 | ] 54 | ) 55 | ), 56 | TEST=dict( 57 | EVAL_PERIOD=10, 58 | ), 59 | OUTPUT_DIR=osp.join( 60 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 61 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1] 62 | ) 63 | ) 64 | 65 | 66 | class ClassificationConfig(BaseClassificationConfig): 67 | def __init__(self): 68 | super(ClassificationConfig, self).__init__() 69 | self._register_configuration(_config_dict) 70 | 71 | 72 | config = ClassificationConfig() 73 | -------------------------------------------------------------------------------- /examples/moco/mocov2.res50.scratch.imagenet.224size.256bs.200e.lin_clsv2/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec 6 | from cvpods.structures import ImageList 7 | 8 | 9 | def accuracy(output, target, topk=(1,)): 10 | """Computes the accuracy over the k top predictions for the specified values of k""" 11 | with torch.no_grad(): 12 | maxk = max(topk) 13 | batch_size = target.size(0) 14 | 15 | 
_, pred = output.topk(maxk, 1, True, True) 16 | pred = pred.t() 17 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 18 | 19 | res = [] 20 | for k in topk: 21 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 22 | res.append(correct_k.mul_(100.0 / batch_size)) 23 | return res 24 | 25 | 26 | class Classification(nn.Module): 27 | def __init__(self, cfg): 28 | super(Classification, self).__init__() 29 | 30 | self.device = torch.device(cfg.MODEL.DEVICE) 31 | 32 | self.network = cfg.build_backbone( 33 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 34 | 35 | self.freeze() 36 | self.network.eval() 37 | 38 | # init the fc layer 39 | self.network.linear.weight.data.normal_(mean=0.0, std=0.01) 40 | self.network.linear.bias.data.zero_() 41 | 42 | self.loss_evaluator = nn.CrossEntropyLoss() 43 | 44 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(1, 3, 1, 1) 45 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(1, 3, 1, 1) 46 | self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std 47 | 48 | self.to(self.device) 49 | 50 | def freeze(self): 51 | for name, param in self.network.named_parameters(): 52 | if name not in ['linear.weight', 'linear.bias']: 53 | param.requires_grad = False 54 | 55 | def forward(self, batched_inputs): 56 | self.network.eval() 57 | images = self.preprocess_image(batched_inputs) 58 | 59 | outputs = self.network(images) 60 | preds = outputs["linear"] 61 | 62 | if self.training: 63 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 64 | losses = self.loss_evaluator(preds, labels) 65 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 66 | 67 | return { 68 | "loss_cls": losses, 69 | "top1_acc": acc1, 70 | "top5_acc": acc5, 71 | } 72 | else: 73 | return preds 74 | 75 | def preprocess_image(self, batched_inputs): 76 | """ 77 | Normalize, pad and batch the input images. 78 | """ 79 | images = torch.stack([x["image"] for x in batched_inputs]).to(self.device) 80 | images = self.normalizer(images) 81 | return images 82 | -------------------------------------------------------------------------------- /examples/moco/mocov2.res50.scratch.imagenet.224size.256bs.200e.lin_clsv2/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from imagenet import Classification 8 | 9 | def build_backbone(cfg, input_shape=None): 10 | """ 11 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
12 | 13 | Returns: 14 | an instance of :class:`Backbone` 15 | """ 16 | if input_shape is None: 17 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 18 | 19 | backbone = build_resnet_backbone(cfg, input_shape) 20 | assert isinstance(backbone, Backbone) 21 | return backbone 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = Classification(cfg) 28 | 29 | logger = logging.getLogger(__name__) 30 | logger.info("Model:\n{}".format(model)) 31 | return model 32 | -------------------------------------------------------------------------------- /examples/moco/mocov2.res50.scratch.imagenet.224size.256bs.200e/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="", 9 | AS_PRETRAIN=True, 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["linear"], 15 | STRIDE_IN_1X1=False, # default true for msra models 16 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 17 | ), 18 | MOCO=dict( 19 | DIM=128, 20 | K=65536, 21 | MOMENTUM=0.999, 22 | TAU=0.2, 23 | MLP=True, 24 | ), 25 | ), 26 | DATASETS=dict( 27 | TRAIN=("imagenet_train", ), 28 | TEST=("imagenet_val", ), 29 | ), 30 | DATALOADER=dict(NUM_WORKERS=6, ), 31 | SOLVER=dict( 32 | LR_SCHEDULER=dict( 33 | NAME="WarmupCosineLR", 34 | MAX_EPOCH=200, 35 | WARMUP_ITERS=5, 36 | ), 37 | OPTIMIZER=dict( 38 | BASE_LR=0.03, 39 | MOMENTUM=0.9, 40 | WEIGHT_DECAY=1e-4, 41 | WEIGHT_DECAY_NORM=1e-4, 42 | ), 43 | CHECKPOINT_PERIOD=10, 44 | IMS_PER_BATCH=256, 45 | ), 46 | INPUT=dict( 47 | AUG=dict( 48 | TRAIN_PIPELINES=[ 49 | ("RepeatList", dict(transforms=[ 50 | ("Torch_Compose", transforms.Compose([ 51 | transforms.RandomResizedCrop(224, scale=(0.2, 1.)), 52 | transforms.RandomApply([ 53 | transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8), 54 | transforms.RandomGrayscale(p=0.2), 55 | transforms.RandomHorizontalFlip(), 56 | ])), 57 | ("GaussianBlur", dict(sigma=[.1, 2.], p=0.5)), 58 | ], repeat_times=2)), 59 | ], 60 | ) 61 | ), 62 | OUTPUT_DIR=osp.join( 63 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 64 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1])) 65 | 66 | 67 | class MoCoV2Config(BaseClassificationConfig): 68 | def __init__(self): 69 | super(MoCoV2Config, self).__init__() 70 | self._register_configuration(_config_dict) 71 | 72 | 73 | config = MoCoV2Config() 74 | -------------------------------------------------------------------------------- /examples/moco/mocov2.res50.scratch.imagenet.224size.256bs.200e/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from moco import MoCo 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_resnet_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_model(cfg): 26 | 27 | cfg.build_backbone = build_backbone 28 | 29 | model = MoCo(cfg) 30 | 31 | logger = logging.getLogger(__name__) 32 | logger.info("Model:\n{}".format(model)) 33 | return model 34 | -------------------------------------------------------------------------------- /examples/momentum2teacher/m2t.imagenet.mom0.99.224size.100e.lin_cls/README.md: -------------------------------------------------------------------------------- 1 | # m2t.imagenet.mom0.99.224size.100e.lin_cls 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 10.878 | 24.048 | 8 | -------------------------------------------------------------------------------- /examples/momentum2teacher/m2t.imagenet.mom0.99.224size.100e.lin_cls/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | 8 | MODEL=dict( 9 | WEIGHTS="../m2t.imagenet.mom0.99.224size.100e/log/model_final.pkl", 10 | BACKBONE=dict(FREEZE_AT=0, ), # freeze all parameters manually in imagenet.py 11 | RESNETS=dict( 12 | DEPTH=50, 13 | NUM_CLASSES=1000, 14 | NORM="BN", 15 | OUT_FEATURES=["res5", "linear"], 16 | STRIDE_IN_1X1=False, 17 | ), 18 | ), 19 | DATASETS=dict( 20 | TRAIN=("imagenet_train", ), 21 | TEST=("imagenet_val", ), 22 | ), 23 | DATALOADER=dict( 24 | NUM_WORKERS=4, 25 | ), 26 | SOLVER=dict( 27 | LR_SCHEDULER=dict( 28 | NAME="WarmupCosineLR", 29 | MAX_EPOCH=80, 30 | WARMUP_ITERS=0, 31 | ), 32 | OPTIMIZER=dict( 33 | NAME="SGD", 34 | BASE_LR=0.5, 35 | MOMENTUM=0.9, 36 | WEIGHT_DECAY=0.0, 37 | ), 38 | CHECKPOINT_PERIOD=10, 39 | IMS_PER_BATCH=256, 40 | IMS_PER_DEVICE=32, 41 | ), 42 | INPUT=dict( 43 | FORMAT="RGB", 44 | AUG=dict( 45 | TRAIN_PIPELINES=[ 46 | ("Torch_Compose", transforms.Compose([ 47 | transforms.RandomResizedCrop(224), 48 | transforms.RandomHorizontalFlip(), 49 | transforms.ToTensor(), 50 | transforms.Normalize( 51 | mean=[0.485, 0.456, 0.406], 52 | std=[0.229, 0.224, 0.225]), 53 | ])), 54 | ], 55 | TEST_PIPELINES=[ 56 | ("Torch_Compose", transforms.Compose([ 57 | transforms.Resize(256), 58 | transforms.CenterCrop(224), 59 | transforms.ToTensor(), 60 | transforms.Normalize( 61 | mean=[0.485, 0.456, 0.406], 62 | std=[0.229, 0.224, 0.225]), 63 | ])) 64 | ], 65 | ) 66 | ), 67 | TEST=dict( 68 | EVAL_PERIOD=10, 69 | ), 70 | OUTPUT_DIR=osp.join( 71 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 72 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1] 73 | ) 74 | ) 75 | 76 | 77 | class ClassificationConfig(BaseClassificationConfig): 78 | def __init__(self): 79 | super(ClassificationConfig, self).__init__() 80 | self._register_configuration(_config_dict) 81 | 82 | 83 | config = ClassificationConfig() 84 | -------------------------------------------------------------------------------- /examples/momentum2teacher/m2t.imagenet.mom0.99.224size.100e.lin_cls/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import 
ShapeSpec 6 | 7 | 8 | def accuracy(output, target, topk=(1,)): 9 | """Computes the accuracy over the k top predictions for the specified values of k""" 10 | with torch.no_grad(): 11 | maxk = max(topk) 12 | batch_size = target.size(0) 13 | 14 | _, pred = output.topk(maxk, 1, True, True) 15 | pred = pred.t() 16 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 17 | 18 | res = [] 19 | for k in topk: 20 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 21 | res.append(correct_k.mul_(100.0 / batch_size)) 22 | return res 23 | 24 | 25 | class Classification(nn.Module): 26 | def __init__(self, cfg): 27 | super(Classification, self).__init__() 28 | 29 | self.device = torch.device(cfg.MODEL.DEVICE) 30 | 31 | self.network = cfg.build_backbone( 32 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 33 | 34 | self.freeze() 35 | self.network.eval() 36 | 37 | # init the fc layer 38 | self.network.linear.weight.data.normal_(mean=0.0, std=0.01) 39 | self.network.linear.bias.data.zero_() 40 | 41 | self.norm = nn.BatchNorm1d(1000) 42 | 43 | self.loss_evaluator = nn.CrossEntropyLoss() 44 | 45 | self.to(self.device) 46 | 47 | def freeze(self): 48 | for name, param in self.network.named_parameters(): 49 | if name not in ['linear.weight', 'linear.bias']: 50 | param.requires_grad = False 51 | 52 | def forward(self, batched_inputs): 53 | self.network.eval() 54 | images = torch.stack([x["image"] for x in batched_inputs]).to(self.device) 55 | outputs = self.network(images) 56 | preds = self.norm(outputs["linear"]) 57 | 58 | if self.training: 59 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 60 | losses = self.loss_evaluator(preds, labels) 61 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 62 | 63 | return { 64 | "loss_cls": losses, 65 | "top1_acc": acc1, 66 | "top5_acc": acc5, 67 | } 68 | else: 69 | return preds 70 | -------------------------------------------------------------------------------- /examples/momentum2teacher/m2t.imagenet.mom0.99.224size.100e.lin_cls/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from imagenet import Classification 8 | 9 | def build_backbone(cfg, input_shape=None): 10 | """ 11 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
12 | 13 | Returns: 14 | an instance of :class:`Backbone` 15 | """ 16 | if input_shape is None: 17 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 18 | 19 | backbone = build_resnet_backbone(cfg, input_shape) 20 | assert isinstance(backbone, Backbone) 21 | return backbone 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = Classification(cfg) 28 | 29 | logger = logging.getLogger(__name__) 30 | logger.info("Model:\n{}".format(model)) 31 | return model 32 | -------------------------------------------------------------------------------- /examples/momentum2teacher/m2t.imagenet.mom0.99.224size.100e/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | from torchvision import transforms 6 | from transforms import GaussianBlur, Solarization 7 | 8 | _config_dict = dict( 9 | MODEL=dict( 10 | WEIGHTS="", 11 | AS_PRETRAIN=True, 12 | RESNETS=dict( 13 | DEPTH=50, 14 | NUM_CLASSES=1000, 15 | NORM="SyncBN", 16 | OUT_FEATURES=["res5"], 17 | STRIDE_IN_1X1=False, # default true for msra models 18 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 19 | ), 20 | M2T=dict( 21 | PARAM_MOMENTUM=0.99, 22 | ), 23 | ), 24 | DATASETS=dict( 25 | TRAIN=("imagenet_train", ), 26 | TEST=("imagenet_val", ), 27 | ), 28 | SOLVER=dict( 29 | LR_SCHEDULER=dict( 30 | NAME="WarmupCosineLR", 31 | MAX_EPOCH=100, 32 | WARMUP_ITERS=10, 33 | WARMUP_METHOD="linear", 34 | WARMUP_FACTOR=1e-6/0.05, 35 | EPOCH_WISE=False, 36 | ), 37 | OPTIMIZER=dict( 38 | NAME="SGD", 39 | BASE_LR=0.05, 40 | MOMENTUM=0.9, 41 | WEIGHT_DECAY=1e-4, 42 | ), 43 | CHECKPOINT_PERIOD=5, 44 | IMS_PER_BATCH=256, 45 | IMS_PER_DEVICE=32, 46 | ), 47 | DATALOADER=dict(NUM_WORKERS=8, ), 48 | TRAINER=dict(FP16=dict(ENABLED=False),), 49 | INPUT=dict( 50 | AUG=dict( 51 | TRAIN_PIPELINES=dict( 52 | q=[ 53 | ("Torch_Compose", transforms.Compose([ 54 | transforms.RandomResizedCrop(224, scale=(0.08, 1.0)), 55 | transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.2, 0.1)], p=0.8), 56 | transforms.RandomApply([GaussianBlur([0.1, 2.0])], p=1.0), 57 | transforms.RandomGrayscale(p=0.2), 58 | transforms.RandomHorizontalFlip(), 59 | transforms.ToTensor(), 60 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 61 | ])), 62 | ], 63 | k=[ 64 | ("Torch_Compose", transforms.Compose([ 65 | transforms.RandomResizedCrop(224, scale=(0.08, 1.0)), 66 | transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.2, 0.1)], p=0.8), 67 | transforms.RandomApply([GaussianBlur([0.1, 2.0])], p=0.1), 68 | transforms.RandomGrayscale(p=0.2), 69 | transforms.RandomHorizontalFlip(), 70 | transforms.RandomApply([Solarization()], p=0.2), 71 | transforms.ToTensor(), 72 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 73 | ])) 74 | ], 75 | ) 76 | )), 77 | OUTPUT_DIR=osp.join( 78 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 79 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1])) 80 | 81 | 82 | class MoCoV2Config(BaseClassificationConfig): 83 | def __init__(self): 84 | super(MoCoV2Config, self).__init__() 85 | self._register_configuration(_config_dict) 86 | 87 | 88 | config = MoCoV2Config() 89 | -------------------------------------------------------------------------------- /examples/momentum2teacher/m2t.imagenet.mom0.99.224size.100e/m2_teacher.py: 
-------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: zeming li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | import torch 7 | import math 8 | from torch.nn import Module 9 | import resnet_mbn as resnet 10 | 11 | 12 | class M2Teacher(Module): 13 | def __init__(self, cfg): 14 | super(M2Teacher, self).__init__() 15 | self.device = torch.device(cfg.MODEL.DEVICE) 16 | 17 | self.param_momentum = cfg.MODEL.M2T.PARAM_MOMENTUM 18 | self.total_iters = cfg.SOLVER.LR_SCHEDULER.MAX_ITER * cfg.SOLVER.BATCH_SUBDIVISIONS 19 | self.current_train_iter = 0 20 | 21 | self.student_encoder = resnet.resnet50( 22 | low_dim=256, width=1, hidden_dim=4096, MLP="byol", CLS=False, bn="customized", predictor=True 23 | ) 24 | self.teacher_encoder = resnet.resnet50( 25 | low_dim=256, width=1, hidden_dim=4096, MLP="byol", CLS=False, bn="mbn", predictor=False 26 | ) 27 | for p in self.teacher_encoder.parameters(): 28 | p.requires_grad = False 29 | 30 | self.momentum_update(m=0)  # m=0 copies the student weights into the teacher at initialization 31 | for m in self.teacher_encoder.modules(): 32 | if isinstance(m, resnet.MomentumBatchNorm1d) or isinstance(m, resnet.MomentumBatchNorm2d): 33 | m.total_iters = self.total_iters 34 | 35 | self.to(self.device) 36 | 37 | @torch.no_grad() 38 | def momentum_update(self, m): 39 | for p1, p2 in zip(self.student_encoder.parameters(), self.teacher_encoder.parameters()): 40 | # p2.data.mul_(m).add_(1 - m, p1.detach().data) 41 | p2.data = m * p2.data + (1.0 - m) * p1.detach().data 42 | 43 | def get_param_momentum(self):  # cosine schedule: momentum ramps from PARAM_MOMENTUM up to 1.0 over training 44 | return 1.0 - (1.0 - self.param_momentum) * ( 45 | (math.cos(math.pi * self.current_train_iter / self.total_iters) + 1) * 0.5 46 | ) 47 | 48 | def forward(self, batched_inputs, update_param=True): 49 | if update_param: 50 | current_param_momentum = self.get_param_momentum() 51 | self.momentum_update(current_param_momentum) 52 | 53 | x1 = torch.stack([bi['q']["image"] for bi in batched_inputs]).to(self.device) 54 | x2 = torch.stack([bi['k']["image"] for bi in batched_inputs]).to(self.device) 55 | 56 | q1 = self.student_encoder(x1) 57 | q2 = self.student_encoder(x2) 58 | 59 | with torch.no_grad(): 60 | k1 = self.teacher_encoder(x2) 61 | k2 = self.teacher_encoder(x1) 62 | con_loss = (4 - 2 * ((q1 * k1).sum(dim=-1, keepdim=True) + (q2 * k2).sum(dim=-1, keepdim=True))).mean()  # BYOL-style loss: 2 - 2 * <q, k> per view pair, assuming L2-normalized encoder outputs 63 | 64 | self.current_train_iter += 1 65 | if self.training: 66 | return {"loss": con_loss}  # eval mode returns None; this module is only used for pre-training 67 | 68 | def preprocess_image(self, batched_inputs):  # NOTE: dead code, never called; normalization is done by the transform pipeline in config.py, and ImageList / self.normalizer are not defined in this module 69 | """ 70 | Normalize, pad and batch the input images. 71 | """ 72 | images = [x.float().to(self.device) for x in batched_inputs] 73 | images = [self.normalizer(x) for x in images] 74 | images = ImageList.from_tensors(images, self.size_divisibility) 75 | 76 | return images 77 | 78 | -------------------------------------------------------------------------------- /examples/momentum2teacher/m2t.imagenet.mom0.99.224size.100e/net.py: -------------------------------------------------------------------------------- 1 | from cvpods.layers import ShapeSpec 2 | from cvpods.modeling.backbone import Backbone 3 | from cvpods.modeling.backbone import build_resnet_backbone 4 | 5 | from m2_teacher import M2Teacher 6 | 7 | 8 | def build_backbone(cfg, input_shape=None): 9 | """ 10 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`.
11 | 12 | Returns: 13 | an instance of :class:`Backbone` 14 | """ 15 | if input_shape is None: 16 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 17 | 18 | backbone = build_resnet_backbone(cfg, input_shape) 19 | assert isinstance(backbone, Backbone) 20 | return backbone 21 | 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = M2Teacher(cfg) 28 | 29 | return model 30 | -------------------------------------------------------------------------------- /examples/momentum2teacher/m2t.imagenet.mom0.99.224size.100e/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | from PIL import ImageFilter, ImageOps 4 | import torchvision.transforms as transforms 5 | 6 | class Solarization(object): 7 | def __call__(self, x): 8 | return ImageOps.solarize(x) 9 | 10 | 11 | class GaussianBlur(object): 12 | """Gaussian blur augmentation in SimCLR https://arxiv.org/abs/2002.05709""" 13 | 14 | def __init__(self, sigma=[0.1, 2.0]): 15 | self.sigma = sigma 16 | 17 | def __call__(self, x): 18 | sigma = random.uniform(self.sigma[0], self.sigma[1]) 19 | x = x.filter(ImageFilter.GaussianBlur(radius=sigma)) 20 | return x 21 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e.lin_cls/README.md: -------------------------------------------------------------------------------- 1 | # simclr.lin_cls 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 60.830 | 83.578 | 8 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e.lin_cls/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="../simclr.100e/log/model_final_pretrain_weight.pkl", 9 | BACKBONE=dict(FREEZE_AT=0, ), # freeze all parameters manually in imagenet.py 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["res5", "linear"], 15 | STRIDE_IN_1X1=False, 16 | ), 17 | ), 18 | DATASETS=dict( 19 | TRAIN=("imagenet_train", ), 20 | TEST=("imagenet_val", ), 21 | ), 22 | DATALOADER=dict( 23 | NUM_WORKERS=6, 24 | ), 25 | SOLVER=dict( 26 | LR_SCHEDULER=dict( 27 | STEPS=(60, 80), 28 | MAX_EPOCH=90, 29 | WARMUP_ITERS=0, 30 | ), 31 | OPTIMIZER=dict( 32 | BASE_LR=0.1, 33 | MOMENTUM=0.9, 34 | WEIGHT_DECAY=1e-6, 35 | WEIGHT_DECAY_NORM=1e-6, 36 | ), 37 | CHECKPOINT_PERIOD=10, 38 | IMS_PER_BATCH=256, 39 | ), 40 | INPUT=dict( 41 | AUG=dict( 42 | TRAIN_PIPELINES=[ 43 | ("Torch_Compose", transforms.Compose([ 44 | transforms.RandomResizedCrop(224), 45 | transforms.RandomHorizontalFlip(), 46 | ])) 47 | ], 48 | TEST_PIPELINES=[ 49 | ("Torch_Compose", transforms.Compose([ 50 | transforms.Resize(256), 51 | transforms.CenterCrop(224), 52 | ])) 53 | ] 54 | ) 55 | ), 56 | TEST=dict( 57 | EVAL_PERIOD=10, 58 | ), 59 | OUTPUT_DIR=osp.join( 60 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 61 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1] 62 | ) 63 | ) 64 | 65 | 66 | class ClassificationConfig(BaseClassificationConfig): 67 | def __init__(self): 68 | super(ClassificationConfig, self).__init__() 69 | 
self._register_configuration(_config_dict) 70 | 71 | 72 | config = ClassificationConfig() 73 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e.lin_cls/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec 6 | from cvpods.structures import ImageList 7 | 8 | 9 | def accuracy(output, target, topk=(1,)): 10 | """Computes the accuracy over the k top predictions for the specified values of k""" 11 | with torch.no_grad(): 12 | maxk = max(topk) 13 | batch_size = target.size(0) 14 | 15 | _, pred = output.topk(maxk, 1, True, True) 16 | pred = pred.t() 17 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 18 | 19 | res = [] 20 | for k in topk: 21 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 22 | res.append(correct_k.mul_(100.0 / batch_size)) 23 | return res 24 | 25 | 26 | class Classification(nn.Module): 27 | def __init__(self, cfg): 28 | super(Classification, self).__init__() 29 | 30 | self.device = torch.device(cfg.MODEL.DEVICE) 31 | 32 | self.network = cfg.build_backbone( 33 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 34 | 35 | self.freeze() 36 | self.network.eval() 37 | 38 | # init the fc layer 39 | self.network.linear.weight.data.normal_(mean=0.0, std=0.01) 40 | self.network.linear.bias.data.zero_() 41 | 42 | self.loss_evaluator = nn.CrossEntropyLoss() 43 | 44 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(1, 3, 1, 1) 45 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(1, 3, 1, 1) 46 | self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std 47 | 48 | self.to(self.device) 49 | 50 | def freeze(self): 51 | for name, param in self.network.named_parameters(): 52 | if name not in ['linear.weight', 'linear.bias']: 53 | param.requires_grad = False 54 | 55 | def forward(self, batched_inputs): 56 | self.network.eval() 57 | 58 | images = self.preprocess_image(batched_inputs) 59 | 60 | outputs = self.network(images) 61 | preds = outputs["linear"] 62 | 63 | if self.training: 64 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 65 | losses = self.loss_evaluator(preds, labels) 66 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 67 | 68 | return { 69 | "loss_cls": losses, 70 | "top1_acc": acc1, 71 | "top5_acc": acc5, 72 | } 73 | else: 74 | return preds 75 | 76 | def preprocess_image(self, batched_inputs): 77 | """ 78 | Normalize, pad and batch the input images. 79 | """ 80 | images = torch.stack([x["image"] for x in batched_inputs]).to(self.device) 81 | images = self.normalizer(images) 82 | return images 83 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e.lin_cls/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from imagenet import Classification 8 | 9 | def build_backbone(cfg, input_shape=None): 10 | """ 11 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`.
12 | 13 | Returns: 14 | an instance of :class:`Backbone` 15 | """ 16 | if input_shape is None: 17 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 18 | 19 | backbone = build_resnet_backbone(cfg, input_shape) 20 | assert isinstance(backbone, Backbone) 21 | return backbone 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = Classification(cfg) 28 | 29 | logger = logging.getLogger(__name__) 30 | logger.info("Model:\n{}".format(model)) 31 | return model 32 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e.moco_setting.lin_clsv2/README.md: -------------------------------------------------------------------------------- 1 | # simclr.moco_setting.lin_clsv2 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 67.286 | 87.992 | 8 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e.moco_setting.lin_clsv2/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="../simclr.moco_setting/log/model_final_pretrain_weight.pkl", 9 | BACKBONE=dict(FREEZE_AT=0, ), # freeze all parameters manually in imagenet.py 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["res5", "linear"], 15 | STRIDE_IN_1X1=False, 16 | ), 17 | ), 18 | DATASETS=dict( 19 | TRAIN=("imagenet_train", ), 20 | TEST=("imagenet_val", ), 21 | ), 22 | DATALOADER=dict( 23 | NUM_WORKERS=6, 24 | ), 25 | SOLVER=dict( 26 | LR_SCHEDULER=dict( 27 | STEPS=(60, 80), 28 | MAX_EPOCH=90, 29 | WARMUP_ITERS=0, 30 | ), 31 | OPTIMIZER=dict( 32 | BASE_LR=30, 33 | MOMENTUM=0.9, 34 | WEIGHT_DECAY=0.0, 35 | WEIGHT_DECAY_NORM=0.0, 36 | ), 37 | CHECKPOINT_PERIOD=10, 38 | IMS_PER_BATCH=256, 39 | ), 40 | INPUT=dict( 41 | AUG=dict( 42 | TRAIN_PIPELINES=[ 43 | ("Torch_Compose", transforms.Compose([ 44 | transforms.RandomResizedCrop(224), 45 | transforms.RandomHorizontalFlip(), 46 | ])) 47 | ], 48 | TEST_PIPELINES=[ 49 | ("Torch_Compose", transforms.Compose([ 50 | transforms.Resize(256), 51 | transforms.CenterCrop(224), 52 | ])) 53 | ] 54 | ) 55 | ), 56 | TEST=dict( 57 | EVAL_PERIOD=10, 58 | ), 59 | OUTPUT_DIR=osp.join( 60 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 61 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1] 62 | ) 63 | ) 64 | 65 | 66 | class ClassificationConfig(BaseClassificationConfig): 67 | def __init__(self): 68 | super(ClassificationConfig, self).__init__() 69 | self._register_configuration(_config_dict) 70 | 71 | 72 | config = ClassificationConfig() 73 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e.moco_setting.lin_clsv2/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec 6 | from cvpods.structures import ImageList 7 | 8 | 9 | def accuracy(output, target, topk=(1,)): 10 | """Computes the accuracy over the k top predictions for the specified values of k""" 11 | with torch.no_grad(): 12 | maxk = 
max(topk) 13 | batch_size = target.size(0) 14 | 15 | _, pred = output.topk(maxk, 1, True, True) 16 | pred = pred.t() 17 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 18 | 19 | res = [] 20 | for k in topk: 21 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 22 | res.append(correct_k.mul_(100.0 / batch_size)) 23 | return res 24 | 25 | 26 | class Classification(nn.Module): 27 | def __init__(self, cfg): 28 | super(Classification, self).__init__() 29 | 30 | self.device = torch.device(cfg.MODEL.DEVICE) 31 | 32 | self.network = cfg.build_backbone( 33 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 34 | 35 | self.freeze() 36 | self.network.eval() 37 | 38 | # init the fc layer 39 | self.network.linear.weight.data.normal_(mean=0.0, std=0.01) 40 | self.network.linear.bias.data.zero_() 41 | 42 | self.loss_evaluator = nn.CrossEntropyLoss() 43 | 44 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(1, 3, 1, 1) 45 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(1, 3, 1, 1) 46 | self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std 47 | 48 | self.to(self.device) 49 | 50 | def freeze(self): 51 | for name, param in self.network.named_parameters(): 52 | if name not in ['linear.weight', 'linear.bias']: 53 | param.requires_grad = False 54 | 55 | def forward(self, batched_inputs): 56 | self.network.eval() 57 | images = self.preprocess_image(batched_inputs) 58 | 59 | outputs = self.network(images) 60 | preds = outputs["linear"] 61 | 62 | if self.training: 63 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 64 | losses = self.loss_evaluator(preds, labels) 65 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 66 | 67 | return { 68 | "loss_cls": losses, 69 | "top1_acc": acc1, 70 | "top5_acc": acc5, 71 | } 72 | else: 73 | return preds 74 | 75 | def preprocess_image(self, batched_inputs): 76 | """ 77 | Normalize, pad and batch the input images. 78 | """ 79 | images = torch.stack([x["image"] for x in batched_inputs]).to(self.device) 80 | images = self.normalizer(images) 81 | return images 82 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e.moco_setting.lin_clsv2/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from imagenet import Classification 8 | 9 | def build_backbone(cfg, input_shape=None): 10 | """ 11 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
12 | 13 | Returns: 14 | an instance of :class:`Backbone` 15 | """ 16 | if input_shape is None: 17 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 18 | 19 | backbone = build_resnet_backbone(cfg, input_shape) 20 | assert isinstance(backbone, Backbone) 21 | return backbone 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = Classification(cfg) 28 | 29 | logger = logging.getLogger(__name__) 30 | logger.info("Model:\n{}".format(model)) 31 | return model 32 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e.moco_setting/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | import loader 7 | 8 | _config_dict = dict( 9 | MODEL=dict( 10 | WEIGHTS="", 11 | AS_PRETRAIN=True, 12 | RESNETS=dict( 13 | DEPTH=50, 14 | NUM_CLASSES=1000, 15 | NORM="SyncBN", 16 | OUT_FEATURES=["linear"], 17 | STRIDE_IN_1X1=False, # default true for msra models 18 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 19 | ), 20 | CLR=dict( 21 | DIM=128, 22 | TAU=0.2, 23 | MLP=True, 24 | NORM="SyncBN", 25 | ), 26 | ), 27 | DATASETS=dict( 28 | TRAIN=("imagenet_train", ), 29 | TEST=("imagenet_val", ), 30 | ), 31 | DATALOADER=dict(NUM_WORKERS=8, ), 32 | SOLVER=dict( 33 | LR_SCHEDULER=dict( 34 | NAME="WarmupCosineLR", 35 | MAX_EPOCH=200, 36 | WARMUP_ITERS=10, 37 | ), 38 | OPTIMIZER=dict( 39 | NAME="SGD", 40 | LARS=dict( 41 | ENABLED=False, 42 | EPS=1e-8, 43 | TRUST_COEF=1e-3, 44 | ), 45 | BASE_LR=0.03, 46 | MOMENTUM=0.9, 47 | WEIGHT_DECAY=1e-4, 48 | WEIGHT_DECAY_NORM=1e-4, 49 | ), 50 | CHECKPOINT_PERIOD=10, 51 | IMS_PER_BATCH=256, 52 | IMS_PER_DEVICE=32, 53 | ), 54 | INPUT=dict( 55 | AUG=dict( 56 | TRAIN_PIPELINES=[ 57 | ("RepeatList", dict(transforms=[ 58 | ("Torch_Compose", transforms.Compose([ 59 | transforms.RandomResizedCrop(224, scale=(0.2, 1.)), 60 | transforms.RandomApply([ 61 | transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8), 62 | transforms.RandomApply([loader.GaussianBlur([.1, 2.])], p=0.5), 63 | transforms.RandomGrayscale(p=0.2), 64 | transforms.RandomHorizontalFlip(), 65 | ])), 66 | ], repeat_times=2)), 67 | ], 68 | ) 69 | ), 70 | OUTPUT_DIR=osp.join( 71 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 72 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1])) 73 | 74 | 75 | class MoCoV2Config(BaseClassificationConfig): 76 | def __init__(self): 77 | super(MoCoV2Config, self).__init__() 78 | self._register_configuration(_config_dict) 79 | 80 | 81 | config = MoCoV2Config() 82 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e.moco_setting/loader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from PIL import ImageFilter 3 | import random 4 | 5 | 6 | class TwoCropsTransform: 7 | """Take two random crops of one image as the query and key.""" 8 | 9 | def __init__(self, base_transform): 10 | self.base_transform = base_transform 11 | 12 | def __call__(self, x): 13 | q = self.base_transform(x) 14 | k = self.base_transform(x) 15 | return [q, k] 16 | 17 | 18 | class GaussianBlur(object): 19 | """Gaussian blur augmentation in SimCLR https://arxiv.org/abs/2002.05709""" 20 | 21 | def __init__(self, sigma=[.1, 2.]): 22 | self.sigma = sigma 23 | 24 | def __call__(self, x): 25 | sigma = random.uniform(self.sigma[0], self.sigma[1]) 26 | x = x.filter(ImageFilter.GaussianBlur(radius=sigma)) 27 | return x 28 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e.moco_setting/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from simclr import SimCLR 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_resnet_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_model(cfg): 26 | 27 | cfg.build_backbone = build_backbone 28 | 29 | model = SimCLR(cfg) 30 | 31 | logger = logging.getLogger(__name__) 32 | logger.info("Model:\n{}".format(model)) 33 | return model 34 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e.moco_setting/simclr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import torch 3 | import torch.nn as nn 4 | 5 | from torch.nn import functional as F 6 | 7 | from cvpods.layers import ShapeSpec 8 | from cvpods.structures import ImageList 9 | from cvpods.layers.batch_norm import NaiveSyncBatchNorm1d 10 | 11 | from nt_xent2 import NT_Xent 12 | 13 | 14 | def accuracy(output, target, topk=(1,)): 15 | """Computes the accuracy over the k top predictions for the specified values of k""" 16 | with torch.no_grad(): 17 | maxk = max(topk) 18 | batch_size = target.size(0) 19 | 20 | _, pred = output.topk(maxk, 1, True, True) 21 | pred = pred.t() 22 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 23 | 24 | res = [] 25 | for k in topk: 26 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 27 | res.append(correct_k.mul_(100.0 / batch_size)) 28 | return res 29 | 30 | 31 | class SimCLR(nn.Module): 32 | """ 33 | Build a SimCLR model: a single encoder with an MLP projection head, trained with the NT-Xent contrastive loss 34 | https://arxiv.org/abs/2002.05709 35 | """ 36 | def __init__(self, cfg): 37 | """ 38 | dim: projection feature dimension (cfg.MODEL.CLR.DIM, e.g. 128) 39 | T: softmax temperature for the NT-Xent loss (cfg.MODEL.CLR.TAU) 40 | mlp: whether to replace the final fc with a two-layer MLP projector 41 | norm: normalization used inside the projector (e.g. "SyncBN") 42 | """ 43 | super(SimCLR, self).__init__() 44 | 45 | self.device = torch.device(cfg.MODEL.DEVICE) 46 | 47 | self.dim = cfg.MODEL.CLR.DIM 48 | self.T = cfg.MODEL.CLR.TAU 49 | self.mlp = cfg.MODEL.CLR.MLP 50 | self.norm = cfg.MODEL.CLR.NORM 51 | 52 | # create the encoders 53 | # num_classes is the output fc dimension 54 | cfg.MODEL.RESNETS.NUM_CLASSES = self.dim 55 | 56 | self.network = cfg.build_backbone( 57 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 58 | 59 | self.size_divisibility = self.network.size_divisibility 60 | 61 | if self.mlp: # hack: brute-force replacement 62 | dim_mlp = self.network.linear.weight.shape[1] 63 | if self.norm == "SyncBN": 64 | self.network.linear = nn.Sequential( 65 | nn.Linear(dim_mlp, dim_mlp, bias=False), 66 | NaiveSyncBatchNorm1d(dim_mlp), 67 | nn.ReLU(), 68 | nn.Linear(dim_mlp, self.dim, bias=False), 69 | NaiveSyncBatchNorm1d(self.dim) 70 | ) 71 | nn.init.normal_(self.network.linear[0].weight, mean=0.0, std=0.01) # linear weight 72 | nn.init.normal_(self.network.linear[3].weight, mean=0.0, std=0.01) # linear weight 73 | nn.init.constant_(self.network.linear[1].weight, 1.0) # bn gamma 74 | nn.init.constant_(self.network.linear[4].weight, 1.0) # bn gamma 75 | else: 76 | self.network.linear = nn.Sequential( 77 | nn.Linear(dim_mlp, dim_mlp), 78 | nn.ReLU(), 79 | nn.Linear(dim_mlp, self.dim), 80 | ) 81 | nn.init.normal_(self.network.linear[0].weight, mean=0.0, std=0.01) # linear weight 82 | nn.init.normal_(self.network.linear[2].weight, mean=0.0, std=0.01) # linear weight 83 | 84 | # self.loss_evaluator = NTXentLoss(self.device, cfg.SOLVER.IMS_PER_DEVICE, self.T, True) 85 | self.loss_evaluator = NT_Xent(cfg.SOLVER.IMS_PER_DEVICE, self.T, self.device) 86 | 87 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(1, 3, 1, 1) 88 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(1, 3, 1, 1) 89 | self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std 90 | 91 | self.to(self.device) 92 | 93 | def forward(self, batched_inputs): 94 | """ 95 | Input: 96 | batched_inputs: a batch of dicts, each holding two augmented views 97 | of the same image under the "image" key 98 | Output: 99 | a dict with the NT-Xent losses for the two views 100 | """ 101 | 102 | x_i = self.preprocess_image([bi["image"][0] for bi in batched_inputs]) 103 | x_j = 
self.preprocess_image([bi["image"][1] for bi in batched_inputs]) 104 | 105 | z_i = self.network(x_i)["linear"] 106 | z_j = self.network(x_j)["linear"] 107 | 108 | z_in = F.normalize(z_i, dim=1) 109 | z_jn = F.normalize(z_j, dim=1) 110 | 111 | loss_i, loss_j = self.loss_evaluator(z_in, z_jn) 112 | 113 | return { 114 | "loss_nt_xenti": loss_i, 115 | "loss_nt_xentj": loss_j, 116 | } 117 | 118 | def preprocess_image(self, batched_inputs): 119 | """ 120 | Normalize, pad and batch the input images. 121 | """ 122 | # images = [x["image"].float().to(self.device) for x in batched_inputs] 123 | images = torch.stack([x for x in batched_inputs]).to(self.device) 124 | images = self.normalizer(images) 125 | 126 | return images 127 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | 7 | _config_dict = dict( 8 | MODEL=dict( 9 | WEIGHTS="", 10 | AS_PRETRAIN=True, 11 | RESNETS=dict( 12 | DEPTH=50, 13 | NUM_CLASSES=1000, 14 | NORM="SyncBN", 15 | OUT_FEATURES=["linear"], 16 | STRIDE_IN_1X1=False, # default true for msra models 17 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 18 | ), 19 | CLR=dict( 20 | DIM=128, 21 | TAU=0.1, 22 | MLP=True, 23 | NORM="SyncBN", 24 | ), 25 | ), 26 | DATASETS=dict( 27 | TRAIN=("imagenet_train", ), 28 | TEST=("imagenet_val", ), 29 | ), 30 | DATALOADER=dict(NUM_WORKERS=8, ), 31 | SOLVER=dict( 32 | LR_SCHEDULER=dict( 33 | NAME="WarmupCosineLR", 34 | MAX_EPOCH=200, 35 | WARMUP_ITERS=10, 36 | ), 37 | OPTIMIZER=dict( 38 | NAME="SGD", 39 | LARS=dict( 40 | ENABLED=False, 41 | EPS=1e-8, 42 | TRUST_COEF=1e-3, 43 | ), 44 | BASE_LR=0.3, 45 | MOMENTUM=0.9, 46 | WEIGHT_DECAY=1e-6, 47 | WEIGHT_DECAY_NORM=1e-6, 48 | ), 49 | CHECKPOINT_PERIOD=10, 50 | IMS_PER_BATCH=256, 51 | IMS_PER_DEVICE=32, 52 | ), 53 | INPUT=dict( 54 | AUG=dict( 55 | TRAIN_PIPELINES=[ 56 | ("RepeatList", dict(transforms=[ 57 | ("Torch_Compose", transforms.Compose([ 58 | transforms.RandomResizedCrop(224, scale=(0.08, 1.)), 59 | transforms.RandomApply([ 60 | transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)], p=0.8), 61 | ])), 62 | ("GaussianBlur", dict(sigma=[.1, 2.], p=0.5)), 63 | ("Torch_Compose", transforms.Compose([ 64 | transforms.RandomGrayscale(p=0.2), 65 | transforms.RandomHorizontalFlip(), 66 | ])) 67 | ], repeat_times=2)), 68 | ], 69 | ) 70 | ), 71 | OUTPUT_DIR=osp.join( 72 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 73 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1])) 74 | 75 | 76 | class MoCoV2Config(BaseClassificationConfig): 77 | def __init__(self): 78 | super(MoCoV2Config, self).__init__() 79 | self._register_configuration(_config_dict) 80 | 81 | 82 | config = MoCoV2Config() 83 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from simclr import SimCLR 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from 
`cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_resnet_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_model(cfg): 26 | 27 | cfg.build_backbone = build_backbone 28 | 29 | model = SimCLR(cfg) 30 | 31 | logger = logging.getLogger(__name__) 32 | logger.info("Model:\n{}".format(model)) 33 | return model 34 | -------------------------------------------------------------------------------- /examples/simclr/simclr.res50.scratch.imagenet.224size.256bs.200e/nt_xent2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from cvpods.utils import comm 5 | 6 | from torch import distributed as dist 7 | 8 | 9 | class NT_Xent(nn.Module): 10 | def __init__(self, device_size, temperature, device): 11 | super(NT_Xent, self).__init__() 12 | self.device_size = device_size 13 | self.temperature = temperature 14 | self.device = device 15 | 16 | self.criterion = nn.CrossEntropyLoss(reduction="sum") 17 | self.similarity_f = nn.CosineSimilarity(dim=2) 18 | 19 | pos_mask_i, pos_mask_j, neg_mask_i, neg_mask_j = \ 20 | self.mask_correlated_samples(comm.get_world_size() * self.device_size, self.device_size) 21 | 22 | self.pos_mask_i = pos_mask_i.to(self.device) 23 | self.neg_mask_i = neg_mask_i.to(self.device) 24 | 25 | self.pos_mask_j = pos_mask_j.to(self.device) 26 | self.neg_mask_j = neg_mask_j.to(self.device) 27 | 28 | def mask_correlated_samples(self, batch_size, device_size, _rank=0): 29 | neg_mask_i = torch.ones((batch_size, batch_size * 2), dtype=bool) 30 | neg_mask_j = torch.ones((batch_size, batch_size * 2), dtype=bool) 31 | 32 | for rank in range(int(batch_size / device_size)): 33 | for idx in range(device_size): 34 | neg_mask_i[device_size * rank + idx, device_size * (2 * rank + 1) + idx] = 0 # i 35 | neg_mask_j[device_size * rank + idx, device_size * (2 * rank) + idx] = 0 # j 36 | 37 | pos_mask_i = neg_mask_i.clone() 38 | pos_mask_j = neg_mask_j.clone() 39 | 40 | for rank in range(int(batch_size / device_size)): 41 | neg_mask_i[ 42 | device_size * rank: device_size * (rank + 1), 43 | device_size * 2 * rank: device_size * (2 * rank + 1), 44 | ].fill_diagonal_(0) 45 | neg_mask_j[ 46 | device_size * rank: device_size * (rank + 1), 47 | device_size * (2 * rank + 1): device_size * 2 * (rank + 1), 48 | ].fill_diagonal_(0) 49 | 50 | return ~pos_mask_i, ~pos_mask_j, neg_mask_i, neg_mask_j 51 | 52 | def forward(self, z_i, z_j): 53 | """ 54 | We do not sample negative examples explicitly. 55 | Instead, given a positive pair, similar to (Chen et al., 2017), we treat the other 2(N − 1) augmented examples within a minibatch as negative examples. 
56 | """ 57 | 58 | local_rank = comm.get_rank() 59 | if comm.get_world_size() > 1: 60 | group = comm._get_global_gloo_group() 61 | 62 | zi_large = [torch.zeros_like(z_i) for _ in range(comm.get_world_size())] 63 | zj_large = [torch.zeros_like(z_j) for _ in range(comm.get_world_size())] 64 | 65 | dist.all_gather(zi_large, z_i, group=group) 66 | dist.all_gather(zj_large, z_j, group=group) 67 | else: 68 | zi_large = [z_i] 69 | zj_large = [z_j] 70 | 71 | z_large = [] 72 | for idx in range(comm.get_world_size()): 73 | if idx == local_rank: 74 | # current device 75 | z_large.append(z_i) 76 | z_large.append(z_j) 77 | else: 78 | z_large.append(zi_large[idx]) 79 | z_large.append(zj_large[idx]) 80 | 81 | zi_large[local_rank] = z_i 82 | zj_large[local_rank] = z_j 83 | 84 | zi_large = torch.cat(zi_large) 85 | zj_large = torch.cat(zj_large) 86 | 87 | device_size = z_i.shape[0] 88 | batch_size = device_size * comm.get_world_size() 89 | 90 | z_large = torch.cat(z_large) 91 | 92 | sim_i_large = self.similarity_f(zi_large.unsqueeze(1), z_large.unsqueeze(0)) / self.temperature 93 | sim_j_large = self.similarity_f(zj_large.unsqueeze(1), z_large.unsqueeze(0)) / self.temperature 94 | 95 | positive_samples_i = sim_i_large[self.pos_mask_i].reshape(batch_size, 1) 96 | negative_samples_i = sim_i_large[self.neg_mask_i].reshape(batch_size, -1) 97 | 98 | r = (positive_samples_i.exp() / negative_samples_i.exp().sum(dim=1, keepdim=True)).mean() 99 | if local_rank == 0: 100 | print("SimQK to SimQN: ", r) 101 | 102 | positive_samples_j = sim_j_large[self.pos_mask_j].reshape(batch_size, 1) 103 | negative_samples_j = sim_j_large[self.neg_mask_j].reshape(batch_size, -1) 104 | 105 | labels_i = torch.zeros(batch_size).to(self.device).long() 106 | logits_i = torch.cat((positive_samples_i, negative_samples_i), dim=1) 107 | 108 | labels_j = torch.zeros(batch_size).to(self.device).long() 109 | logits_j = torch.cat((positive_samples_j, negative_samples_j), dim=1) 110 | 111 | loss_i = self.criterion(logits_i, labels_i) 112 | loss_j = self.criterion(logits_j, labels_j) 113 | 114 | loss_i /= device_size 115 | loss_j /= device_size 116 | 117 | return loss_i, loss_j 118 | -------------------------------------------------------------------------------- /examples/simo/simo.res50.scratch.imagenet.224size.256bs.200e.lin_cls/README.md: -------------------------------------------------------------------------------- 1 | # simo.res50.scratch.imagenet.224size.256bs.200e.lin_clsv2 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 68.036 | 88.518 | 8 | -------------------------------------------------------------------------------- /examples/simo/simo.res50.scratch.imagenet.224size.256bs.200e.lin_cls/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="../simo.res50.scratch.imagenet.224size.256bs.200e/log/model_final_pretrain_weight.pkl", 9 | BACKBONE=dict(FREEZE_AT=0, ), # freeze all parameters manually in imagenet.py 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["res5", "linear"], 15 | STRIDE_IN_1X1=False, 16 | ), 17 | ), 18 | DATASETS=dict( 19 | TRAIN=("imagenet_train", ), 20 | TEST=("imagenet_val", ), 21 | ), 22 | DATALOADER=dict( 23 | NUM_WORKERS=6, 24 | ), 25 | 
SOLVER=dict( 26 | LR_SCHEDULER=dict( 27 | STEPS=(60, 80), 28 | MAX_EPOCH=90, 29 | WARMUP_ITERS=0, 30 | ), 31 | OPTIMIZER=dict( 32 | BASE_LR=30, 33 | MOMENTUM=0.9, 34 | WEIGHT_DECAY=0.0, 35 | WEIGHT_DECAY_NORM=0.0, 36 | ), 37 | CHECKPOINT_PERIOD=10, 38 | IMS_PER_BATCH=256, 39 | ), 40 | INPUT=dict( 41 | AUG=dict( 42 | TRAIN_PIPELINES=[ 43 | ("Torch_Compose", transforms.Compose([ 44 | transforms.RandomResizedCrop(224), 45 | transforms.RandomHorizontalFlip(), 46 | ])) 47 | ], 48 | TEST_PIPELINES=[ 49 | ("Torch_Compose", transforms.Compose([ 50 | transforms.Resize(256), 51 | transforms.CenterCrop(224), 52 | ])) 53 | ] 54 | ) 55 | ), 56 | TEST=dict( 57 | EVAL_PERIOD=10, 58 | ), 59 | OUTPUT_DIR=osp.join( 60 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 61 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1] 62 | ) 63 | ) 64 | 65 | 66 | class ClassificationConfig(BaseClassificationConfig): 67 | def __init__(self): 68 | super(ClassificationConfig, self).__init__() 69 | self._register_configuration(_config_dict) 70 | 71 | 72 | config = ClassificationConfig() 73 | -------------------------------------------------------------------------------- /examples/simo/simo.res50.scratch.imagenet.224size.256bs.200e.lin_cls/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec 6 | from cvpods.structures import ImageList 7 | 8 | 9 | def accuracy(output, target, topk=(1,)): 10 | """Computes the accuracy over the k top predictions for the specified values of k""" 11 | with torch.no_grad(): 12 | maxk = max(topk) 13 | batch_size = target.size(0) 14 | 15 | _, pred = output.topk(maxk, 1, True, True) 16 | pred = pred.t() 17 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 18 | 19 | res = [] 20 | for k in topk: 21 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 22 | res.append(correct_k.mul_(100.0 / batch_size)) 23 | return res 24 | 25 | 26 | class Classification(nn.Module): 27 | def __init__(self, cfg): 28 | super(Classification, self).__init__() 29 | 30 | self.device = torch.device(cfg.MODEL.DEVICE) 31 | 32 | self.network = cfg.build_backbone( 33 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 34 | 35 | self.freeze() 36 | self.network.eval() 37 | 38 | # init the fc layer 39 | self.network.linear.weight.data.normal_(mean=0.0, std=0.01) 40 | self.network.linear.bias.data.zero_() 41 | 42 | self.loss_evaluator = nn.CrossEntropyLoss() 43 | 44 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(1, 3, 1, 1) 45 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(1, 3, 1, 1) 46 | self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std 47 | 48 | self.to(self.device) 49 | 50 | def freeze(self): 51 | for name, param in self.network.named_parameters(): 52 | if name not in ['linear.weight', 'linear.bias']: 53 | param.requires_grad = False 54 | 55 | def forward(self, batched_inputs): 56 | self.network.eval() 57 | images = self.preprocess_image(batched_inputs) 58 | 59 | outputs = self.network(images) 60 | preds = outputs["linear"] 61 | 62 | if self.training: 63 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 64 | losses = self.loss_evaluator(preds, labels) 65 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 66 | 67 | return { 68 | "loss_cls": losses, 69 | "top1_acc": acc1, 70 | "top5_acc": acc5, 71 | } 72 | else: 73 | return preds 74 | 75 | def preprocess_image(self, 
batched_inputs): 76 | """ 77 | Normalize, pad and batch the input images. 78 | """ 79 | images = torch.stack([x["image"] for x in batched_inputs]).to(self.device) 80 | images = self.normalizer(images) 81 | return images 82 | -------------------------------------------------------------------------------- /examples/simo/simo.res50.scratch.imagenet.224size.256bs.200e.lin_cls/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from imagenet import Classification 8 | 9 | def build_backbone(cfg, input_shape=None): 10 | """ 11 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 12 | 13 | Returns: 14 | an instance of :class:`Backbone` 15 | """ 16 | if input_shape is None: 17 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 18 | 19 | backbone = build_resnet_backbone(cfg, input_shape) 20 | assert isinstance(backbone, Backbone) 21 | return backbone 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = Classification(cfg) 28 | 29 | logger = logging.getLogger(__name__) 30 | logger.info("Model:\n{}".format(model)) 31 | return model 32 | -------------------------------------------------------------------------------- /examples/simo/simo.res50.scratch.imagenet.224size.256bs.200e/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="", 9 | AS_PRETRAIN=True, 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="SyncBN", 14 | OUT_FEATURES=["linear"], 15 | STRIDE_IN_1X1=False, # default true for msra models 16 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 17 | ), 18 | CLR=dict( 19 | ALPHA=256, 20 | K=256, 21 | DIM=128, 22 | TAU=0.2, 23 | MLP=True, 24 | NORM="SyncBN", 25 | MOMENTUM=0.999, 26 | ), 27 | ), 28 | DATASETS=dict( 29 | TRAIN=("imagenet_train", ), 30 | TEST=("imagenet_val", ), 31 | ), 32 | DATALOADER=dict(NUM_WORKERS=6, ), 33 | SOLVER=dict( 34 | LR_SCHEDULER=dict( 35 | NAME="WarmupCosineLR", 36 | MAX_EPOCH=200, 37 | WARMUP_ITERS=10, 38 | EPOCH_WISE=False, # update lr in epoch / step 39 | ), 40 | OPTIMIZER=dict( 41 | NAME="SGD", 42 | LARS=dict( 43 | ENABLED=False, 44 | EPS=1e-8, 45 | TRUST_COEF=1e-3, 46 | ), 47 | BASE_LR=0.03, 48 | MOMENTUM=0.9, 49 | WEIGHT_DECAY=1e-4, 50 | WEIGHT_DECAY_NORM=1e-4, 51 | ), 52 | CHECKPOINT_PERIOD=10, 53 | IMS_PER_BATCH=256, 54 | IMS_PER_DEVICE=32, 55 | ), 56 | INPUT=dict( 57 | AUG=dict( 58 | TRAIN_PIPELINES=[ 59 | ("RepeatList", dict(transforms=[ 60 | ("Torch_Compose", transforms.Compose([ 61 | transforms.RandomResizedCrop(224, scale=(0.2, 1.)), 62 | transforms.RandomApply([ 63 | transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8), 64 | ])), 65 | ("GaussianBlur", dict(sigma=[.1, 2.], p=0.5)), 66 | ("Torch_Compose", transforms.Compose([ 67 | transforms.RandomGrayscale(p=0.2), 68 | transforms.RandomHorizontalFlip(), 69 | ])) 70 | ], repeat_times=2)), 71 | ], 72 | ) 73 | ), 74 | OUTPUT_DIR=osp.join( 75 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 76 | osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1])) 77 | 78 | 79 | class MoCoV2Config(BaseClassificationConfig): 80 | def 
__init__(self): 81 | super(MoCoV2Config, self).__init__() 82 | self._register_configuration(_config_dict) 83 | 84 | 85 | config = MoCoV2Config() 86 | -------------------------------------------------------------------------------- /examples/simo/simo.res50.scratch.imagenet.224size.256bs.200e/net.py: -------------------------------------------------------------------------------- 1 | from cvpods.layers import ShapeSpec 2 | from cvpods.modeling.backbone import Backbone 3 | from cvpods.modeling.backbone import build_resnet_backbone 4 | 5 | from simo import SiMo 6 | 7 | 8 | def build_backbone(cfg, input_shape=None): 9 | """ 10 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 11 | 12 | Returns: 13 | an instance of :class:`Backbone` 14 | """ 15 | if input_shape is None: 16 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 17 | backbone = build_resnet_backbone(cfg, input_shape) 18 | assert isinstance(backbone, Backbone) 19 | return backbone 20 | 21 | 22 | def build_model(cfg): 23 | cfg.build_backbone = build_backbone 24 | model = SiMo(cfg) 25 | return model 26 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res18.cifar10.512bs.32size.800e.lin_cls/README.md: -------------------------------------------------------------------------------- 1 | # SimSiam.res18.cifar10.512bs.32size.800e.lin_cls.nolars.256bs.lr0.2.mean_std.lr30_moco_setting 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 90.380 | 99.620 | 8 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res18.cifar10.512bs.32size.800e.lin_cls/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="../SimSiam.res18.cifar10.512bs.32size.800e/log/model_final.pkl", 9 | PIXEL_MEAN=[0.4465, 0.4822, 0.4914], # BGR 10 | PIXEL_STD=[0.2010, 0.1994, 0.2023], 11 | BACKBONE=dict(FREEZE_AT=0, ), # freeze all parameters manually in imagenet.py 12 | RESNETS=dict( 13 | DEPTH=18, 14 | RES2_OUT_CHANNELS=64, 15 | NUM_CLASSES=10, 16 | NORM="BN", 17 | OUT_FEATURES=["res5", "linear"], 18 | STRIDE_IN_1X1=False, 19 | ), 20 | ), 21 | DATASETS=dict( 22 | TRAIN=("cifar10_train", ), 23 | TEST=("cifar10_test", ), 24 | ), 25 | DATALOADER=dict( 26 | NUM_WORKERS=2, 27 | ), 28 | SOLVER=dict( 29 | LR_SCHEDULER=dict( 30 | NAME="WarmupMultiStepLR", 31 | STEPS=(60, 80), 32 | MAX_EPOCH=90, 33 | WARMUP_ITERS=0, 34 | ), 35 | OPTIMIZER=dict( 36 | NAME="SGD", 37 | LARC=dict( 38 | ENABLED=False, 39 | EPS=1e-8, 40 | TRUST_COEF=1e-3, 41 | CLIP=False, 42 | ), 43 | BASE_LR=30 / 256 * 256, 44 | MOMENTUM=0.9, 45 | WEIGHT_DECAY=0.0, 46 | ), 47 | CHECKPOINT_PERIOD=10, 48 | IMS_PER_BATCH=256, 49 | IMS_PER_DEVICE=32, 50 | ), 51 | INPUT=dict( 52 | AUG=dict( 53 | TRAIN_PIPELINES=[ 54 | ("Torch_Compose", transforms.Compose([ 55 | transforms.RandomResizedCrop(32), 56 | transforms.RandomHorizontalFlip(), 57 | ])) 58 | ], 59 | ) 60 | ), 61 | TEST=dict( 62 | EVAL_PERIOD=10, 63 | ), 64 | OUTPUT_DIR=osp.join( 65 | '/data/Outputs/model_logs/cvpods_playground/self_supervised', 66 | osp.split(osp.realpath(__file__))[0].split("self_supervised/")[-1] 67 | ) 68 | ) 69 | 70 | 71 | class ClassificationConfig(BaseClassificationConfig): 72 | def __init__(self): 73 
| super(ClassificationConfig, self).__init__() 74 | self._register_configuration(_config_dict) 75 | 76 | 77 | config = ClassificationConfig() 78 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res18.cifar10.512bs.32size.800e.lin_cls/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec, Conv2d, get_norm 6 | from cvpods.structures import ImageList 7 | 8 | 9 | def accuracy(output, target, topk=(1,)): 10 | """Computes the accuracy over the k top predictions for the specified values of k""" 11 | with torch.no_grad(): 12 | maxk = max(topk) 13 | batch_size = target.size(0) 14 | 15 | _, pred = output.topk(maxk, 1, True, True) 16 | pred = pred.t() 17 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 18 | 19 | res = [] 20 | for k in topk: 21 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 22 | res.append(correct_k.mul_(100.0 / batch_size)) 23 | return res 24 | 25 | 26 | class Classification(nn.Module): 27 | def __init__(self, cfg): 28 | super(Classification, self).__init__() 29 | 30 | self.device = torch.device(cfg.MODEL.DEVICE) 31 | 32 | self.network = cfg.build_backbone( 33 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 34 | self.network.stem = nn.Sequential( 35 | Conv2d( 36 | 3, 37 | 64, 38 | kernel_size=3, 39 | stride=1, 40 | padding=1, 41 | bias=False, 42 | norm=get_norm(cfg.MODEL.RESNETS.NORM, 64) 43 | ), 44 | nn.ReLU(), 45 | ) 46 | 47 | self.freeze() 48 | self.network.eval() 49 | 50 | # init the fc layer 51 | self.network.linear.weight.data.normal_(mean=0.0, std=0.01) 52 | self.network.linear.bias.data.zero_() 53 | 54 | self.loss_evaluator = nn.CrossEntropyLoss() 55 | 56 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(1, 3, 1, 1) 57 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(1, 3, 1, 1) 58 | self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std 59 | 60 | self.to(self.device) 61 | 62 | def freeze(self): 63 | for name, param in self.network.named_parameters(): 64 | if name not in ['linear.weight', 'linear.bias']: 65 | param.requires_grad = False 66 | 67 | def forward(self, batched_inputs): 68 | self.network.eval() 69 | images = self.preprocess_image(batched_inputs) 70 | 71 | outputs = self.network(images) 72 | preds = outputs["linear"] 73 | 74 | if self.training: 75 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 76 | losses = self.loss_evaluator(preds, labels) 77 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 78 | 79 | return { 80 | "loss_cls": losses, 81 | "top1_acc": acc1, 82 | "top5_acc": acc5, 83 | } 84 | else: 85 | return preds 86 | 87 | def preprocess_image(self, batched_inputs): 88 | """ 89 | Normalize, pad and batch the input images. 
90 | """ 91 | images = torch.stack([x["image"] for x in batched_inputs]).to(self.device) 92 | images = self.normalizer(images) 93 | return images 94 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res18.cifar10.512bs.32size.800e.lin_cls/net.py: -------------------------------------------------------------------------------- 1 | from cvpods.layers import ShapeSpec 2 | from cvpods.modeling.backbone import Backbone 3 | from cvpods.modeling.backbone import build_resnet_backbone 4 | 5 | from imagenet import Classification 6 | 7 | def build_backbone(cfg, input_shape=None): 8 | """ 9 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 10 | 11 | Returns: 12 | an instance of :class:`Backbone` 13 | """ 14 | if input_shape is None: 15 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 16 | 17 | backbone = build_resnet_backbone(cfg, input_shape) 18 | assert isinstance(backbone, Backbone) 19 | return backbone 20 | 21 | def build_model(cfg): 22 | 23 | cfg.build_backbone = build_backbone 24 | 25 | model = Classification(cfg) 26 | 27 | return model 28 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res18.cifar10.512bs.32size.800e/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="", 9 | AS_PRETRAIN=True, 10 | PIXEL_MEAN=[0.4914, 0.4822, 0.4465], # RGB 11 | PIXEL_STD=[0.2023, 0.1994, 0.2010], 12 | RESNETS=dict( 13 | DEPTH=18, 14 | RES2_OUT_CHANNELS=64, 15 | NUM_CLASSES=10, 16 | NORM="nnSyncBN", 17 | OUT_FEATURES=["linear"], 18 | STRIDE_IN_1X1=False, # default true for msra models 19 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 20 | ), 21 | BYOL=dict( 22 | PROJ_DIM=2048, 23 | PRED_DIM=512, 24 | OUT_DIM=2048, 25 | ), 26 | ), 27 | DATASETS=dict( 28 | TRAIN=("cifar10_train", ), 29 | TEST=("cifar10_test", ), 30 | ), 31 | DATALOADER=dict(NUM_WORKERS=4, ), 32 | SOLVER=dict( 33 | LR_SCHEDULER=dict( 34 | NAME="WarmupCosineLR", 35 | MAX_EPOCH=800, 36 | WARMUP_ITERS=0, 37 | ), 38 | OPTIMIZER=dict( 39 | NAME="SGD", 40 | BASE_LR=0.03 / 256 * 512, 41 | MOMENTUM=0.9, 42 | WEIGHT_DECAY=5e-4, 43 | ), 44 | CHECKPOINT_PERIOD=50, 45 | IMS_PER_BATCH=512, 46 | IMS_PER_DEVICE=64, 47 | ), 48 | INPUT=dict( 49 | FORMAT="RGB", 50 | AUG=dict( 51 | TRAIN_PIPELINES=[ 52 | ("RepeatList", dict(transforms=[ 53 | ("Torch_Compose", transforms.Compose([ 54 | transforms.RandomResizedCrop(32, scale=(0.2, 1.)), 55 | transforms.RandomHorizontalFlip(), 56 | transforms.RandomApply([ 57 | transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8), 58 | transforms.RandomGrayscale(p=0.2), 59 | ])) 60 | ], repeat_times=2)), 61 | ] 62 | )), 63 | OUTPUT_DIR=osp.join( 64 | '/data/Outputs/model_logs/cvpods_playground/self_supervised', 65 | osp.split(osp.realpath(__file__))[0].split("self_supervised/")[-1])) 66 | 67 | 68 | class MoCoV2Config(BaseClassificationConfig): 69 | def __init__(self): 70 | super(MoCoV2Config, self).__init__() 71 | self._register_configuration(_config_dict) 72 | 73 | 74 | config = MoCoV2Config() 75 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res18.cifar10.512bs.32size.800e/net.py: 
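Aside: the learning rates in these configs are written as explicit linear-scaling expressions (`BASE_LR=0.03 / 256 * 512` in the pre-training config above, `BASE_LR=0.1 * 4096 / 256` in the SimSiam linear-eval config further down). A minimal sketch of the rule those expressions follow; `scaled_lr` is an illustrative helper, not part of cvpods:

```python
def scaled_lr(reference_lr: float, reference_bs: int, batch_size: int) -> float:
    """Linear LR scaling rule: keep lr / batch_size constant (Goyal et al., 2017)."""
    return reference_lr / reference_bs * batch_size

# The configs inline the same arithmetic:
assert scaled_lr(0.03, 256, 512) == 0.03 / 256 * 512  # SimSiam CIFAR pre-training, 0.06
assert scaled_lr(30, 256, 256) == 30.0                # MoCo-style linear eval keeps lr = 30
```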
-------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from simsiam import SimSiam 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_resnet_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_model(cfg): 26 | 27 | cfg.build_backbone = build_backbone 28 | 29 | model = SimSiam(cfg) 30 | 31 | logger = logging.getLogger(__name__) 32 | logger.info("Model:\n{}".format(model)) 33 | return model 34 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res18.cifar10.512bs.32size.800e/simsiam.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | 5 | from torch.nn import functional as F 6 | 7 | from cvpods.layers import ShapeSpec, Conv2d, get_norm 8 | from cvpods.structures import ImageList 9 | from cvpods.layers.batch_norm import NaiveSyncBatchNorm1d 10 | 11 | 12 | class SimSiam(nn.Module): 13 | def __init__(self, cfg): 14 | super(SimSiam, self).__init__() 15 | 16 | self.device = torch.device(cfg.MODEL.DEVICE) 17 | 18 | self.proj_dim = cfg.MODEL.BYOL.PROJ_DIM 19 | self.pred_dim = cfg.MODEL.BYOL.PRED_DIM 20 | self.out_dim = cfg.MODEL.BYOL.OUT_DIM 21 | 22 | self.total_steps = cfg.SOLVER.LR_SCHEDULER.MAX_ITER * cfg.SOLVER.BATCH_SUBDIVISIONS 23 | 24 | # create the encoders 25 | # num_classes is the output fc dimension 26 | cfg.MODEL.RESNETS.NUM_CLASSES = self.out_dim 27 | 28 | self.encoder = cfg.build_backbone( 29 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 30 | self.encoder.stem = nn.Sequential( 31 | Conv2d( 32 | 3, 33 | 64, 34 | kernel_size=3, 35 | stride=1, 36 | padding=1, 37 | bias=False, 38 | norm=get_norm(cfg.MODEL.RESNETS.NORM, 64) 39 | ), 40 | nn.ReLU(), 41 | ) 42 | 43 | self.size_divisibility = self.encoder.size_divisibility 44 | 45 | dim_mlp = self.encoder.linear.weight.shape[1] 46 | 47 | # Projection Head 48 | self.encoder.linear = nn.Sequential( 49 | nn.Linear(dim_mlp, self.proj_dim), 50 | nn.SyncBatchNorm(self.proj_dim), 51 | nn.ReLU(), 52 | nn.Linear(self.proj_dim, self.proj_dim), 53 | nn.SyncBatchNorm(self.proj_dim), 54 | ) 55 | 56 | # Predictor 57 | self.predictor = nn.Sequential( 58 | nn.Linear(self.proj_dim, self.pred_dim), 59 | nn.SyncBatchNorm(self.pred_dim), 60 | nn.ReLU(), 61 | nn.Linear(self.pred_dim, self.out_dim), 62 | ) 63 | 64 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(1, 3, 1, 1) 65 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(1, 3, 1, 1) 66 | self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std 67 | 68 | self.to(self.device) 69 | 70 | def D(self, p, z, version='simplified'): # negative cosine similarity 71 | if version == 'original': 72 | z = z.detach() # stop gradient 73 | p = F.normalize(p, dim=1) # l2-normalize 74 | z = F.normalize(z, dim=1) # l2-normalize 75 | return -(p * z).sum(dim=1).mean() 76 | elif version == 'simplified': # same thing, much faster. 
77 | return -F.cosine_similarity(p, z.detach(), dim=-1).mean() 78 | else: 79 | raise Exception 80 | 81 | def forward(self, batched_inputs): 82 | """ 83 | Input: 84 | batched_inputs: a batch of dicts, each holding two augmented views 85 | of the same image under the "image" key 86 | Output: 87 | a dict with the symmetrized negative cosine similarity loss 88 | """ 89 | x1 = self.preprocess_image([bi["image"][0] for bi in batched_inputs]) 90 | x2 = self.preprocess_image([bi["image"][1] for bi in batched_inputs]) 91 | 92 | z1, z2 = self.encoder(x1)["linear"], self.encoder(x2)["linear"] 93 | p1, p2 = self.predictor(z1), self.predictor(z2) 94 | 95 | loss = self.D(p1, z2) / 2 + self.D(p2, z1) / 2 96 | 97 | return dict(loss=loss) 98 | 99 | def preprocess_image(self, batched_inputs): 100 | """ 101 | Normalize, pad and batch the input images. 102 | """ 103 | # images = [x["image"].float().to(self.device) for x in batched_inputs] 104 | images = torch.stack([x for x in batched_inputs]).to(self.device) 105 | images = self.normalizer(images) 106 | 107 | return images 108 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res50.imagenet.256bs.224size.100e.lin_cls/README.md: -------------------------------------------------------------------------------- 1 | # SimSiam.res50.imagenet.256bs.224size.100e.lin_cls 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 67.608 | 87.902 | 8 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res50.imagenet.256bs.224size.100e.lin_cls/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | 8 | MODEL=dict( 9 | WEIGHTS="../SimSiam.res50.imagenet.256bs.224size.100e/log/model_final.pkl", 10 | BACKBONE=dict(FREEZE_AT=0, ), # freeze all parameters manually in imagenet.py 11 | RESNETS=dict( 12 | DEPTH=50, 13 | NUM_CLASSES=1000, 14 | NORM="BN", 15 | OUT_FEATURES=["res5", "linear"], 16 | STRIDE_IN_1X1=False, 17 | ), 18 | ), 19 | DATASETS=dict( 20 | TRAIN=("imagenet_train", ), 21 | TEST=("imagenet_val", ), 22 | ), 23 | DATALOADER=dict( 24 | NUM_WORKERS=6, 25 | ), 26 | SOLVER=dict( 27 | LR_SCHEDULER=dict( 28 | NAME="WarmupMultiStepLR", 29 | MAX_EPOCH=90, 30 | STEPS=(60, 80), 31 | WARMUP_ITERS=0, 32 | ), 33 | OPTIMIZER=dict( 34 | NAME="LARS_SGD", 35 | EPS=1e-8, 36 | TRUST_COEF=1e-3, 37 | CLIP=False, 38 | BASE_LR=0.1 * 4096 / 256, 39 | MOMENTUM=0.9, 40 | WEIGHT_DECAY=0.0, 41 | ), 42 | CHECKPOINT_PERIOD=10, 43 | IMS_PER_BATCH=4096, 44 | IMS_PER_DEVICE=512, 45 | ), 46 | INPUT=dict( 47 | FORMAT="RGB", 48 | AUG=dict( 49 | TRAIN_PIPELINES=[ 50 | ("Torch_Compose", transforms.Compose([ 51 | transforms.RandomResizedCrop(224), 52 | transforms.RandomHorizontalFlip(), 53 | transforms.ToTensor(), 54 | transforms.Normalize( 55 | mean=[0.485, 0.456, 0.406], 56 | std=[0.229, 0.224, 0.225]), 57 | ])), 58 | ], 59 | TEST_PIPELINES=[ 60 | ("Torch_Compose", transforms.Compose([ 61 | transforms.Resize(256), 62 | transforms.CenterCrop(224), 63 | transforms.ToTensor(), 64 | transforms.Normalize( 65 | mean=[0.485, 0.456, 0.406], 66 | std=[0.229, 0.224, 0.225]), 67 | ])) 68 | ], 69 | ) 70 | ), 71 | TEST=dict( 72 | EVAL_PERIOD=10, 73 | ), 74 | OUTPUT_DIR=osp.join( 75 | '/data/Outputs/model_logs/cvpods_playground/SelfSup', 76 | 
osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1] 77 | ) 78 | ) 79 | 80 | 81 | class ClassificationConfig(BaseClassificationConfig): 82 | def __init__(self): 83 | super(ClassificationConfig, self).__init__() 84 | self._register_configuration(_config_dict) 85 | 86 | 87 | config = ClassificationConfig() 88 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res50.imagenet.256bs.224size.100e.lin_cls/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import ShapeSpec 6 | 7 | 8 | def accuracy(output, target, topk=(1,)): 9 | """Computes the accuracy over the k top predictions for the specified values of k""" 10 | with torch.no_grad(): 11 | maxk = max(topk) 12 | batch_size = target.size(0) 13 | 14 | _, pred = output.topk(maxk, 1, True, True) 15 | pred = pred.t() 16 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 17 | 18 | res = [] 19 | for k in topk: 20 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 21 | res.append(correct_k.mul_(100.0 / batch_size)) 22 | return res 23 | 24 | 25 | class Classification(nn.Module): 26 | def __init__(self, cfg): 27 | super(Classification, self).__init__() 28 | 29 | self.device = torch.device(cfg.MODEL.DEVICE) 30 | 31 | self.network = cfg.build_backbone( 32 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 33 | 34 | self.freeze() 35 | self.network.eval() 36 | 37 | # init the fc layer 38 | self.network.linear.weight.data.normal_(mean=0.0, std=0.01) 39 | self.network.linear.bias.data.zero_() 40 | 41 | self.loss_evaluator = nn.CrossEntropyLoss() 42 | 43 | self.to(self.device) 44 | 45 | def freeze(self): 46 | for name, param in self.network.named_parameters(): 47 | if name not in ['linear.weight', 'linear.bias']: 48 | param.requires_grad = False 49 | 50 | def forward(self, batched_inputs): 51 | self.network.eval() 52 | images = torch.stack([x["image"] for x in batched_inputs]).to(self.device) 53 | 54 | outputs = self.network(images) 55 | preds = outputs["linear"] 56 | 57 | if self.training: 58 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 59 | losses = self.loss_evaluator(preds, labels) 60 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 61 | 62 | return { 63 | "loss_cls": losses, 64 | "top1_acc": acc1, 65 | "top5_acc": acc5, 66 | } 67 | else: 68 | return preds 69 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res50.imagenet.256bs.224size.100e.lin_cls/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from imagenet import Classification 8 | 9 | def build_backbone(cfg, input_shape=None): 10 | """ 11 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
12 | 13 | Returns: 14 | an instance of :class:`Backbone` 15 | """ 16 | if input_shape is None: 17 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 18 | 19 | backbone = build_resnet_backbone(cfg, input_shape) 20 | assert isinstance(backbone, Backbone) 21 | return backbone 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = Classification(cfg) 28 | 29 | logger = logging.getLogger(__name__) 30 | logger.info("Model:\n{}".format(model)) 31 | return model 32 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res50.imagenet.256bs.224size.100e/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="", 9 | AS_PRETRAIN=True, 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["res5"], 15 | STRIDE_IN_1X1=False, # default true for msra models 16 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 17 | ), 18 | BYOL=dict( 19 | PROJ_DIM=2048, 20 | PRED_DIM=512, 21 | OUT_DIM=2048, 22 | ), 23 | ), 24 | DATASETS=dict( 25 | TRAIN=("imagenet_train", ), 26 | TEST=("imagenet_val", ), 27 | ), 28 | DATALOADER=dict(NUM_WORKERS=4, ), 29 | SOLVER=dict( 30 | LR_SCHEDULER=dict( 31 | NAME="WarmupCosineLR", 32 | MAX_EPOCH=100, 33 | WARMUP_ITERS=0, 34 | EPOCH_WISE=True, 35 | ), 36 | OPTIMIZER=dict( 37 | NAME="SGD", 38 | BASE_LR=0.05, 39 | MOMENTUM=0.9, 40 | WEIGHT_DECAY=1e-4, 41 | ), 42 | CHECKPOINT_PERIOD=10, 43 | IMS_PER_BATCH=256, 44 | IMS_PER_DEVICE=32, 45 | ), 46 | INPUT=dict( 47 | AUG=dict( 48 | TRAIN_PIPELINES=[ 49 | ("RepeatList", dict(transforms=[ 50 | ("Torch_Compose", transforms.Compose([ 51 | transforms.RandomResizedCrop(224, scale=(0.2, 1.)), 52 | transforms.RandomHorizontalFlip(), 53 | transforms.RandomApply([ 54 | transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8), 55 | transforms.RandomGrayscale(p=0.2), 56 | ])), 57 | ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=0.5, mode="PIL")), 58 | ("Torch_Compose", transforms.Compose([ 59 | transforms.ToTensor(), 60 | transforms.Normalize( 61 | mean=[0.485, 0.456, 0.406], 62 | std=[0.229, 0.224, 0.225]) 63 | ])), 64 | ], repeat_times=2)), 65 | ] 66 | ) 67 | ), 68 | OUTPUT_DIR=osp.join( 69 | '/data/Outputs/model_logs/cvpods_playground/self_supervised', 70 | osp.split(osp.realpath(__file__))[0].split("self_supervised/")[-1])) 71 | 72 | 73 | class MoCoV2Config(BaseClassificationConfig): 74 | def __init__(self): 75 | super(MoCoV2Config, self).__init__() 76 | self._register_configuration(_config_dict) 77 | 78 | 79 | config = MoCoV2Config() 80 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res50.imagenet.256bs.224size.100e/net.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import logging 3 | 4 | from cvpods.layers import ShapeSpec 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone import build_resnet_backbone 7 | 8 | from simsiam import SimSiam 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_resnet_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_model(cfg): 27 | 28 | cfg.build_backbone = build_backbone 29 | 30 | model = SimSiam(cfg) 31 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model) 32 | 33 | return model 34 | -------------------------------------------------------------------------------- /examples/simsiam/SimSiam.res50.imagenet.256bs.224size.100e/simsiam.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from torch.nn import functional as F 5 | 6 | from cvpods.layers import ShapeSpec 7 | 8 | 9 | class SimSiam(nn.Module): 10 | def __init__(self, cfg): 11 | super(SimSiam, self).__init__() 12 | 13 | self.device = torch.device(cfg.MODEL.DEVICE) 14 | 15 | self.proj_dim = cfg.MODEL.BYOL.PROJ_DIM 16 | self.pred_dim = cfg.MODEL.BYOL.PRED_DIM 17 | self.out_dim = cfg.MODEL.BYOL.OUT_DIM 18 | 19 | self.encoder_q = cfg.build_backbone( 20 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 21 | 22 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 23 | 24 | # Projection Head 25 | self.projector = nn.Sequential( 26 | nn.Linear(self.out_dim, self.proj_dim), 27 | nn.BatchNorm1d(self.proj_dim), 28 | nn.ReLU(), 29 | nn.Linear(self.proj_dim, self.proj_dim), 30 | nn.BatchNorm1d(self.proj_dim), 31 | nn.ReLU(), 32 | nn.Linear(self.proj_dim, self.proj_dim), 33 | nn.BatchNorm1d(self.proj_dim), 34 | ) 35 | 36 | # Predictor 37 | self.predictor = nn.Sequential( 38 | nn.Linear(self.proj_dim, self.pred_dim), 39 | nn.BatchNorm1d(self.pred_dim), 40 | nn.ReLU(), 41 | nn.Linear(self.pred_dim, self.out_dim), 42 | ) 43 | 44 | self.to(self.device) 45 | 46 | def D(self, p, z, version='simplified'): # negative cosine similarity 47 | if version == 'original': 48 | z = z.detach() # stop gradient 49 | p = F.normalize(p, dim=1) # l2-normalize 50 | z = F.normalize(z, dim=1) # l2-normalize 51 | return -(p * z).sum(dim=1).mean() 52 | elif version == 'simplified': # same thing, much faster. 
53 | return -F.cosine_similarity(p, z.detach(), dim=-1).mean() 54 | else: 55 | raise ValueError("unknown D() version: {}".format(version)) 56 | 57 | def forward(self, batched_inputs): 58 | """ 59 | Input: 60 | batched_inputs: a list of dicts, each holding two augmented 61 | views of the same image under the "image" key 62 | Output: 63 | a dict with the symmetrized negative cosine similarity loss 64 | """ 65 | x1 = torch.stack([bi["image"][0] for bi in batched_inputs]).to(self.device) 66 | x2 = torch.stack([bi["image"][1] for bi in batched_inputs]).to(self.device) 67 | z1 = self.projector(torch.flatten(self.avgpool(self.encoder_q(x1)["res5"]), 1)) 68 | z2 = self.projector(torch.flatten(self.avgpool(self.encoder_q(x2)["res5"]), 1)) 69 | p1, p2 = self.predictor(z1), self.predictor(z2) 70 | 71 | loss = self.D(p1, z2) / 2 + self.D(p2, z1) / 2 72 | 73 | return dict(loss=loss) 74 | -------------------------------------------------------------------------------- /examples/swav/swav.res50.imagenet.256bs.2x224_6x96.200e.lars.lin_cls/README.md: -------------------------------------------------------------------------------- 1 | # swav.res50.imagenet.256bs.2x224_6x96.200e.lars.lin_cls 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 68.722 | 89.166 | 8 | -------------------------------------------------------------------------------- /examples/swav/swav.res50.imagenet.256bs.2x224_6x96.200e.lars.lin_cls/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="../swav.res50.imagenet.256bs.2x224_6x96.200e.lars/log/model_epoch_0095.pkl", 9 | BACKBONE=dict(FREEZE_AT=0, ), # freeze all parameters manually in imagenet.py 10 | RESNETS=dict( 11 | ARCH="resnet50", 12 | DEPTH=50, 13 | NUM_CLASSES=1000, 14 | NORM="BN", 15 | OUT_FEATURES=["res5", "linear"], 16 | STRIDE_IN_1X1=False, 17 | ), 18 | ), 19 | DATASETS=dict( 20 | TRAIN=("imagenet_train", ), 21 | TEST=("imagenet_val", ), 22 | ), 23 | DATALOADER=dict( 24 | NUM_WORKERS=6, 25 | ), 26 | SOLVER=dict( 27 | LR_SCHEDULER=dict( 28 | NAME="WarmupCosineLR", 29 | STEPS=[60, 80], 30 | MAX_EPOCH=100, 31 | WARMUP_ITERS=0, 32 | ), 33 | OPTIMIZER=dict( 34 | NAME="SGD", 35 | BASE_LR=0.3, 36 | MOMENTUM=0.9, 37 | WEIGHT_DECAY=1e-6, 38 | ), 39 | CHECKPOINT_PERIOD=10, 40 | IMS_PER_BATCH=256, 41 | IMS_PER_DEVICE=32, 42 | ), 43 | INPUT=dict( 44 | AUG=dict( 45 | TRAIN_PIPELINES=[ 46 | ("Torch_RRC", transforms.RandomResizedCrop(224)), 47 | ("Torch_RHF", transforms.RandomHorizontalFlip()), 48 | ], 49 | TEST_PIPELINES=[ 50 | ("Torch_R", transforms.Resize(256)), 51 | ("Torch_CC", transforms.CenterCrop(224)), 52 | ] 53 | ) 54 | ), 55 | TEST=dict( 56 | EVAL_PERIOD=10, 57 | ), 58 | OUTPUT_DIR=osp.join( 59 | '/data/Outputs/model_logs/cvpods_playground', 60 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 61 | ) 62 | ) 63 | 64 | 65 | class ClassificationConfig(BaseClassificationConfig): 66 | def __init__(self): 67 | super(ClassificationConfig, self).__init__() 68 | self._register_configuration(_config_dict) 69 | 70 | 71 | config = ClassificationConfig() 72 | -------------------------------------------------------------------------------- /examples/swav/swav.res50.imagenet.256bs.2x224_6x96.200e.lars.lin_cls/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | import swav_resnet as 
resnet_models 6 | 7 | 8 | def accuracy(output, target, topk=(1,)): 9 | """Computes the accuracy over the k top predictions for the specified values of k""" 10 | with torch.no_grad(): 11 | maxk = max(topk) 12 | batch_size = target.size(0) 13 | 14 | _, pred = output.topk(maxk, 1, True, True) 15 | pred = pred.t() 16 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 17 | 18 | res = [] 19 | for k in topk: 20 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) # reshape, not view: `correct` is non-contiguous after t() 21 | res.append(correct_k.mul_(100.0 / batch_size)) 22 | return res 23 | 24 | 25 | class Classification(nn.Module): 26 | def __init__(self, cfg): 27 | super(Classification, self).__init__() 28 | 29 | self.device = torch.device(cfg.MODEL.DEVICE) 30 | 31 | # self.network = cfg.build_backbone( 32 | # cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 33 | self.network = resnet_models.__dict__[cfg.MODEL.RESNETS.ARCH](output_dim=0, eval_mode=True) 34 | 35 | self.av_pool = nn.AdaptiveAvgPool2d((1, 1)) 36 | self.linear = nn.Linear(2048, cfg.MODEL.RESNETS.NUM_CLASSES) 37 | # init the fc layer 38 | self.linear.weight.data.normal_(mean=0.0, std=0.01) 39 | self.linear.bias.data.zero_() 40 | 41 | self.freeze() 42 | self.network.eval() 43 | 44 | self.loss_evaluator = nn.CrossEntropyLoss() 45 | 46 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1) 47 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1) 48 | self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std 49 | 50 | self.to(self.device) 51 | 52 | def freeze(self): 53 | for name, param in self.network.named_parameters(): 54 | param.requires_grad = False 55 | 56 | def forward(self, batched_inputs): 57 | self.network.eval() 58 | 59 | images = self.preprocess_image(batched_inputs) 60 | outputs = torch.flatten(self.av_pool(self.network(images)), 1) 61 | preds = self.linear(outputs) 62 | 63 | if self.training: 64 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 65 | losses = self.loss_evaluator(preds, labels) 66 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 67 | 68 | return { 69 | "loss_cls": losses, 70 | "top1_acc": acc1, 71 | "top5_acc": acc5, 72 | } 73 | else: 74 | return preds 75 | 76 | def preprocess_image(self, batched_inputs): 77 | """ 78 | Normalize and batch the input images. 79 | """ 80 | images = [x["image"].float().to(self.device) for x in batched_inputs] 81 | images = torch.stack([self.normalizer(x) for x in images]) 82 | return images 83 | -------------------------------------------------------------------------------- /examples/swav/swav.res50.imagenet.256bs.2x224_6x96.200e.lars.lin_cls/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from imagenet import Classification 8 | 9 | def build_backbone(cfg, input_shape=None): 10 | """ 11 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
12 | 13 | Returns: 14 | an instance of :class:`Backbone` 15 | """ 16 | if input_shape is None: 17 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 18 | 19 | backbone = build_resnet_backbone(cfg, input_shape) 20 | assert isinstance(backbone, Backbone) 21 | return backbone 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = Classification(cfg) 28 | 29 | logger = logging.getLogger(__name__) 30 | logger.info("Model:\n{}".format(model)) 31 | return model 32 | -------------------------------------------------------------------------------- /examples/swav/swav.res50.imagenet.256bs.2x224_6x96.200e.lars/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="", 9 | AS_PRETRAIN=True, 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["res5"], 15 | STRIDE_IN_1X1=False, # default true for msra models 16 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 17 | ), 18 | SWAV=dict( 19 | CANCEL_EPOCHS=1, # freeze prototype gradients for the first N epochs 20 | NMB_CROPS=[2, 6], 21 | CROPS_FOR_ASSIGN=[0, 1], 22 | ARCH="resnet50", 23 | HIDDEN_MLP=2048, 24 | D=128, # Feature Dim 25 | K=3840, # Queue Length 26 | K_START=15, # Epoch at which the queue starts 27 | P=3000, # Prototypes 28 | TAU=0.1, 29 | EPS=0.05, 30 | SK_ITERS=3, 31 | NUMERICAL_STABILITY=True, 32 | NORM="BN1d" 33 | ), 34 | ), 35 | DATASETS=dict( 36 | TRAIN=("imagenet_train", ), 37 | TEST=("imagenet_val", ), 38 | ), 39 | SOLVER=dict( 40 | LR_SCHEDULER=dict( 41 | NAME="WarmupCosineLR", 42 | MAX_EPOCH=200, 43 | WARMUP_ITERS=0, 44 | EPOCH_WISE=True, 45 | ), 46 | OPTIMIZER=dict( 47 | NAME="LARS_SGD", 48 | EPS=1e-8, 49 | TRUST_COEF=1e-3, 50 | # CLIP=False, 51 | BASE_LR=0.6, 52 | MOMENTUM=0.9, 53 | WEIGHT_DECAY=1e-6, 54 | ), 55 | CHECKPOINT_PERIOD=5, 56 | IMS_PER_BATCH=256, 57 | IMS_PER_DEVICE=32, 58 | ), 59 | DATALOADER=dict(NUM_WORKERS=6, ), 60 | TRAINER=dict( 61 | FP16=dict(ENABLED=False), 62 | NAME="SWAVRunner", 63 | ), 64 | INPUT=dict( 65 | AUG=dict( 66 | TRAIN_PIPELINES=dict( 67 | contrastive=[ 68 | ("RepeatList", dict(transforms=[ 69 | ("Torch_Compose", transforms.Compose([ 70 | transforms.RandomResizedCrop(224, scale=(0.14, 1.)), 71 | transforms.RandomHorizontalFlip(p=0.5), 72 | transforms.RandomApply([ 73 | transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)], p=0.8), 74 | transforms.RandomGrayscale(p=0.2), 75 | ])), 76 | ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=0.5, mode="PIL")), 77 | ("Torch_Compose", transforms.Compose([ 78 | transforms.ToTensor(), 79 | transforms.Normalize( 80 | mean=[0.485, 0.456, 0.406], 81 | std=[0.229, 0.224, 0.225]) 82 | ])), 83 | ], repeat_times=2)), 84 | ], 85 | multiview=[ 86 | ("RepeatList", dict(transforms=[ 87 | ("Torch_Compose", transforms.Compose([ 88 | transforms.RandomResizedCrop(96, scale=(0.05, 0.14)), 89 | transforms.RandomHorizontalFlip(p=0.5), 90 | transforms.RandomApply([ 91 | transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)], p=0.8), 92 | transforms.RandomGrayscale(p=0.2), 93 | ])), 94 | ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=0.5, mode="PIL")), 95 | ("Torch_Compose", transforms.Compose([ 96 | transforms.ToTensor(), 97 | transforms.Normalize( 98 | mean=[0.485, 0.456, 0.406], 99 | std=[0.229, 0.224, 0.225]) 100 | ])), 101 | ], repeat_times=6)), 
102 | ], 103 | # linear=[ 104 | # ("Torch_Compose", transforms.Compose([ 105 | # transforms.RandomResizedCrop(224), 106 | # transforms.RandomHorizontalFlip(), 107 | # ])), 108 | # ], 109 | ) 110 | )), 111 | OUTPUT_DIR=osp.join( 112 | '/data/Outputs/model_logs/cvpods_playground', 113 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1])) 114 | 115 | 116 | class MoCoV2Config(BaseClassificationConfig): 117 | def __init__(self): 118 | super(MoCoV2Config, self).__init__() 119 | self._register_configuration(_config_dict) 120 | 121 | 122 | config = MoCoV2Config() 123 | -------------------------------------------------------------------------------- /examples/swav/swav.res50.imagenet.256bs.2x224_6x96.200e.lars/net.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | from cvpods.utils import comm 7 | 8 | from swav_trainer import * 9 | from swav import SwAV 10 | 11 | 12 | def build_backbone(cfg, input_shape=None): 13 | """ 14 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 15 | 16 | Returns: 17 | an instance of :class:`Backbone` 18 | """ 19 | if input_shape is None: 20 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 21 | 22 | backbone = build_resnet_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_model(cfg): 28 | 29 | cfg.build_backbone = build_backbone 30 | 31 | model = SwAV(cfg) 32 | if comm.get_world_size() > 1: 33 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model) 34 | 35 | return model 36 | -------------------------------------------------------------------------------- /examples/swav/swav.res50.imagenet.256bs.2x224_6x96.200e.lin_cls/README.md: -------------------------------------------------------------------------------- 1 | # swav.res50.imagenet.256bs.2x224_6x96.200e.lin_cls 2 | 3 | ## Evaluation results for classification: 4 | 5 | | Top_1 Acc | Top_5 Acc | 6 | |:-----------:|:-----------:| 7 | | 67.996 | 88.744 | 8 | -------------------------------------------------------------------------------- /examples/swav/swav.res50.imagenet.256bs.2x224_6x96.200e.lin_cls/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="../swav.res50.imagenet.256bs.2x224_6x96.200e/log/model_epoch_0100.pkl", 9 | BACKBONE=dict(FREEZE_AT=0, ), # freeze all parameters manually in imagenet.py 10 | RESNETS=dict( 11 | ARCH="resnet50", 12 | DEPTH=50, 13 | NUM_CLASSES=1000, 14 | NORM="BN", 15 | OUT_FEATURES=["res5", "linear"], 16 | STRIDE_IN_1X1=False, 17 | ), 18 | ), 19 | DATASETS=dict( 20 | TRAIN=("imagenet_train", ), 21 | TEST=("imagenet_val", ), 22 | ), 23 | DATALOADER=dict( 24 | NUM_WORKERS=6, 25 | ), 26 | SOLVER=dict( 27 | LR_SCHEDULER=dict( 28 | NAME="WarmupCosineLR", 29 | STEPS=[60, 80], 30 | MAX_EPOCH=100, 31 | WARMUP_ITERS=0, 32 | ), 33 | OPTIMIZER=dict( 34 | NAME="SGD", 35 | BASE_LR=0.3, 36 | MOMENTUM=0.9, 37 | WEIGHT_DECAY=1e-6, 38 | ), 39 | CHECKPOINT_PERIOD=10, 40 | IMS_PER_BATCH=256, 41 | IMS_PER_DEVICE=32, 42 | ), 43 | INPUT=dict( 44 | AUG=dict( 45 | TRAIN_PIPELINES=[ 46 | ("Torch_RRC", transforms.RandomResizedCrop(224)), 47 | ("Torch_RHF", 
transforms.RandomHorizontalFlip()), 48 | ], 49 | TEST_PIPELINES=[ 50 | ("Torch_R", transforms.Resize(256)), 51 | ("Torch_CC", transforms.CenterCrop(224)), 52 | ] 53 | ) 54 | ), 55 | TEST=dict( 56 | EVAL_PERIOD=10, 57 | ), 58 | OUTPUT_DIR=osp.join( 59 | '/data/Outputs/model_logs/cvpods_playground', 60 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 61 | ) 62 | ) 63 | 64 | 65 | class ClassificationConfig(BaseClassificationConfig): 66 | def __init__(self): 67 | super(ClassificationConfig, self).__init__() 68 | self._register_configuration(_config_dict) 69 | 70 | 71 | config = ClassificationConfig() 72 | -------------------------------------------------------------------------------- /examples/swav/swav.res50.imagenet.256bs.2x224_6x96.200e.lin_cls/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | import swav_resnet as resnet_models 6 | 7 | 8 | def accuracy(output, target, topk=(1,)): 9 | """Computes the accuracy over the k top predictions for the specified values of k""" 10 | with torch.no_grad(): 11 | maxk = max(topk) 12 | batch_size = target.size(0) 13 | 14 | _, pred = output.topk(maxk, 1, True, True) 15 | pred = pred.t() 16 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 17 | 18 | res = [] 19 | for k in topk: 20 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) # reshape, not view: `correct` is non-contiguous after t() 21 | res.append(correct_k.mul_(100.0 / batch_size)) 22 | return res 23 | 24 | 25 | class Classification(nn.Module): 26 | def __init__(self, cfg): 27 | super(Classification, self).__init__() 28 | 29 | self.device = torch.device(cfg.MODEL.DEVICE) 30 | 31 | # self.network = cfg.build_backbone( 32 | # cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 33 | self.network = resnet_models.__dict__[cfg.MODEL.RESNETS.ARCH](output_dim=0, eval_mode=True) 34 | 35 | self.av_pool = nn.AdaptiveAvgPool2d((1, 1)) 36 | self.linear = nn.Linear(2048, cfg.MODEL.RESNETS.NUM_CLASSES) 37 | # init the fc layer 38 | self.linear.weight.data.normal_(mean=0.0, std=0.01) 39 | self.linear.bias.data.zero_() 40 | 41 | self.freeze() 42 | self.network.eval() 43 | 44 | self.loss_evaluator = nn.CrossEntropyLoss() 45 | 46 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1) 47 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1) 48 | self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std 49 | 50 | self.to(self.device) 51 | 52 | def freeze(self): 53 | for name, param in self.network.named_parameters(): 54 | param.requires_grad = False 55 | 56 | def forward(self, batched_inputs): 57 | self.network.eval() 58 | 59 | images = self.preprocess_image(batched_inputs) 60 | outputs = torch.flatten(self.av_pool(self.network(images)), 1) 61 | preds = self.linear(outputs) 62 | 63 | if self.training: 64 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 65 | losses = self.loss_evaluator(preds, labels) 66 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 67 | 68 | return { 69 | "loss_cls": losses, 70 | "top1_acc": acc1, 71 | "top5_acc": acc5, 72 | } 73 | else: 74 | return preds 75 | 76 | def preprocess_image(self, batched_inputs): 77 | """ 78 | Normalize and batch the input images. 
79 | """ 80 | images = [x["image"].float().to(self.device) for x in batched_inputs] 81 | images = torch.stack([self.normalizer(x) for x in images]) 82 | return images 83 | -------------------------------------------------------------------------------- /examples/swav/swav.res50.imagenet.256bs.2x224_6x96.200e.lin_cls/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from imagenet import Classification 8 | 9 | def build_backbone(cfg, input_shape=None): 10 | """ 11 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 12 | 13 | Returns: 14 | an instance of :class:`Backbone` 15 | """ 16 | if input_shape is None: 17 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 18 | 19 | backbone = build_resnet_backbone(cfg, input_shape) 20 | assert isinstance(backbone, Backbone) 21 | return backbone 22 | 23 | def build_model(cfg): 24 | 25 | cfg.build_backbone = build_backbone 26 | 27 | model = Classification(cfg) 28 | 29 | logger = logging.getLogger(__name__) 30 | logger.info("Model:\n{}".format(model)) 31 | return model 32 | -------------------------------------------------------------------------------- /examples/swav/swav.res50.imagenet.256bs.2x224_6x96.200e/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torchvision.transforms as transforms 3 | 4 | from cvpods.configs.base_classification_config import BaseClassificationConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | WEIGHTS="", 9 | AS_PRETRAIN=True, 10 | RESNETS=dict( 11 | DEPTH=50, 12 | NUM_CLASSES=1000, 13 | NORM="BN", 14 | OUT_FEATURES=["res5"], 15 | STRIDE_IN_1X1=False, # default true for msra models 16 | ZERO_INIT_RESIDUAL=True, # default false, use true for all subsequent models 17 | ), 18 | SWAV=dict( 19 | CANCEL_EPOCHS=1, # freeze prototype gradients for the first N epochs 20 | NMB_CROPS=[2, 6], 21 | CROPS_FOR_ASSIGN=[0, 1], 22 | ARCH="resnet50", 23 | HIDDEN_MLP=2048, 24 | D=128, # Feature Dim 25 | K=3840, # Queue Length 26 | K_START=15, # Epoch at which the queue starts 27 | P=3000, # Prototypes 28 | TAU=0.1, 29 | EPS=0.05, 30 | SK_ITERS=3, 31 | NUMERICAL_STABILITY=True, 32 | NORM="BN1d" 33 | ), 34 | ), 35 | DATASETS=dict( 36 | TRAIN=("imagenet_train", ), 37 | TEST=("imagenet_val", ), 38 | ), 39 | SOLVER=dict( 40 | LR_SCHEDULER=dict( 41 | NAME="WarmupCosineLR", 42 | MAX_EPOCH=200, 43 | WARMUP_ITERS=0, 44 | EPOCH_WISE=True, 45 | ), 46 | OPTIMIZER=dict( 47 | NAME="SGD", 48 | # EPS=1e-8, 49 | # TRUST_COEF=1e-3, 50 | # CLIP=False, 51 | BASE_LR=0.6, 52 | MOMENTUM=0.9, 53 | WEIGHT_DECAY=1e-6, 54 | ), 55 | CHECKPOINT_PERIOD=5, 56 | IMS_PER_BATCH=256, 57 | IMS_PER_DEVICE=32, 58 | ), 59 | DATALOADER=dict(NUM_WORKERS=6, ), 60 | TRAINER=dict( 61 | FP16=dict(ENABLED=False), 62 | NAME="SWAVRunner", 63 | ), 64 | INPUT=dict( 65 | AUG=dict( 66 | TRAIN_PIPELINES=dict( 67 | contrastive=[ 68 | ("RepeatList", dict(transforms=[ 69 | ("Torch_Compose", transforms.Compose([ 70 | transforms.RandomResizedCrop(224, scale=(0.14, 1.)), 71 | transforms.RandomHorizontalFlip(p=0.5), 72 | transforms.RandomApply([ 73 | transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)], p=0.8), 74 | transforms.RandomGrayscale(p=0.2), 75 | ])), 76 | ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=0.5, mode="PIL")), 77 | ("Torch_Compose", transforms.Compose([ 78 | transforms.ToTensor(), 79 | 
transforms.Normalize( 80 | mean=[0.485, 0.456, 0.406], 81 | std=[0.229, 0.224, 0.225]) 82 | ])), 83 | ], repeat_times=2)), 84 | ], 85 | multiview=[ 86 | ("RepeatList", dict(transforms=[ 87 | ("Torch_Compose", transforms.Compose([ 88 | transforms.RandomResizedCrop(96, scale=(0.05, 0.14)), 89 | transforms.RandomHorizontalFlip(p=0.5), 90 | transforms.RandomApply([ 91 | transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)], p=0.8), 92 | transforms.RandomGrayscale(p=0.2), 93 | ])), 94 | ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=0.5, mode="PIL")), 95 | ("Torch_Compose", transforms.Compose([ 96 | transforms.ToTensor(), 97 | transforms.Normalize( 98 | mean=[0.485, 0.456, 0.406], 99 | std=[0.229, 0.224, 0.225]) 100 | ])), 101 | ], repeat_times=6)), 102 | ], 103 | # linear=[ 104 | # ("Torch_Compose", transforms.Compose([ 105 | # transforms.RandomResizedCrop(224), 106 | # transforms.RandomHorizontalFlip(), 107 | # ])), 108 | # ], 109 | ) 110 | )), 111 | OUTPUT_DIR=osp.join( 112 | '/data/Outputs/model_logs/cvpods_playground', 113 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1])) 114 | 115 | 116 | class MoCoV2Config(BaseClassificationConfig): 117 | def __init__(self): 118 | super(MoCoV2Config, self).__init__() 119 | self._register_configuration(_config_dict) 120 | 121 | 122 | config = MoCoV2Config() 123 | -------------------------------------------------------------------------------- /examples/swav/swav.res50.imagenet.256bs.2x224_6x96.200e/net.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.backbone import Backbone 5 | from cvpods.modeling.backbone import build_resnet_backbone 6 | 7 | from swav_trainer import * 8 | from swav import SwAV 9 | 10 | 11 | def build_backbone(cfg, input_shape=None): 12 | """ 13 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 14 | 15 | Returns: 16 | an instance of :class:`Backbone` 17 | """ 18 | if input_shape is None: 19 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 20 | 21 | backbone = build_resnet_backbone(cfg, input_shape) 22 | assert isinstance(backbone, Backbone) 23 | return backbone 24 | 25 | 26 | def build_model(cfg): 27 | 28 | cfg.build_backbone = build_backbone 29 | 30 | model = SwAV(cfg) 31 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model) 32 | 33 | return model 34 | --------------------------------------------------------------------------------
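
A note on the two branches of `D()` in the SimSiam modules above: the 'original' branch applies the stop-gradient and L2-normalizes `p` and `z` explicitly before the dot product, while the 'simplified' branch lets `F.cosine_similarity` fuse the normalization. The two are numerically equivalent; the simplified form is just faster. Below is a minimal standalone check of that equivalence (plain PyTorch; the batch and feature sizes are illustrative, not taken from the configs):

```python
import torch
import torch.nn.functional as F

def d_original(p, z):
    # stop-gradient on the target branch, then explicit L2 normalization
    z = z.detach()
    p = F.normalize(p, dim=1)
    z = F.normalize(z, dim=1)
    return -(p * z).sum(dim=1).mean()

def d_simplified(p, z):
    # cosine_similarity fuses the normalization and the dot product
    return -F.cosine_similarity(p, z.detach(), dim=-1).mean()

p = torch.randn(8, 2048, requires_grad=True)  # illustrative shapes
z = torch.randn(8, 2048)
assert torch.allclose(d_original(p, z), d_simplified(p, z), atol=1e-5)
```

The `z.detach()` is the load-bearing part: SimSiam relies on the stop-gradient to avoid representation collapse without negative pairs or a momentum encoder, which is why both branches keep it.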
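
The SWAV config blocks above expose `EPS`, `SK_ITERS`, and `NUMERICAL_STABILITY`, the knobs of the Sinkhorn-Knopp step that SwAV uses to turn prototype scores into balanced soft cluster assignments for the two global crops (`CROPS_FOR_ASSIGN=[0, 1]`), with the swapped prediction loss computed at temperature `TAU`. The trainer that consumes these keys (`swav_trainer.py`) is not reproduced in this listing, so the sketch below is only the standard single-process Sinkhorn iteration from the SwAV paper, written against those config names; it is not necessarily the repo's exact implementation:

```python
import torch

@torch.no_grad()
def sinkhorn(scores, eps=0.05, n_iters=3, stabilize=True):
    """scores: (B, P) dot products between L2-normalized features
    and the P prototypes; returns (B, P) soft assignments."""
    if stabilize:
        scores = scores - scores.max()   # cf. NUMERICAL_STABILITY=True
    Q = torch.exp(scores / eps).t()      # (P, B); cf. EPS=0.05
    Q /= Q.sum()
    n_protos, n_samples = Q.shape
    for _ in range(n_iters):             # cf. SK_ITERS=3
        Q /= Q.sum(dim=1, keepdim=True)  # rows: equal total mass per prototype
        Q /= n_protos
        Q /= Q.sum(dim=0, keepdim=True)  # columns: one unit of mass per sample
        Q /= n_samples
    return (Q * n_samples).t()           # each row now sums to 1
```

Under this reading of the config, the feature queue of length `K=3840` enlarges the effective batch fed into this step once it is enabled at epoch `K_START=15`, which helps keep the assignments balanced across the `P=3000` prototypes at a batch size of 256.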