├── .gitignore ├── README.md ├── README_robustness.md ├── data ├── __init__.py ├── datasets.py ├── samplers.py └── threeaugment.py ├── detection ├── .gitignore ├── README.md ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── cityscapes_detection.py │ │ │ ├── cityscapes_instance.py │ │ │ ├── coco_detection.py │ │ │ ├── coco_instance.py │ │ │ ├── coco_instance_semantic.py │ │ │ ├── deepfashion.py │ │ │ ├── lvis_v0.5_instance.py │ │ │ ├── lvis_v1_instance.py │ │ │ ├── voc0712.py │ │ │ └── wider_face.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ ├── cascade_mask_rcnn_swin_fpn.py │ │ │ ├── cascade_rcnn_r50_fpn.py │ │ │ ├── fast_rcnn_r50_fpn.py │ │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ │ ├── faster_rcnn_r50_fpn.py │ │ │ ├── mask_rcnn_lsnet_fpn.py │ │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ ├── mask_rcnn_swin_fpn.py │ │ │ ├── mask_reppointsv2_swin_bifpn.py │ │ │ ├── reppointsv2_swin_bifpn.py │ │ │ ├── retinanet_lsnet_fpn.py │ │ │ ├── retinanet_r50_fpn.py │ │ │ ├── rpn_r50_caffe_c4.py │ │ │ ├── rpn_r50_fpn.py │ │ │ └── ssd300.py │ │ └── schedules │ │ │ ├── schedule_1x.py │ │ │ ├── schedule_20e.py │ │ │ └── schedule_2x.py │ ├── mask_rcnn_lsnet_b_fpn_1x_coco.py │ ├── mask_rcnn_lsnet_s_fpn_1x_coco.py │ ├── mask_rcnn_lsnet_t_fpn_1x_coco.py │ ├── retinanet_lsnet_b_fpn_1x_coco.py │ ├── retinanet_lsnet_s_fpn_1x_coco.py │ └── retinanet_lsnet_t_fpn_1x_coco.py ├── dist_test.sh ├── dist_train.sh ├── eval.sh ├── logs │ ├── lsnet_b_maskrcnn.json │ ├── lsnet_b_retinanet.json │ ├── lsnet_s_maskrcnn.json │ ├── lsnet_s_retinanet.json │ ├── lsnet_t_maskrcnn.json │ └── lsnet_t_retinanet.json ├── mmcv_custom │ ├── __init__.py │ ├── checkpoint.py │ └── runner │ │ ├── __init__.py │ │ ├── checkpoint.py │ │ ├── epoch_based_runner.py │ │ └── optimizer.py ├── mmdet_custom │ └── apis │ │ └── train.py ├── model │ ├── lsnet.py │ ├── lsnet_fpn.py │ └── ska.py ├── test.py ├── train.py └── train.sh ├── engine.py ├── eval.sh ├── eval_robust.sh ├── figures └── throughput.svg ├── flops.py ├── logs ├── lsnet_b.log ├── lsnet_b_distill.log ├── lsnet_s.log ├── lsnet_s_distill.log ├── lsnet_t.log └── lsnet_t_distill.log ├── losses.py ├── main.py ├── model ├── __init__.py ├── build.py ├── lsnet.py └── ska.py ├── requirements.txt ├── robust.py ├── robust_utils.py ├── segmentation ├── .gitignore ├── README.md ├── align_resize.py ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ └── ade20k.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ └── fpn_r50.py │ │ └── schedules │ │ │ ├── schedule_160k.py │ │ │ ├── schedule_20k.py │ │ │ ├── schedule_40k.py │ │ │ └── schedule_80k.py │ └── sem_fpn │ │ ├── fpn_lsnet_b_ade20k_40k.py │ │ ├── fpn_lsnet_s_ade20k_40k.py │ │ └── fpn_lsnet_t_ade20k_40k.py ├── eval.sh ├── logs │ ├── lsnet_b_semfpn.json │ ├── lsnet_s_semfpn.json │ └── lsnet_t_semfpn.json ├── mmcv_custom │ ├── __init__.py │ ├── checkpoint.py │ └── runner │ │ ├── __init__.py │ │ ├── checkpoint.py │ │ ├── epoch_based_runner.py │ │ └── optimizer.py ├── model │ ├── lsnet.py │ ├── lsnet_fpn.py │ └── ska.py ├── tools │ ├── analyze_logs.py │ ├── benchmark.py │ ├── browse_dataset.py │ ├── convert_datasets │ │ ├── chase_db1.py │ │ ├── cityscapes.py │ │ ├── coco_stuff10k.py │ │ ├── coco_stuff164k.py │ │ ├── drive.py │ │ ├── hrf.py │ │ ├── pascal_context.py │ │ ├── stare.py │ │ └── voc_aug.py │ ├── deploy_test.py │ ├── dist_test.sh │ ├── dist_train.sh │ ├── get_flops.py │ ├── model_converters │ │ ├── mit2mmseg.py │ │ ├── swin2mmseg.py │ │ └── 
vit2mmseg.py │ ├── onnx2tensorrt.py │ ├── print_config.py │ ├── publish_model.py │ ├── pytorch2onnx.py │ ├── pytorch2torchscript.py │ ├── slurm_test.sh │ ├── slurm_train.sh │ ├── test.py │ ├── torchserve │ │ ├── mmseg2torchserve.py │ │ ├── mmseg_handler.py │ │ └── test_torchserve.py │ ├── train.py │ └── vis.py └── train.sh ├── speed.py ├── train.sh └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .test/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | **/checkpoints 141 | 142 | wandb 143 | pretrain 144 | *.whl 145 | ignore -------------------------------------------------------------------------------- /README_robustness.md: -------------------------------------------------------------------------------- 1 | # Robustness Evaluation 2 | 3 | ## Models 4 | | Model | ImageNet-C | ImageNet-A | ImageNet-R | ImageNet-Sketch | 5 | |:-:|:-:|:-:|:-:|:-:| 6 | | [LSNet-T](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_t.pth) | 68.2 | 6.7 | 38.5 | 25.5 | 7 | | [LSNet-S](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_s.pth) | 65.7 | 9.6 | 39.4 | 27.5 | 8 | | [LSNet-B](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_b.pth) | 59.3 | 17.3 | 43.1 | 30.7 | 9 | 10 | ## Data preparation 11 | 12 | Please download and prepare the ImageNet-C, ImageNet-A, ImageNet-R, and ImageNet-Sketch datasets. 13 | 14 | ## Testing 15 | ```bash 16 | set -e 17 | set -x 18 | 19 | MODEL=lsnet_t 20 | CKPT=pretrain/lsnet_t.pth 21 | INPUT=224 22 | 23 | # Optional: use an HF mirror 24 | # export HF_ENDPOINT=https://hf-mirror.com 25 | 26 | python main.py --eval --model ${MODEL} --resume ${CKPT} --data-path ~/imagenet \ 27 | --inc_path ~/datasets/OpenDataLab___ImageNet-C/raw \ 28 | --insk_path ~/datasets/OpenDataLab___ImageNet-Sketch/raw/sketch \ 29 | --ina_path ~/datasets/OpenDataLab___ImageNet-A/raw/imagenet-a \ 30 | --inr_path ~/datasets/OpenDataLab___ImageNet-R/raw/imagenet-r \ 31 | --batch-size 512 \ 32 | --input-size ${INPUT} 33 | ``` 34 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THU-MIG/lsnet/cbe737c92b7c43ecf02d08545a07f03f1010177c/data/__init__.py -------------------------------------------------------------------------------- /data/datasets.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Build training/testing datasets 3 | ''' 4 | import os 5 | import json 6 | 7 | from torchvision import datasets, transforms 8 | from torchvision.datasets.folder import ImageFolder, default_loader 9 | import torch 10 | 11 | from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 12 | from timm.data import create_transform 13 | 14 | try: 15 | from timm.data import TimmDatasetTar 16 | except ImportError: 17 | # for newer versions of timm 18 | from timm.data import ImageDataset as TimmDatasetTar 19 | 20 | class INatDataset(ImageFolder): 21 | def __init__(self, root, train=True, year=2018, transform=None, target_transform=None, 22 | category='name', loader=default_loader): 23 | self.transform = transform 24 | self.loader = loader 25 | self.target_transform = target_transform 26 | self.year = year 27 | # assert category in 
['kingdom','phylum','class','order','supercategory','family','genus','name'] 28 | path_json = os.path.join( 29 | root, f'{"train" if train else "val"}{year}.json') 30 | with open(path_json) as json_file: 31 | data = json.load(json_file) 32 | 33 | with open(os.path.join(root, 'categories.json')) as json_file: 34 | data_catg = json.load(json_file) 35 | 36 | path_json_for_targeter = os.path.join(root, f"train{year}.json") 37 | 38 | with open(path_json_for_targeter) as json_file: 39 | data_for_targeter = json.load(json_file) 40 | 41 | targeter = {} 42 | indexer = 0 43 | for elem in data_for_targeter['annotations']: 44 | king = [] 45 | king.append(data_catg[int(elem['category_id'])][category]) 46 | if king[0] not in targeter.keys(): 47 | targeter[king[0]] = indexer 48 | indexer += 1 49 | self.nb_classes = len(targeter) 50 | 51 | self.samples = [] 52 | for elem in data['images']: 53 | cut = elem['file_name'].split('/') 54 | target_current = int(cut[2]) 55 | path_current = os.path.join(root, cut[0], cut[2], cut[3]) 56 | 57 | categors = data_catg[target_current] 58 | target_current_true = targeter[categors[category]] 59 | self.samples.append((path_current, target_current_true)) 60 | 61 | # __getitem__ and __len__ inherited from ImageFolder 62 | 63 | 64 | def build_dataset(is_train, args): 65 | transform = build_transform(is_train, args) 66 | 67 | if args.data_set == 'CIFAR': 68 | dataset = datasets.CIFAR100( 69 | args.data_path, train=is_train, transform=transform) 70 | nb_classes = 100 71 | elif args.data_set == 'IMNET': 72 | prefix = 'train' if is_train else 'val' 73 | data_dir = os.path.join(args.data_path, f'{prefix}.tar') 74 | if os.path.exists(data_dir): 75 | dataset = TimmDatasetTar(data_dir, transform=transform) 76 | else: 77 | root = os.path.join(args.data_path, 'train' if is_train else 'val') 78 | dataset = datasets.ImageFolder(root, transform=transform) 79 | nb_classes = 1000 80 | elif args.data_set == 'IMNETEE': 81 | root = os.path.join(args.data_path, 'train' if is_train else 'val') 82 | dataset = datasets.ImageFolder(root, transform=transform) 83 | nb_classes = 10 84 | elif args.data_set == 'FLOWERS': 85 | root = os.path.join(args.data_path, 'train' if is_train else 'test') 86 | dataset = datasets.ImageFolder(root, transform=transform) 87 | if is_train: 88 | dataset = torch.utils.data.ConcatDataset( 89 | [dataset for _ in range(100)]) 90 | nb_classes = 102 91 | elif args.data_set == 'INAT': 92 | dataset = INatDataset(args.data_path, train=is_train, year=2018, 93 | category=args.inat_category, transform=transform) 94 | nb_classes = dataset.nb_classes 95 | elif args.data_set == 'INAT19': 96 | dataset = INatDataset(args.data_path, train=is_train, year=2019, 97 | category=args.inat_category, transform=transform) 98 | nb_classes = dataset.nb_classes 99 | return dataset, nb_classes 100 | 101 | 102 | def build_transform(is_train, args): 103 | resize_im = args.input_size > 32 104 | if is_train: 105 | # this should always dispatch to transforms_imagenet_train 106 | transform = create_transform( 107 | input_size=args.input_size, 108 | is_training=True, 109 | color_jitter=args.color_jitter, 110 | auto_augment=args.aa, 111 | interpolation=args.train_interpolation, 112 | re_prob=args.reprob, 113 | re_mode=args.remode, 114 | re_count=args.recount, 115 | ) 116 | if not resize_im: 117 | # replace RandomResizedCropAndInterpolation with 118 | # RandomCrop 119 | transform.transforms[0] = transforms.RandomCrop( 120 | args.input_size, padding=4) 121 | return transform 122 | 123 | t = [] 124 | if 
args.finetune: 125 | t.append( 126 | transforms.Resize((args.input_size, args.input_size), 127 | interpolation=3) 128 | ) 129 | else: 130 | if resize_im: 131 | size = int((256 / 224) * args.input_size) 132 | t.append( 133 | # to maintain same ratio w.r.t. 224 images 134 | transforms.Resize(size, interpolation=3), 135 | ) 136 | t.append(transforms.CenterCrop(args.input_size)) 137 | 138 | t.append(transforms.ToTensor()) 139 | t.append(transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)) 140 | return transforms.Compose(t) 141 | -------------------------------------------------------------------------------- /data/samplers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Build samplers for data loading 3 | ''' 4 | import torch 5 | import torch.distributed as dist 6 | import math 7 | 8 | 9 | class RASampler(torch.utils.data.Sampler): 10 | """Sampler that restricts data loading to a subset of the dataset for distributed training, 11 | with repeated augmentation. 12 | It ensures that each augmented version of a sample will be visible to a 13 | different process (GPU). 14 | Heavily based on torch.utils.data.DistributedSampler 15 | """ 16 | 17 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 18 | if num_replicas is None: 19 | if not dist.is_available(): 20 | raise RuntimeError( 21 | "Requires distributed package to be available") 22 | num_replicas = dist.get_world_size() 23 | if rank is None: 24 | if not dist.is_available(): 25 | raise RuntimeError( 26 | "Requires distributed package to be available") 27 | rank = dist.get_rank() 28 | self.dataset = dataset 29 | self.num_replicas = num_replicas 30 | self.rank = rank 31 | self.epoch = 0 32 | self.num_samples = int( 33 | math.ceil(len(self.dataset) * 3.0 / self.num_replicas)) 34 | self.total_size = self.num_samples * self.num_replicas 35 | # self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas)) 36 | self.num_selected_samples = int(math.floor( 37 | len(self.dataset) // 256 * 256 / self.num_replicas)) 38 | self.shuffle = shuffle 39 | 40 | def __iter__(self): 41 | # deterministically shuffle based on epoch 42 | g = torch.Generator() 43 | g.manual_seed(self.epoch) 44 | if self.shuffle: 45 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 46 | else: 47 | indices = list(range(len(self.dataset))) 48 | 49 | # repeat each sample 3 times (repeated augmentation), then pad to make it evenly divisible 50 | indices = [ele for ele in indices for i in range(3)] 51 | indices += indices[:(self.total_size - len(indices))] 52 | assert len(indices) == self.total_size 53 | 54 | # subsample 55 | indices = indices[self.rank:self.total_size:self.num_replicas] 56 | assert len(indices) == self.num_samples 57 | 58 | return iter(indices[:self.num_selected_samples]) 59 | 60 | def __len__(self): 61 | return self.num_selected_samples 62 | 63 | def set_epoch(self, epoch): 64 | self.epoch = epoch 65 | -------------------------------------------------------------------------------- /data/threeaugment.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3Augment implementation from (https://github.com/facebookresearch/deit/blob/main/augment.py) 3 | Data-augmentation (DA) based on dino DA (https://github.com/facebookresearch/dino) 4 | and timm DA (https://github.com/rwightman/pytorch-image-models) 5 | Can be called by adding "--ThreeAugment" to the command line 6 | """ 7 | import torch 8 | from torchvision import transforms 9 | 10 | from 
timm.data.transforms import str_to_pil_interp, RandomResizedCropAndInterpolation, ToNumpy, ToTensor 11 | 12 | import numpy as np 13 | from torchvision import datasets, transforms 14 | import random 15 | 16 | 17 | 18 | from PIL import ImageFilter, ImageOps 19 | import torchvision.transforms.functional as TF 20 | 21 | 22 | class GaussianBlur(object): 23 | """ 24 | Apply Gaussian Blur to the PIL image. 25 | """ 26 | def __init__(self, p=0.1, radius_min=0.1, radius_max=2.): 27 | self.prob = p 28 | self.radius_min = radius_min 29 | self.radius_max = radius_max 30 | 31 | def __call__(self, img): 32 | do_it = random.random() <= self.prob 33 | if not do_it: 34 | return img 35 | 36 | img = img.filter( 37 | ImageFilter.GaussianBlur( 38 | radius=random.uniform(self.radius_min, self.radius_max) 39 | ) 40 | ) 41 | return img 42 | 43 | class Solarization(object): 44 | """ 45 | Apply Solarization to the PIL image. 46 | """ 47 | def __init__(self, p=0.2): 48 | self.p = p 49 | 50 | def __call__(self, img): 51 | if random.random() < self.p: 52 | return ImageOps.solarize(img) 53 | else: 54 | return img 55 | 56 | class gray_scale(object): 57 | """ 58 | Apply grayscale to the PIL image. 59 | """ 60 | def __init__(self, p=0.2): 61 | self.p = p 62 | self.transf = transforms.Grayscale(3) 63 | 64 | def __call__(self, img): 65 | if random.random() < self.p: 66 | return self.transf(img) 67 | else: 68 | return img 69 | 70 | 71 | 72 | class horizontal_flip(object): 73 | """ 74 | Apply a horizontal flip to the PIL image. 75 | """ 76 | def __init__(self, p=0.2,activate_pred=False): 77 | self.p = p 78 | self.transf = transforms.RandomHorizontalFlip(p=1.0) 79 | 80 | def __call__(self, img): 81 | if random.random() < self.p: 82 | return self.transf(img) 83 | else: 84 | return img 85 | 86 | 87 | 88 | def new_data_aug_generator(args = None): 89 | img_size = args.input_size 90 | remove_random_resized_crop = False 91 | mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225] 92 | primary_tfl = [] 93 | scale=(0.08, 1.0) 94 | interpolation='bicubic' 95 | if remove_random_resized_crop: 96 | primary_tfl = [ 97 | transforms.Resize(img_size, interpolation=3), 98 | transforms.RandomCrop(img_size, padding=4,padding_mode='reflect'), 99 | transforms.RandomHorizontalFlip() 100 | ] 101 | else: 102 | primary_tfl = [ 103 | RandomResizedCropAndInterpolation( 104 | img_size, scale=scale, interpolation=interpolation), 105 | transforms.RandomHorizontalFlip() 106 | ] 107 | 108 | 109 | secondary_tfl = [transforms.RandomChoice([gray_scale(p=1.0), 110 | Solarization(p=1.0), 111 | GaussianBlur(p=1.0)])] 112 | 113 | if args.color_jitter is not None and not args.color_jitter==0: 114 | secondary_tfl.append(transforms.ColorJitter(args.color_jitter, args.color_jitter, args.color_jitter)) 115 | final_tfl = [ 116 | transforms.ToTensor(), 117 | transforms.Normalize( 118 | mean=torch.tensor(mean), 119 | std=torch.tensor(std)) 120 | ] 121 | return transforms.Compose(primary_tfl+secondary_tfl+final_tfl) 122 | -------------------------------------------------------------------------------- /detection/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | pretrain 3 | work_dirs 4 | results.pkl -------------------------------------------------------------------------------- /detection/README.md: -------------------------------------------------------------------------------- 1 | # Object Detection and Instance Segmentation 2 | 3 | Detection and instance segmentation on MS COCO 2017 are implemented based on 
[MMDetection](https://github.com/open-mmlab/mmdetection). 4 | 5 | ## Models 6 | Results with RetinaNet 7 | | Model | $AP$ | $AP_{50}$ | $AP_{75}$ | $AP_S$ | $AP_M$ | $AP_L$ | Log | 8 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 9 | | [LSNet-T](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_t_retinanet.pth) | 34.2 | 54.6 | 35.2 | 17.8 | 37.1 | 48.5 | [lsnet_t_retinanet.json](./logs/lsnet_t_retinanet.json) | 10 | | [LSNet-S](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_s_retinanet.pth) | 36.5 | 57.3 | 38.1 | 20.3 | 39.5 | 51.0 | [lsnet_s_retinanet.json](./logs/lsnet_s_retinanet.json) | 11 | | [LSNet-B](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_b_retinanet.pth) | 39.2 | 60.0 | 41.5 | 22.1 | 43.0 | 52.9 | [lsnet_b_retinanet.json](./logs/lsnet_b_retinanet.json) | 12 | 13 | Results with Mask R-CNN 14 | | Model | $AP^b$ | $AP_{50}^b$ | $AP_{75}^b$ | $AP^m$ | $AP_{50}^m$ | $AP_{75}^m$ | Log | 15 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 16 | | [LSNet-T](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_t_maskrcnn.pth) | 35.0 | 57.0 | 37.3 | 32.7 | 53.8 | 34.3 | [lsnet_t_maskrcnn.json](./logs/lsnet_t_maskrcnn.json) | 17 | | [LSNet-S](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_s_maskrcnn.pth) | 37.4 | 59.9 | 39.8 | 34.8 | 56.8 | 36.6 | [lsnet_s_maskrcnn.json](./logs/lsnet_s_maskrcnn.json) | 18 | | [LSNet-B](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_b_maskrcnn.pth) | 40.8 | 63.4 | 44.0 | 37.8 | 60.5 | 40.1 | [lsnet_b_maskrcnn.json](./logs/lsnet_b_maskrcnn.json) | 19 | 20 | ## Installation 21 | ```bash 22 | pip install mmcv-full==1.7.2 23 | pip install mmdet==2.28.2 24 | # Please replace line 160 of anaconda3/envs/seg/lib/python3.10/site-packages/mmcv/parallel/distributed.py with: module_to_run = self.module 25 | # Please patch mmcv following https://github.com/HarborYuan/mmcv_16/commit/ad1a72fe0cbeead2716706ff618dfa0269d2cf4c 26 | ``` 27 | 28 | ## Data preparation 29 | 30 | Please prepare the COCO 2017 dataset according to the [instructions in MMDetection](https://github.com/open-mmlab/mmdetection/blob/master/docs/en/1_exist_data_model.md#test-existing-models-on-standard-datasets). 
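If COCO 2017 is already downloaded elsewhere, symlinking it into the expected `data/` directory avoids a second copy (a minimal sketch; `/path/to/coco2017` is a placeholder for your local dataset root):

```bash
# run from the detection/ directory; adjust the source path to your setup
mkdir -p data
ln -s /path/to/coco2017 data/coco
```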
31 | The dataset should be organized as 32 | ``` 33 | detection 34 | ├── data 35 | │ ├── coco 36 | │ │ ├── annotations 37 | │ │ ├── train2017 38 | │ │ ├── val2017 39 | │ │ ├── test2017 40 | ``` 41 | 42 | ## Testing 43 | For RetinaNet 44 | ```bash 45 | bash ./dist_test.sh configs/retinanet_lsnet_b_fpn_1x_coco.py pretrain/lsnet_b_retinanet.pth 8 --eval bbox --out results.pkl 46 | ``` 47 | For Mask R-CNN 48 | ```bash 49 | bash ./dist_test.sh configs/mask_rcnn_lsnet_b_fpn_1x_coco.py pretrain/lsnet_b_maskrcnn.pth 8 --eval bbox segm --out results.pkl 50 | ``` 51 | 52 | ## Training 53 | Download ImageNet-1K pretrained weights into `./pretrain` 54 | For RetinaNet 55 | ```bash 56 | bash ./dist_train.sh configs/retinanet_lsnet_b_fpn_1x_coco.py 8 57 | ``` 58 | For Mask R-CNN 59 | ```bash 60 | bash ./dist_train.sh configs/mask_rcnn_lsnet_b_fpn_1x_coco.py 8 61 | ``` 62 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/cityscapes_detection.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'data/cityscapes/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict( 9 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(2048, 1024), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=1, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=8, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 40 | 'annotations/instancesonly_filtered_gtFine_train.json', 41 | img_prefix=data_root + 'leftImg8bit/train/', 42 | pipeline=train_pipeline)), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 46 | 'annotations/instancesonly_filtered_gtFine_val.json', 47 | img_prefix=data_root + 'leftImg8bit/val/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | ann_file=data_root + 52 | 'annotations/instancesonly_filtered_gtFine_test.json', 53 | img_prefix=data_root + 'leftImg8bit/test/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(interval=1, metric='bbox') 56 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/cityscapes_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'data/cityscapes/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict( 9 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 10 | 
dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(2048, 1024), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=1, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=8, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 40 | 'annotations/instancesonly_filtered_gtFine_train.json', 41 | img_prefix=data_root + 'leftImg8bit/train/', 42 | pipeline=train_pipeline)), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 46 | 'annotations/instancesonly_filtered_gtFine_val.json', 47 | img_prefix=data_root + 'leftImg8bit/val/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | ann_file=data_root + 52 | 'annotations/instancesonly_filtered_gtFine_test.json', 53 | img_prefix=data_root + 'leftImg8bit/test/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(metric=['bbox', 'segm']) 56 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(interval=1, metric='bbox') 49 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_instance.py: 
-------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_instance_semantic.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict( 8 | type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='SegRescale', scale_factor=1 / 8), 14 | dict(type='DefaultFormatBundle'), 15 | dict( 16 | type='Collect', 17 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1333, 800), 24 | flip=False, 25 | transforms=[ 26 | dict(type='Resize', keep_ratio=True), 27 | dict(type='RandomFlip', flip_ratio=0.5), 28 | dict(type='Normalize', **img_norm_cfg), 29 | dict(type='Pad', size_divisor=32), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=2, 36 | workers_per_gpu=2, 37 | train=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 'annotations/instances_train2017.json', 40 | img_prefix=data_root + 'train2017/', 41 | seg_prefix=data_root + 'stuffthingmaps/train2017/', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | 
img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | type=dataset_type, 50 | ann_file=data_root + 'annotations/instances_val2017.json', 51 | img_prefix=data_root + 'val2017/', 52 | pipeline=test_pipeline)) 53 | evaluation = dict(metric=['bbox', 'segm']) 54 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'DeepFashionDataset' 3 | data_root = 'data/DeepFashion/In-shop/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(750, 1101), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(750, 1101), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | imgs_per_gpu=2, 33 | workers_per_gpu=1, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 37 | img_prefix=data_root + 'Img/', 38 | pipeline=train_pipeline, 39 | data_root=data_root), 40 | val=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 43 | img_prefix=data_root + 'Img/', 44 | pipeline=test_pipeline, 45 | data_root=data_root), 46 | test=dict( 47 | type=dataset_type, 48 | ann_file=data_root + 49 | 'annotations/DeepFashion_segmentation_gallery.json', 50 | img_prefix=data_root + 'Img/', 51 | pipeline=test_pipeline, 52 | data_root=data_root)) 53 | evaluation = dict(interval=5, metric=['bbox', 'segm']) 54 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/lvis_v0.5_instance.py: -------------------------------------------------------------------------------- 1 | _base_ = 'coco_instance.py' 2 | dataset_type = 'LVISV05Dataset' 3 | data_root = 'data/lvis_v0.5/' 4 | data = dict( 5 | samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict( 8 | _delete_=True, 9 | type='ClassBalancedDataset', 10 | oversample_thr=1e-3, 11 | dataset=dict( 12 | type=dataset_type, 13 | ann_file=data_root + 'annotations/lvis_v0.5_train.json', 14 | img_prefix=data_root + 'train2017/')), 15 | val=dict( 16 | type=dataset_type, 17 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 18 | img_prefix=data_root + 'val2017/'), 19 | test=dict( 20 | type=dataset_type, 21 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 22 | img_prefix=data_root + 'val2017/')) 23 | evaluation = dict(metric=['bbox', 'segm']) 24 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/lvis_v1_instance.py: -------------------------------------------------------------------------------- 1 | _base_ = 
'coco_instance.py' 2 | dataset_type = 'LVISV1Dataset' 3 | data_root = 'data/lvis_v1/' 4 | data = dict( 5 | samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict( 8 | _delete_=True, 9 | type='ClassBalancedDataset', 10 | oversample_thr=1e-3, 11 | dataset=dict( 12 | type=dataset_type, 13 | ann_file=data_root + 'annotations/lvis_v1_train.json', 14 | img_prefix=data_root)), 15 | val=dict( 16 | type=dataset_type, 17 | ann_file=data_root + 'annotations/lvis_v1_val.json', 18 | img_prefix=data_root), 19 | test=dict( 20 | type=dataset_type, 21 | ann_file=data_root + 'annotations/lvis_v1_val.json', 22 | img_prefix=data_root)) 23 | evaluation = dict(metric=['bbox', 'segm']) 24 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/voc0712.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'VOCDataset' 3 | data_root = 'data/VOCdevkit/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1000, 600), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=3, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=[ 40 | data_root + 'VOC2007/ImageSets/Main/trainval.txt', 41 | data_root + 'VOC2012/ImageSets/Main/trainval.txt' 42 | ], 43 | img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], 44 | pipeline=train_pipeline)), 45 | val=dict( 46 | type=dataset_type, 47 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 48 | img_prefix=data_root + 'VOC2007/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 53 | img_prefix=data_root + 'VOC2007/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(interval=1, metric='mAP') 56 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'WIDERFaceDataset' 3 | data_root = 'data/WIDERFace/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile', to_float32=True), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict( 9 | type='PhotoMetricDistortion', 10 | brightness_delta=32, 11 | contrast_range=(0.5, 1.5), 12 | saturation_range=(0.5, 1.5), 13 | hue_delta=18), 14 | dict( 15 | type='Expand', 16 | mean=img_norm_cfg['mean'], 17 | to_rgb=img_norm_cfg['to_rgb'], 18 | ratio_range=(1, 4)), 19 | dict( 20 | 
type='MinIoURandomCrop', 21 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 22 | min_crop_size=0.3), 23 | dict(type='Resize', img_scale=(300, 300), keep_ratio=False), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='RandomFlip', flip_ratio=0.5), 26 | dict(type='DefaultFormatBundle'), 27 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 28 | ] 29 | test_pipeline = [ 30 | dict(type='LoadImageFromFile'), 31 | dict( 32 | type='MultiScaleFlipAug', 33 | img_scale=(300, 300), 34 | flip=False, 35 | transforms=[ 36 | dict(type='Resize', keep_ratio=False), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='ImageToTensor', keys=['img']), 39 | dict(type='Collect', keys=['img']), 40 | ]) 41 | ] 42 | data = dict( 43 | samples_per_gpu=60, 44 | workers_per_gpu=2, 45 | train=dict( 46 | type='RepeatDataset', 47 | times=2, 48 | dataset=dict( 49 | type=dataset_type, 50 | ann_file=data_root + 'train.txt', 51 | img_prefix=data_root + 'WIDER_train/', 52 | min_size=17, 53 | pipeline=train_pipeline)), 54 | val=dict( 55 | type=dataset_type, 56 | ann_file=data_root + 'val.txt', 57 | img_prefix=data_root + 'WIDER_val/', 58 | pipeline=test_pipeline), 59 | test=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 'val.txt', 62 | img_prefix=data_root + 'WIDER_val/', 63 | pipeline=test_pipeline)) 64 | -------------------------------------------------------------------------------- /detection/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | roi_head=dict( 20 | type='StandardRoIHead', 21 | bbox_roi_extractor=dict( 22 | type='SingleRoIExtractor', 23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 24 | out_channels=256, 25 | featmap_strides=[4, 8, 16, 32]), 26 | bbox_head=dict( 27 | type='Shared2FCBBoxHead', 28 | in_channels=256, 29 | fc_out_channels=1024, 30 | roi_feat_size=7, 31 | num_classes=80, 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[0., 0., 0., 0.], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | reg_class_agnostic=False, 37 | loss_cls=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 40 | # model training and testing settings 41 | train_cfg=dict( 42 | rcnn=dict( 43 | assigner=dict( 44 | type='MaxIoUAssigner', 45 | pos_iou_thr=0.5, 46 | neg_iou_thr=0.5, 47 | min_pos_iou=0.5, 48 | match_low_quality=False, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 
| type='RandomSampler', 52 | num=512, 53 | pos_fraction=0.25, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=True), 56 | pos_weight=-1, 57 | debug=False)), 58 | test_cfg=dict( 59 | rcnn=dict( 60 | score_thr=0.05, 61 | nms=dict(type='nms', iou_threshold=0.5), 62 | max_per_img=100))) 63 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | rpn=dict( 66 | assigner=dict( 67 | type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict( 74 | type='RandomSampler', 75 | num=256, 76 | pos_fraction=0.5, 77 | neg_pos_ub=-1, 78 | add_gt_as_proposals=False), 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | rpn_proposal=dict( 83 | nms_pre=12000, 84 | max_per_img=2000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict( 88 | assigner=dict( 89 | type='MaxIoUAssigner', 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.5, 92 | min_pos_iou=0.5, 93 | match_low_quality=False, 94 | ignore_iof_thr=-1), 95 | sampler=dict( 96 | type='RandomSampler', 97 | num=512, 98 | pos_fraction=0.25, 99 | neg_pos_ub=-1, 100 | add_gt_as_proposals=True), 101 | pos_weight=-1, 102 | debug=False)), 103 | test_cfg=dict( 104 | rpn=dict( 105 | nms_pre=6000, 106 | max_per_img=1000, 107 | nms=dict(type='nms', iou_threshold=0.7), 108 | min_bbox_size=0), 109 | rcnn=dict( 110 | score_thr=0.05, 111 | nms=dict(type='nms', iou_threshold=0.5), 112 | max_per_img=100))) 113 | -------------------------------------------------------------------------------- 
/detection/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | strides=(1, 2, 2, 1), 11 | dilations=(1, 1, 1, 2), 12 | out_indices=(3, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=2048, 20 | feat_channels=2048, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=2048, 39 | featmap_strides=[16]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=2048, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | rpn=dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.7, 60 | neg_iou_thr=0.3, 61 | min_pos_iou=0.3, 62 | match_low_quality=True, 63 | ignore_iof_thr=-1), 64 | sampler=dict( 65 | type='RandomSampler', 66 | num=256, 67 | pos_fraction=0.5, 68 | neg_pos_ub=-1, 69 | add_gt_as_proposals=False), 70 | allowed_border=0, 71 | pos_weight=-1, 72 | debug=False), 73 | rpn_proposal=dict( 74 | nms_pre=12000, 75 | max_per_img=2000, 76 | nms=dict(type='nms', iou_threshold=0.7), 77 | min_bbox_size=0), 78 | rcnn=dict( 79 | assigner=dict( 80 | type='MaxIoUAssigner', 81 | pos_iou_thr=0.5, 82 | neg_iou_thr=0.5, 83 | min_pos_iou=0.5, 84 | match_low_quality=False, 85 | ignore_iof_thr=-1), 86 | sampler=dict( 87 | type='RandomSampler', 88 | num=512, 89 | pos_fraction=0.25, 90 | neg_pos_ub=-1, 91 | add_gt_as_proposals=True), 92 | pos_weight=-1, 93 | debug=False)), 94 | test_cfg=dict( 95 | rpn=dict( 96 | nms=dict(type='nms', iou_threshold=0.7), 97 | nms_pre=6000, 98 | max_per_img=1000, 99 | min_bbox_size=0), 100 | rcnn=dict( 101 | score_thr=0.05, 102 | nms=dict(type='nms', iou_threshold=0.5), 103 | max_per_img=100))) 104 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FasterRCNN', 3 | pretrained='torchvision://resnet50', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict( 14 | type='FPN', 15 | in_channels=[256, 512, 1024, 2048], 16 | out_channels=256, 17 | 
num_outs=5), 18 | rpn_head=dict( 19 | type='RPNHead', 20 | in_channels=256, 21 | feat_channels=256, 22 | anchor_generator=dict( 23 | type='AnchorGenerator', 24 | scales=[8], 25 | ratios=[0.5, 1.0, 2.0], 26 | strides=[4, 8, 16, 32, 64]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[1.0, 1.0, 1.0, 1.0]), 31 | loss_cls=dict( 32 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 33 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 34 | roi_head=dict( 35 | type='StandardRoIHead', 36 | bbox_roi_extractor=dict( 37 | type='SingleRoIExtractor', 38 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 39 | out_channels=256, 40 | featmap_strides=[4, 8, 16, 32]), 41 | bbox_head=dict( 42 | type='Shared2FCBBoxHead', 43 | in_channels=256, 44 | fc_out_channels=1024, 45 | roi_feat_size=7, 46 | num_classes=80, 47 | bbox_coder=dict( 48 | type='DeltaXYWHBBoxCoder', 49 | target_means=[0., 0., 0., 0.], 50 | target_stds=[0.1, 0.1, 0.2, 0.2]), 51 | reg_class_agnostic=False, 52 | loss_cls=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 54 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 55 | # model training and testing settings 56 | train_cfg=dict( 57 | rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | rpn_proposal=dict( 75 | nms_pre=2000, 76 | max_per_img=1000, 77 | nms=dict(type='nms', iou_threshold=0.7), 78 | min_bbox_size=0), 79 | rcnn=dict( 80 | assigner=dict( 81 | type='MaxIoUAssigner', 82 | pos_iou_thr=0.5, 83 | neg_iou_thr=0.5, 84 | min_pos_iou=0.5, 85 | match_low_quality=False, 86 | ignore_iof_thr=-1), 87 | sampler=dict( 88 | type='RandomSampler', 89 | num=512, 90 | pos_fraction=0.25, 91 | neg_pos_ub=-1, 92 | add_gt_as_proposals=True), 93 | pos_weight=-1, 94 | debug=False)), 95 | test_cfg=dict( 96 | rpn=dict( 97 | nms_pre=1000, 98 | max_per_img=1000, 99 | nms=dict(type='nms', iou_threshold=0.7), 100 | min_bbox_size=0), 101 | rcnn=dict( 102 | score_thr=0.05, 103 | nms=dict(type='nms', iou_threshold=0.5), 104 | max_per_img=100) 105 | # soft-nms is also supported for rcnn testing 106 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 107 | )) 108 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_lsnet_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='lsnet_t', 7 | pretrained="",), 8 | neck=dict( 9 | type='FPN', 10 | in_channels=[256, 512, 1024, 2048], 11 | out_channels=256, 12 | num_outs=5), 13 | rpn_head=dict( 14 | type='RPNHead', 15 | in_channels=256, 16 | feat_channels=256, 17 | anchor_generator=dict( 18 | type='AnchorGenerator', 19 | scales=[8], 20 | ratios=[0.5, 1.0, 2.0], 21 | strides=[4, 8, 16, 32, 64]), 22 | bbox_coder=dict( 23 | type='DeltaXYWHBBoxCoder', 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0]), 26 | loss_cls=dict( 27 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 28 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 29 | roi_head=dict( 30 | 
type='StandardRoIHead', 31 | bbox_roi_extractor=dict( 32 | type='SingleRoIExtractor', 33 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 34 | out_channels=256, 35 | featmap_strides=[4, 8, 16, 32]), 36 | bbox_head=dict( 37 | type='Shared2FCBBoxHead', 38 | in_channels=256, 39 | fc_out_channels=1024, 40 | roi_feat_size=7, 41 | num_classes=80, 42 | bbox_coder=dict( 43 | type='DeltaXYWHBBoxCoder', 44 | target_means=[0., 0., 0., 0.], 45 | target_stds=[0.1, 0.1, 0.2, 0.2]), 46 | reg_class_agnostic=False, 47 | loss_cls=dict( 48 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 49 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 50 | mask_roi_extractor=dict( 51 | type='SingleRoIExtractor', 52 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 53 | out_channels=256, 54 | featmap_strides=[4, 8, 16, 32]), 55 | mask_head=dict( 56 | type='FCNMaskHead', 57 | num_convs=4, 58 | in_channels=256, 59 | conv_out_channels=256, 60 | num_classes=80, 61 | loss_mask=dict( 62 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | rpn=dict( 66 | assigner=dict( 67 | type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict( 74 | type='RandomSampler', 75 | num=256, 76 | pos_fraction=0.5, 77 | neg_pos_ub=-1, 78 | add_gt_as_proposals=False), 79 | allowed_border=-1, 80 | pos_weight=-1, 81 | debug=False), 82 | rpn_proposal=dict( 83 | nms_pre=2000, 84 | max_per_img=1000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict( 88 | assigner=dict( 89 | type='MaxIoUAssigner', 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.5, 92 | min_pos_iou=0.5, 93 | match_low_quality=True, 94 | ignore_iof_thr=-1), 95 | sampler=dict( 96 | type='RandomSampler', 97 | num=512, 98 | pos_fraction=0.25, 99 | neg_pos_ub=-1, 100 | add_gt_as_proposals=True), 101 | mask_size=28, 102 | pos_weight=-1, 103 | debug=False)), 104 | test_cfg=dict( 105 | rpn=dict( 106 | nms_pre=1000, 107 | max_per_img=1000, 108 | nms=dict(type='nms', iou_threshold=0.7), 109 | min_bbox_size=0), 110 | rcnn=dict( 111 | score_thr=0.05, 112 | nms=dict(type='nms', iou_threshold=0.5), 113 | max_per_img=100, 114 | mask_thr_binary=0.5))) 115 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 
37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 63 | mask_roi_extractor=None, 64 | mask_head=dict( 65 | type='FCNMaskHead', 66 | num_convs=0, 67 | in_channels=2048, 68 | conv_out_channels=256, 69 | num_classes=80, 70 | loss_mask=dict( 71 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 72 | # model training and testing settings 73 | train_cfg=dict( 74 | rpn=dict( 75 | assigner=dict( 76 | type='MaxIoUAssigner', 77 | pos_iou_thr=0.7, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | match_low_quality=True, 81 | ignore_iof_thr=-1), 82 | sampler=dict( 83 | type='RandomSampler', 84 | num=256, 85 | pos_fraction=0.5, 86 | neg_pos_ub=-1, 87 | add_gt_as_proposals=False), 88 | allowed_border=0, 89 | pos_weight=-1, 90 | debug=False), 91 | rpn_proposal=dict( 92 | nms_pre=12000, 93 | max_per_img=2000, 94 | nms=dict(type='nms', iou_threshold=0.7), 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | assigner=dict( 98 | type='MaxIoUAssigner', 99 | pos_iou_thr=0.5, 100 | neg_iou_thr=0.5, 101 | min_pos_iou=0.5, 102 | match_low_quality=False, 103 | ignore_iof_thr=-1), 104 | sampler=dict( 105 | type='RandomSampler', 106 | num=512, 107 | pos_fraction=0.25, 108 | neg_pos_ub=-1, 109 | add_gt_as_proposals=True), 110 | mask_size=14, 111 | pos_weight=-1, 112 | debug=False)), 113 | test_cfg=dict( 114 | rpn=dict( 115 | nms_pre=6000, 116 | nms=dict(type='nms', iou_threshold=0.7), 117 | max_per_img=1000, 118 | min_bbox_size=0), 119 | rcnn=dict( 120 | score_thr=0.05, 121 | nms=dict(type='nms', iou_threshold=0.5), 122 | max_per_img=100, 123 | mask_thr_binary=0.5))) 124 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | 
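            # RoIAlign with sampling_ratio=0 samples bin points adaptively; the box
            # branch pools 7x7 features, while the mask branch below pools 14x14.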
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_pre=2000, 90 | max_per_img=1000, 91 | nms=dict(type='nms', iou_threshold=0.7), 92 | min_bbox_size=0), 93 | rcnn=dict( 94 | assigner=dict( 95 | type='MaxIoUAssigner', 96 | pos_iou_thr=0.5, 97 | neg_iou_thr=0.5, 98 | min_pos_iou=0.5, 99 | match_low_quality=True, 100 | ignore_iof_thr=-1), 101 | sampler=dict( 102 | type='RandomSampler', 103 | num=512, 104 | pos_fraction=0.25, 105 | neg_pos_ub=-1, 106 | add_gt_as_proposals=True), 107 | mask_size=28, 108 | pos_weight=-1, 109 | debug=False)), 110 | test_cfg=dict( 111 | rpn=dict( 112 | nms_pre=1000, 113 | max_per_img=1000, 114 | nms=dict(type='nms', iou_threshold=0.7), 115 | min_bbox_size=0), 116 | rcnn=dict( 117 | score_thr=0.05, 118 | nms=dict(type='nms', iou_threshold=0.5), 119 | max_per_img=100, 120 | mask_thr_binary=0.5))) 121 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_swin_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained=None, 5 | backbone=dict( 6 | type='SwinTransformer', 7 | embed_dim=96, 8 | depths=[2, 2, 6, 2], 9 | num_heads=[3, 6, 12, 24], 10 | window_size=7, 11 | mlp_ratio=4., 12 | qkv_bias=True, 13 | qk_scale=None, 14 | drop_rate=0., 15 | attn_drop_rate=0., 16 | drop_path_rate=0.2, 17 | ape=False, 18 | patch_norm=True, 19 | out_indices=(0, 1, 2, 3), 20 | use_checkpoint=False), 21 | neck=dict( 22 | type='FPN', 23 | in_channels=[96, 192, 384, 768], 24 | out_channels=256, 25 | num_outs=5), 26 | rpn_head=dict( 27 | type='RPNHead', 28 | in_channels=256, 29 | feat_channels=256, 30 | anchor_generator=dict( 31 | type='AnchorGenerator', 32 | scales=[8], 33 | ratios=[0.5, 1.0, 2.0], 34 | strides=[4, 8, 16, 32, 64]), 35 | bbox_coder=dict( 36 | type='DeltaXYWHBBoxCoder', 37 | target_means=[.0, .0, .0, .0], 38 | target_stds=[1.0, 1.0, 1.0, 1.0]), 39 | loss_cls=dict( 40 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 41 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 42 | roi_head=dict( 43 | 
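        # Apart from the Swin backbone and its [96, 192, 384, 768] feature pyramid,
        # the RPN/RoI heads and the train/test settings below are identical to the
        # mask_rcnn_r50_fpn baseline above.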
type='StandardRoIHead', 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 47 | out_channels=256, 48 | featmap_strides=[4, 8, 16, 32]), 49 | bbox_head=dict( 50 | type='Shared2FCBBoxHead', 51 | in_channels=256, 52 | fc_out_channels=1024, 53 | roi_feat_size=7, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 63 | mask_roi_extractor=dict( 64 | type='SingleRoIExtractor', 65 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 66 | out_channels=256, 67 | featmap_strides=[4, 8, 16, 32]), 68 | mask_head=dict( 69 | type='FCNMaskHead', 70 | num_convs=4, 71 | in_channels=256, 72 | conv_out_channels=256, 73 | num_classes=80, 74 | loss_mask=dict( 75 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 76 | # model training and testing settings 77 | train_cfg=dict( 78 | rpn=dict( 79 | assigner=dict( 80 | type='MaxIoUAssigner', 81 | pos_iou_thr=0.7, 82 | neg_iou_thr=0.3, 83 | min_pos_iou=0.3, 84 | match_low_quality=True, 85 | ignore_iof_thr=-1), 86 | sampler=dict( 87 | type='RandomSampler', 88 | num=256, 89 | pos_fraction=0.5, 90 | neg_pos_ub=-1, 91 | add_gt_as_proposals=False), 92 | allowed_border=-1, 93 | pos_weight=-1, 94 | debug=False), 95 | rpn_proposal=dict( 96 | nms_pre=2000, 97 | max_per_img=1000, 98 | nms=dict(type='nms', iou_threshold=0.7), 99 | min_bbox_size=0), 100 | rcnn=dict( 101 | assigner=dict( 102 | type='MaxIoUAssigner', 103 | pos_iou_thr=0.5, 104 | neg_iou_thr=0.5, 105 | min_pos_iou=0.5, 106 | match_low_quality=True, 107 | ignore_iof_thr=-1), 108 | sampler=dict( 109 | type='RandomSampler', 110 | num=512, 111 | pos_fraction=0.25, 112 | neg_pos_ub=-1, 113 | add_gt_as_proposals=True), 114 | mask_size=28, 115 | pos_weight=-1, 116 | debug=False)), 117 | test_cfg=dict( 118 | rpn=dict( 119 | nms_pre=1000, 120 | max_per_img=1000, 121 | nms=dict(type='nms', iou_threshold=0.7), 122 | min_bbox_size=0), 123 | rcnn=dict( 124 | score_thr=0.05, 125 | nms=dict(type='nms', iou_threshold=0.5), 126 | max_per_img=100, 127 | mask_thr_binary=0.5))) 128 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_reppointsv2_swin_bifpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) 3 | model = dict( 4 | type='RepPointsV2MaskDetector', 5 | pretrained=None, 6 | backbone=dict( 7 | type='SwinTransformer', 8 | embed_dim=96, 9 | depths=[2, 2, 6, 2], 10 | num_heads=[3, 6, 12, 24], 11 | window_size=7, 12 | mlp_ratio=4., 13 | qkv_bias=True, 14 | qk_scale=None, 15 | drop_rate=0., 16 | attn_drop_rate=0., 17 | drop_path_rate=0.2, 18 | ape=False, 19 | patch_norm=True, 20 | out_indices=(1, 2, 3), 21 | use_checkpoint=False), 22 | neck=dict( 23 | type='BiFPN', 24 | in_channels=[192, 384, 768], 25 | out_channels=256, 26 | start_level=0, 27 | add_extra_convs=False, 28 | num_outs=5, 29 | no_norm_on_lateral=False, 30 | num_repeat=2, 31 | norm_cfg=norm_cfg 32 | ), 33 | bbox_head=dict( 34 | type='RepPointsV2Head', 35 | num_classes=80, 36 | in_channels=256, 37 | feat_channels=256, 38 | point_feat_channels=256, 39 | stacked_convs=3, 40 | shared_stacked_convs=1, 41 | 
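        # The BiFPN neck above takes only the stride-8/16/32 Swin stages
        # (out_indices=(1, 2, 3)), repeats its fusion block twice (num_repeat=2),
        # and expands to num_outs=5 pyramid levels.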
first_kernel_size=3, 42 | kernel_size=1, 43 | corner_dim=64, 44 | num_points=9, 45 | gradient_mul=0.1, 46 | point_strides=[8, 16, 32, 64, 128], 47 | point_base_scale=4, 48 | norm_cfg=norm_cfg, 49 | loss_cls=dict( 50 | type='RPDQualityFocalLoss', 51 | use_sigmoid=True, 52 | beta=2.0, 53 | loss_weight=1.0), 54 | loss_bbox_init=dict(type='RPDGIoULoss', loss_weight=1.0), 55 | loss_bbox_refine=dict(type='RPDGIoULoss', loss_weight=2.0), 56 | loss_heatmap=dict( 57 | type='GaussianFocalLoss', 58 | alpha=2.0, 59 | gamma=4.0, 60 | loss_weight=0.25), 61 | loss_offset=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 62 | loss_sem=dict( 63 | type='SEPFocalLoss', 64 | gamma=2.0, 65 | alpha=0.25, 66 | loss_weight=0.1), 67 | transform_method='exact_minmax', 68 | # new for condconv 69 | coord_pos='center', 70 | mask_head=dict( 71 | type='CondConvMaskHead', 72 | branch_cfg=dict( 73 | in_channels=256, # == neck out channels 74 | channels=128, 75 | in_features=[0,1,2], 76 | out_stride=[8,16,32], # p3, p4, p5 77 | norm=dict(type='BN', requires_grad=True), 78 | num_convs=4, 79 | out_channels=8, 80 | semantic_loss_on=False, 81 | num_classes=80, 82 | loss_sem=dict( 83 | type='FocalLoss', 84 | use_sigmoid=True, 85 | gamma=2.0, 86 | alpha=0.25, 87 | loss_weight=1.0, 88 | prior_prob=0.01) 89 | ), 90 | head_cfg=dict( 91 | channels=8, 92 | disable_rel_coords=False, 93 | num_layers=3, 94 | use_fp16=False, 95 | mask_out_stride=4, 96 | max_proposals=500, 97 | aux_loss=True, 98 | mask_loss_weight=[0.,0.6,1.], 99 | sizes_of_interest=[64, 128, 256, 512, 1024] 100 | ), 101 | )), 102 | train_cfg = dict( 103 | init=dict( 104 | assigner=dict(type='PointAssignerV2', scale=4, pos_num=1, mask_center_sample=True, use_center=True), 105 | allowed_border=-1, 106 | pos_weight=-1, 107 | debug=False), 108 | heatmap=dict( 109 | assigner=dict(type='PointHMAssigner', gaussian_bump=True, gaussian_iou=0.7), 110 | allowed_border=-1, 111 | pos_weight=-1, 112 | debug=False), 113 | refine=dict( 114 | assigner=dict(type='ATSSAssignerV2', topk=9, mask_center_sample=True), 115 | allowed_border=-1, 116 | pos_weight=-1, 117 | debug=False)), 118 | test_cfg = dict( 119 | nms_pre=1000, 120 | min_bbox_size=0, 121 | score_thr=0.05, 122 | nms=dict(type='nms', iou_threshold=0.6), 123 | max_per_img=100) 124 | ) -------------------------------------------------------------------------------- /detection/configs/_base_/models/reppointsv2_swin_bifpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) 3 | model = dict( 4 | type='RepPointsV2Detector', 5 | pretrained=None, 6 | backbone=dict( 7 | type='SwinTransformer', 8 | embed_dim=96, 9 | depths=[2, 2, 6, 2], 10 | num_heads=[3, 6, 12, 24], 11 | window_size=7, 12 | mlp_ratio=4., 13 | qkv_bias=True, 14 | qk_scale=None, 15 | drop_rate=0., 16 | attn_drop_rate=0., 17 | drop_path_rate=0.2, 18 | ape=False, 19 | patch_norm=True, 20 | out_indices=(1, 2, 3), 21 | use_checkpoint=False), 22 | neck=dict( 23 | type='BiFPN', 24 | in_channels=[192, 384, 768], 25 | out_channels=256, 26 | start_level=0, 27 | add_extra_convs=False, 28 | num_outs=5, 29 | no_norm_on_lateral=False, 30 | num_repeat=2, 31 | norm_cfg=norm_cfg 32 | ), 33 | bbox_head=dict( 34 | type='RepPointsV2Head', 35 | num_classes=80, 36 | in_channels=256, 37 | feat_channels=256, 38 | point_feat_channels=256, 39 | stacked_convs=3, 40 | shared_stacked_convs=1, 41 | first_kernel_size=3, 42 | kernel_size=1, 43 | corner_dim=64, 44 | 
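        # RepPointsV2 represents each object as a point set (num_points below) that is
        # decoded into a box via transform_method='exact_minmax'; the heatmap/offset/sem
        # losses further down supervise its auxiliary verification branches.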
num_points=9, 45 | gradient_mul=0.1, 46 | point_strides=[8, 16, 32, 64, 128], 47 | point_base_scale=4, 48 | norm_cfg=norm_cfg, 49 | loss_cls=dict( 50 | type='RPDQualityFocalLoss', 51 | use_sigmoid=True, 52 | beta=2.0, 53 | loss_weight=1.0), 54 | loss_bbox_init=dict(type='RPDGIoULoss', loss_weight=1.0), 55 | loss_bbox_refine=dict(type='RPDGIoULoss', loss_weight=2.0), 56 | loss_heatmap=dict( 57 | type='GaussianFocalLoss', 58 | alpha=2.0, 59 | gamma=4.0, 60 | loss_weight=0.25), 61 | loss_offset=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 62 | loss_sem=dict( 63 | type='SEPFocalLoss', 64 | gamma=2.0, 65 | alpha=0.25, 66 | loss_weight=0.1), 67 | transform_method='exact_minmax'), 68 | # training and testing settings 69 | train_cfg = dict( 70 | init=dict( 71 | assigner=dict(type='PointAssignerV2', scale=4, pos_num=1), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | heatmap=dict( 76 | assigner=dict(type='PointHMAssigner', gaussian_bump=True, gaussian_iou=0.7), 77 | allowed_border=-1, 78 | pos_weight=-1, 79 | debug=False), 80 | refine=dict( 81 | assigner=dict(type='ATSSAssignerV2', topk=9), 82 | allowed_border=-1, 83 | pos_weight=-1, 84 | debug=False)), 85 | test_cfg = dict( 86 | nms_pre=1000, 87 | min_bbox_size=0, 88 | score_thr=0.05, 89 | nms=dict(type='nms', iou_threshold=0.6), 90 | max_per_img=100), 91 | ) -------------------------------------------------------------------------------- /detection/configs/_base_/models/retinanet_lsnet_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='lsnet_t', 7 | pretrained="",), 8 | neck=dict( 9 | type='FPN', 10 | in_channels=[256, 512, 1024, 2048], 11 | out_channels=256, 12 | start_level=1, 13 | add_extra_convs='on_input', 14 | num_outs=5), 15 | bbox_head=dict( 16 | type='RetinaHead', 17 | num_classes=80, 18 | in_channels=256, 19 | stacked_convs=4, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | octave_base_scale=4, 24 | scales_per_octave=3, 25 | ratios=[0.5, 1.0, 2.0], 26 | strides=[8, 16, 32, 64, 128]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[1.0, 1.0, 1.0, 1.0]), 31 | loss_cls=dict( 32 | type='FocalLoss', 33 | use_sigmoid=True, 34 | gamma=2.0, 35 | alpha=0.25, 36 | loss_weight=1.0), 37 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 38 | # training and testing settings 39 | train_cfg=dict( 40 | assigner=dict( 41 | type='MaxIoUAssigner', 42 | pos_iou_thr=0.5, 43 | neg_iou_thr=0.4, 44 | min_pos_iou=0, 45 | ignore_iof_thr=-1), 46 | allowed_border=-1, 47 | pos_weight=-1, 48 | debug=False), 49 | test_cfg=dict( 50 | nms_pre=1000, 51 | min_bbox_size=0, 52 | score_thr=0.05, 53 | nms=dict(type='nms', iou_threshold=0.5), 54 | max_per_img=100)) 55 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | 
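        # start_level=1 with add_extra_convs='on_input' (just below) builds the P3-P7
        # pyramid RetinaNet uses (head strides [8, 16, 32, 64, 128]), rather than the
        # P2-P6 pyramid of the two-stage configs above.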
out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=dict(type='BN', requires_grad=False), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict( 35 | rpn=dict( 36 | assigner=dict( 37 | type='MaxIoUAssigner', 38 | pos_iou_thr=0.7, 39 | neg_iou_thr=0.3, 40 | min_pos_iou=0.3, 41 | ignore_iof_thr=-1), 42 | sampler=dict( 43 | type='RandomSampler', 44 | num=256, 45 | pos_fraction=0.5, 46 | neg_pos_ub=-1, 47 | add_gt_as_proposals=False), 48 | allowed_border=0, 49 | pos_weight=-1, 50 | debug=False)), 51 | test_cfg=dict( 52 | rpn=dict( 53 | nms_pre=12000, 54 | max_per_img=2000, 55 | nms=dict(type='nms', iou_threshold=0.7), 56 | min_bbox_size=0))) 57 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | 3 | model = dict( 4 | type='RPN', 5 | pretrained='torchvision://resnet50', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | norm_eval=True, 14 | style='pytorch'), 15 | neck=dict( 16 | type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | num_outs=5), 20 | rpn_head=dict( 21 | type='RPNHead', 22 | in_channels=256, 23 | feat_channels=256, 24 | anchor_generator=dict( 25 | 
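        # A single base scale (scales=[8]) with three aspect ratios yields one anchor
        # size per FPN level, i.e. 3 anchors per location.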
type='AnchorGenerator', 26 | scales=[8], 27 | ratios=[0.5, 1.0, 2.0], 28 | strides=[4, 8, 16, 32, 64]), 29 | bbox_coder=dict( 30 | type='DeltaXYWHBBoxCoder', 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0]), 33 | loss_cls=dict( 34 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 35 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 36 | # model training and testing settings 37 | train_cfg=dict( 38 | rpn=dict( 39 | assigner=dict( 40 | type='MaxIoUAssigner', 41 | pos_iou_thr=0.7, 42 | neg_iou_thr=0.3, 43 | min_pos_iou=0.3, 44 | ignore_iof_thr=-1), 45 | sampler=dict( 46 | type='RandomSampler', 47 | num=256, 48 | pos_fraction=0.5, 49 | neg_pos_ub=-1, 50 | add_gt_as_proposals=False), 51 | allowed_border=0, 52 | pos_weight=-1, 53 | debug=False)), 54 | test_cfg=dict( 55 | rpn=dict( 56 | nms_pre=2000, 57 | max_per_img=1000, 58 | nms=dict(type='nms', iou_threshold=0.7), 59 | min_bbox_size=0))) 60 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | in_channels=(512, 1024, 512, 256, 256, 256), 19 | num_classes=80, 20 | anchor_generator=dict( 21 | type='SSDAnchorGenerator', 22 | scale_major=False, 23 | input_size=input_size, 24 | basesize_ratio_range=(0.15, 0.9), 25 | strides=[8, 16, 32, 64, 100, 300], 26 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[0.1, 0.1, 0.2, 0.2])), 31 | train_cfg=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.5, 35 | neg_iou_thr=0.5, 36 | min_pos_iou=0., 37 | ignore_iof_thr=-1, 38 | gt_max_assign_all=False), 39 | smoothl1_beta=1., 40 | allowed_border=-1, 41 | pos_weight=-1, 42 | neg_pos_ratio=3, 43 | debug=False), 44 | test_cfg=dict( 45 | nms_pre=1000, 46 | nms=dict(type='nms', iou_threshold=0.45), 47 | min_bbox_size=0, 48 | score_thr=0.02, 49 | max_per_img=200)) 50 | cudnn_benchmark = True 51 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | 
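# Step policy with the default gamma of 0.1: the base LR of 0.02 is decayed tenfold
# at epochs 16 and 19, after a 500-iteration linear warmup starting from
# warmup_ratio * lr.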
runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | -------------------------------------------------------------------------------- /detection/configs/mask_rcnn_lsnet_b_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './_base_/models/mask_rcnn_lsnet_fpn.py', 3 | './_base_/datasets/coco_instance.py', 4 | './_base_/schedules/schedule_1x.py', 5 | './_base_/default_runtime.py' 6 | ] 7 | 8 | model = dict( 9 | pretrained=None, 10 | backbone=dict( 11 | type='lsnet_b', 12 | pretrained="pretrain/lsnet_b.pth", 13 | frozen_stages=-1, 14 | ), 15 | neck=dict( 16 | type='LSNetFPN', 17 | in_channels=[128, 256, 384, 512], 18 | out_channels=256, 19 | start_level=0, 20 | num_outs=5, 21 | num_extra_trans_convs=1, 22 | )) 23 | 24 | # optimizer 25 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 26 | paramwise_cfg=dict(custom_keys={'attention_biases': dict(decay_mult=0.), 27 | 'attention_bias_idxs': dict(decay_mult=0.), 28 | })) 29 | # optimizer_config = dict(grad_clip=None) 30 | # do not use mmdet version fp16 31 | # fp16 = None 32 | optimizer_config = dict(grad_clip=None) 33 | # learning policy 34 | lr_config = dict( 35 | policy='step', 36 | warmup='linear', 37 | warmup_iters=500, 38 | warmup_ratio=0.001, 39 | step=[8, 11]) 40 | total_epochs = 12 41 | -------------------------------------------------------------------------------- /detection/configs/mask_rcnn_lsnet_s_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './_base_/models/mask_rcnn_lsnet_fpn.py', 3 | './_base_/datasets/coco_instance.py', 4 | './_base_/schedules/schedule_1x.py', 5 | './_base_/default_runtime.py' 6 | ] 7 | 8 | model = dict( 9 | pretrained=None, 10 | backbone=dict( 11 | type='lsnet_s', 12 | pretrained="pretrain/lsnet_s.pth", 13 | frozen_stages=-1, 14 | ), 15 | neck=dict( 16 | type='LSNetFPN', 17 | in_channels=[96, 192, 320, 448], 18 | out_channels=256, 19 | start_level=0, 20 | num_outs=5, 21 | num_extra_trans_convs=1, 22 | )) 23 | 24 | # optimizer 25 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 26 | paramwise_cfg=dict(custom_keys={'attention_biases': dict(decay_mult=0.), 27 | 'attention_bias_idxs': dict(decay_mult=0.), 28 | })) 29 | # optimizer_config = dict(grad_clip=None) 30 | # do not use mmdet version fp16 31 | # fp16 = None 32 | optimizer_config = dict(grad_clip=None) 33 | # learning policy 34 | lr_config = dict( 35 | policy='step', 36 | warmup='linear', 37 | warmup_iters=500, 38 | warmup_ratio=0.001, 39 | step=[8, 11]) 40 | total_epochs = 12 41 | -------------------------------------------------------------------------------- /detection/configs/mask_rcnn_lsnet_t_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './_base_/models/mask_rcnn_lsnet_fpn.py', 3 | 
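    # The t/s/b variants share this skeleton and differ only in backbone type,
    # pretrained checkpoint and LSNetFPN in_channels; AdamW (via _delete_=True)
    # replaces the SGD inherited from schedule_1x, with weight decay disabled on
    # the attention-bias parameters.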
'./_base_/datasets/coco_instance.py', 4 | './_base_/schedules/schedule_1x.py', 5 | './_base_/default_runtime.py' 6 | ] 7 | 8 | model = dict( 9 | pretrained=None, 10 | backbone=dict( 11 | type='lsnet_t', 12 | pretrained="pretrain/lsnet_t.pth", 13 | frozen_stages=-1, 14 | ), 15 | neck=dict( 16 | type='LSNetFPN', 17 | in_channels=[64, 128, 256, 384], 18 | out_channels=256, 19 | start_level=0, 20 | num_outs=5, 21 | num_extra_trans_convs=1, 22 | )) 23 | 24 | # optimizer 25 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 26 | paramwise_cfg=dict(custom_keys={'attention_biases': dict(decay_mult=0.), 27 | 'attention_bias_idxs': dict(decay_mult=0.), 28 | })) 29 | # optimizer_config = dict(grad_clip=None) 30 | # do not use mmdet version fp16 31 | # fp16 = None 32 | optimizer_config = dict(grad_clip=None) 33 | # learning policy 34 | lr_config = dict( 35 | policy='step', 36 | warmup='linear', 37 | warmup_iters=500, 38 | warmup_ratio=0.001, 39 | step=[8, 11]) 40 | total_epochs = 12 41 | -------------------------------------------------------------------------------- /detection/configs/retinanet_lsnet_b_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './_base_/models/retinanet_lsnet_fpn.py', 3 | './_base_/datasets/coco_detection.py', 4 | './_base_/schedules/schedule_1x.py', 5 | './_base_/default_runtime.py' 6 | ] 7 | 8 | model = dict( 9 | pretrained=None, 10 | backbone=dict( 11 | type='lsnet_b', 12 | pretrained="pretrain/lsnet_b.pth", 13 | frozen_stages=-1, 14 | ), 15 | neck=dict( 16 | type='LSNetFPN', 17 | in_channels=[128, 256, 384, 512], 18 | out_channels=256, 19 | start_level=0, 20 | num_outs=5, 21 | num_extra_trans_convs=0, 22 | )) 23 | 24 | # optimizer 25 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 26 | paramwise_cfg=dict(custom_keys={'attention_biases': dict(decay_mult=0.), 27 | 'attention_bias_idxs': dict(decay_mult=0.), 28 | })) 29 | # optimizer_config = dict(grad_clip=None) 30 | # do not use mmdet version fp16 31 | # fp16 = None 32 | optimizer_config = dict(grad_clip=None) 33 | # learning policy 34 | lr_config = dict( 35 | policy='step', 36 | warmup='linear', 37 | warmup_iters=500, 38 | warmup_ratio=0.001, 39 | step=[8, 11]) 40 | total_epochs = 12 41 | -------------------------------------------------------------------------------- /detection/configs/retinanet_lsnet_s_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './_base_/models/retinanet_lsnet_fpn.py', 3 | './_base_/datasets/coco_detection.py', 4 | './_base_/schedules/schedule_1x.py', 5 | './_base_/default_runtime.py' 6 | ] 7 | 8 | model = dict( 9 | pretrained=None, 10 | backbone=dict( 11 | type='lsnet_s', 12 | pretrained="pretrain/lsnet_s.pth", 13 | frozen_stages=-1, 14 | ), 15 | neck=dict( 16 | type='LSNetFPN', 17 | in_channels=[96, 192, 320, 448], 18 | out_channels=256, 19 | start_level=0, 20 | num_outs=5, 21 | )) 22 | 23 | # optimizer 24 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 25 | paramwise_cfg=dict(custom_keys={'attention_biases': dict(decay_mult=0.), 26 | 'attention_bias_idxs': dict(decay_mult=0.), 27 | })) 28 | # optimizer_config = dict(grad_clip=None) 29 | # do not use mmdet version fp16 30 | # fp16 = None 31 | optimizer_config = dict(grad_clip=None) 32 | # learning policy 33 | lr_config = dict( 34 | policy='step', 35 | warmup='linear', 36 
| warmup_iters=500, 37 | warmup_ratio=0.001, 38 | step=[8, 11]) 39 | total_epochs = 12 40 | -------------------------------------------------------------------------------- /detection/configs/retinanet_lsnet_t_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './_base_/models/retinanet_lsnet_fpn.py', 3 | './_base_/datasets/coco_detection.py', 4 | './_base_/schedules/schedule_1x.py', 5 | './_base_/default_runtime.py' 6 | ] 7 | 8 | model = dict( 9 | pretrained=None, 10 | backbone=dict( 11 | type='lsnet_t', 12 | pretrained="pretrain/lsnet_t.pth", 13 | frozen_stages=-1, 14 | ), 15 | neck=dict( 16 | type='LSNetFPN', 17 | in_channels=[64, 128, 256, 384], 18 | out_channels=256, 19 | start_level=0, 20 | num_outs=5, 21 | )) 22 | 23 | # optimizer 24 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 25 | paramwise_cfg=dict(custom_keys={'attention_biases': dict(decay_mult=0.), 26 | 'attention_bias_idxs': dict(decay_mult=0.), 27 | })) 28 | # optimizer_config = dict(grad_clip=None) 29 | # do not use mmdet version fp16 30 | # fp16 = None 31 | optimizer_config = dict(grad_clip=None) 32 | # learning policy 33 | lr_config = dict( 34 | policy='step', 35 | warmup='linear', 36 | warmup_iters=500, 37 | warmup_ratio=0.001, 38 | step=[8, 11]) 39 | total_epochs = 12 40 | -------------------------------------------------------------------------------- /detection/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29500} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/.":$PYTHONPATH \ 12 | NCCL_P2P_DISABLE=1 \ 13 | python -m torch.distributed.launch \ 14 | --nnodes=$NNODES \ 15 | --node_rank=$NODE_RANK \ 16 | --master_addr=$MASTER_ADDR \ 17 | --nproc_per_node=$GPUS \ 18 | --master_port=$PORT \ 19 | $(dirname "$0")/test.py \ 20 | $CONFIG \ 21 | $CHECKPOINT \ 22 | --launcher pytorch \ 23 | ${@:4} -------------------------------------------------------------------------------- /detection/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/.":$PYTHONPATH \ 11 | NCCL_P2P_DISABLE=1 \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/train.py \ 19 | $CONFIG \ 20 | --seed 0 \ 21 | --launcher pytorch ${@:3} -------------------------------------------------------------------------------- /detection/eval.sh: -------------------------------------------------------------------------------- 1 | # For RetinaNet 2 | bash ./dist_test.sh configs/retinanet_lsnet_t_fpn_1x_coco.py pretrain/lsnet_t_retinanet.pth 8 --eval bbox --out results.pkl 3 | 4 | # For Mask R-CNN 5 | bash ./dist_test.sh configs/mask_rcnn_lsnet_t_fpn_1x_coco.py pretrain/lsnet_t_maskrcnn.pth 8 --eval bbox segm --out results.pkl -------------------------------------------------------------------------------- /detection/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- 
coding: utf-8 -*- 2 | 3 | from .checkpoint import load_checkpoint, load_state_dict, _load_checkpoint 4 | 5 | __all__ = ['load_checkpoint', 'load_state_dict', '_load_checkpoint'] 6 | -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | from .checkpoint import save_checkpoint 3 | from .epoch_based_runner import EpochBasedRunnerAmp 4 | 5 | 6 | __all__ = [ 7 | 'EpochBasedRunnerAmp', 'save_checkpoint' 8 | ] 9 | -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | 14 | def save_checkpoint(model, filename, optimizer=None, meta=None): 15 | """Save checkpoint to file. 16 | 17 | The checkpoint will have 4 fields: ``meta``, ``state_dict`` and 18 | ``optimizer``, ``amp``. By default ``meta`` will contain version 19 | and time info. 20 | 21 | Args: 22 | model (Module): Module whose params are to be saved. 23 | filename (str): Checkpoint filename. 24 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 25 | meta (dict, optional): Metadata to be saved in checkpoint. 26 | """ 27 | if meta is None: 28 | meta = {} 29 | elif not isinstance(meta, dict): 30 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 31 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 32 | 33 | if is_module_wrapper(model): 34 | model = model.module 35 | 36 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 37 | # save class name to the meta 38 | meta.update(CLASSES=model.CLASSES) 39 | 40 | checkpoint = { 41 | 'meta': meta, 42 | 'state_dict': weights_to_cpu(get_state_dict(model)) 43 | } 44 | # save optimizer state dict in the checkpoint 45 | if isinstance(optimizer, Optimizer): 46 | checkpoint['optimizer'] = optimizer.state_dict() 47 | elif isinstance(optimizer, dict): 48 | checkpoint['optimizer'] = {} 49 | for name, optim in optimizer.items(): 50 | checkpoint['optimizer'][name] = optim.state_dict() 51 | 52 | # save amp state dict in the checkpoint 53 | checkpoint['amp'] = apex.amp.state_dict() 54 | 55 | if filename.startswith('pavi://'): 56 | try: 57 | from pavi import modelcloud 58 | from pavi.exception import NodeNotFoundError 59 | except ImportError: 60 | raise ImportError( 61 | 'Please install pavi to load checkpoint from modelcloud.') 62 | model_path = filename[7:] 63 | root = modelcloud.Folder() 64 | model_dir, model_name = osp.split(model_path) 65 | try: 66 | model = modelcloud.get(model_dir) 67 | except NodeNotFoundError: 68 | model = root.create_training_model(model_dir) 69 | with TemporaryDirectory() as tmp_dir: 70 | checkpoint_file = osp.join(tmp_dir, model_name) 71 | with open(checkpoint_file, 'wb') as f: 72 | torch.save(checkpoint, f) 73 | f.flush() 74 | model.create_file(checkpoint_file, name=model_name) 75 | else: 76 | mmcv.mkdir_or_exist(osp.dirname(filename)) 77 | # immediately flush buffer 78 | with open(filename, 'wb') as f: 79 | torch.save(checkpoint, f) 80 | 
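        # NOTE: `apex` is referenced in these mmcv_custom files (apex.amp.state_dict()
        # above, apex.amp.load_state_dict() in epoch_based_runner.py and
        # apex.amp.scale_loss() in optimizer.py) but never imported; a module-level
        # `import apex` is assumed, i.e. this AMP-aware runner requires NVIDIA apex.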
f.flush() 81 | -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import platform 4 | import shutil 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.runner import RUNNERS, EpochBasedRunner 11 | from .checkpoint import save_checkpoint 12 | 13 | 14 | @RUNNERS.register_module() 15 | class EpochBasedRunnerAmp(EpochBasedRunner): 16 | """Epoch-based Runner with AMP support. 17 | 18 | This runner trains models epoch by epoch. 19 | """ 20 | 21 | def save_checkpoint(self, 22 | out_dir, 23 | filename_tmpl='epoch_{}.pth', 24 | save_optimizer=True, 25 | meta=None, 26 | create_symlink=True): 27 | """Save the checkpoint. 28 | 29 | Args: 30 | out_dir (str): The directory that checkpoints are saved. 31 | filename_tmpl (str, optional): The checkpoint filename template, 32 | which contains a placeholder for the epoch number. 33 | Defaults to 'epoch_{}.pth'. 34 | save_optimizer (bool, optional): Whether to save the optimizer to 35 | the checkpoint. Defaults to True. 36 | meta (dict, optional): The meta information to be saved in the 37 | checkpoint. Defaults to None. 38 | create_symlink (bool, optional): Whether to create a symlink 39 | "latest.pth" to point to the latest checkpoint. 40 | Defaults to True. 41 | """ 42 | if meta is None: 43 | meta = dict(epoch=self.epoch + 1, iter=self.iter) 44 | elif isinstance(meta, dict): 45 | meta.update(epoch=self.epoch + 1, iter=self.iter) 46 | else: 47 | raise TypeError( 48 | f'meta should be a dict or None, but got {type(meta)}') 49 | if self.meta is not None: 50 | meta.update(self.meta) 51 | 52 | filename = filename_tmpl.format(self.epoch + 1) 53 | filepath = osp.join(out_dir, filename) 54 | optimizer = self.optimizer if save_optimizer else None 55 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) 56 | # in some environments, `os.symlink` is not supported; you may need to 57 | # set `create_symlink` to False 58 | if create_symlink: 59 | dst_file = osp.join(out_dir, 'latest.pth') 60 | if platform.system() != 'Windows': 61 | mmcv.symlink(filename, dst_file) 62 | else: 63 | shutil.copy(filepath, dst_file) 64 | 65 | def resume(self, 66 | checkpoint, 67 | resume_optimizer=True, 68 | map_location='default'): 69 | if map_location == 'default': 70 | if torch.cuda.is_available(): 71 | device_id = torch.cuda.current_device() 72 | checkpoint = self.load_checkpoint( 73 | checkpoint, 74 | map_location=lambda storage, loc: storage.cuda(device_id)) 75 | else: 76 | checkpoint = self.load_checkpoint(checkpoint) 77 | else: 78 | checkpoint = self.load_checkpoint( 79 | checkpoint, map_location=map_location) 80 | 81 | self._epoch = checkpoint['meta']['epoch'] 82 | self._iter = checkpoint['meta']['iter'] 83 | if 'optimizer' in checkpoint and resume_optimizer: 84 | if isinstance(self.optimizer, Optimizer): 85 | self.optimizer.load_state_dict(checkpoint['optimizer']) 86 | elif isinstance(self.optimizer, dict): 87 | for k in self.optimizer.keys(): 88 | self.optimizer[k].load_state_dict( 89 | checkpoint['optimizer'][k]) 90 | else: 91 | raise TypeError( 92 | 'Optimizer should be dict or torch.optim.Optimizer ' 93 | f'but got {type(self.optimizer)}') 94 | 95 | if 'amp' in checkpoint: 96 | apex.amp.load_state_dict(checkpoint['amp']) 97 | self.logger.info('load amp state dict') 98 | 99 |
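        # map_location='default' maps checkpoint storages onto the current CUDA device
        # when one is available, so training can resume on a different GPU id than the
        # one the checkpoint was saved from.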
self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) 100 | -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/optimizer.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import OptimizerHook, HOOKS 2 | 3 | @HOOKS.register_module() 4 | class DistOptimizerHook(OptimizerHook): 5 | """Optimizer hook for distributed training.""" 6 | 7 | def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False): 8 | self.grad_clip = grad_clip 9 | self.coalesce = coalesce 10 | self.bucket_size_mb = bucket_size_mb 11 | self.update_interval = update_interval 12 | self.use_fp16 = use_fp16 13 | 14 | def before_run(self, runner): 15 | runner.optimizer.zero_grad() 16 | 17 | def after_train_iter(self, runner): 18 | runner.outputs['loss'] /= self.update_interval 19 | if self.use_fp16: 20 | with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss: 21 | scaled_loss.backward() 22 | else: 23 | runner.outputs['loss'].backward() 24 | if self.every_n_iters(runner, self.update_interval): 25 | if self.grad_clip is not None: 26 | self.clip_grads(runner.model.parameters()) 27 | runner.optimizer.step() 28 | runner.optimizer.zero_grad() -------------------------------------------------------------------------------- /detection/model/ska.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | import triton 4 | import triton.language as tl 5 | from torch.amp import custom_fwd, custom_bwd 6 | import math 7 | 8 | def _grid(numel: int, bs: int) -> tuple: 9 | return (triton.cdiv(numel, bs),) 10 | 11 | @triton.jit 12 | def _idx(i, n: int, c: int, h: int, w: int): 13 | ni = i // (c * h * w) 14 | ci = (i // (h * w)) % c 15 | hi = (i // w) % h 16 | wi = i % w 17 | m = i < (n * c * h * w) 18 | return ni, ci, hi, wi, m 19 | 20 | @triton.jit 21 | def ska_fwd( 22 | x_ptr, w_ptr, o_ptr, 23 | n, ic, h, w, ks, pad, wc, 24 | BS: tl.constexpr, 25 | CT: tl.constexpr, AT: tl.constexpr 26 | ): 27 | pid = tl.program_id(0) 28 | start = pid * BS 29 | offs = start + tl.arange(0, BS) 30 | 31 | ni, ci, hi, wi, m = _idx(offs, n, ic, h, w) 32 | val = tl.zeros((BS,), dtype=AT) 33 | 34 | for kh in range(ks): 35 | hin = hi - pad + kh 36 | hb = (hin >= 0) & (hin < h) 37 | for kw in range(ks): 38 | win = wi - pad + kw 39 | b = hb & (win >= 0) & (win < w) 40 | 41 | x_off = ((ni * ic + ci) * h + hin) * w + win 42 | w_off = ((ni * wc + ci % wc) * ks * ks + (kh * ks + kw)) * h * w + hi * w + wi 43 | 44 | x_val = tl.load(x_ptr + x_off, mask=m & b, other=0.0).to(CT) 45 | w_val = tl.load(w_ptr + w_off, mask=m, other=0.0).to(CT) 46 | val += tl.where(b & m, x_val * w_val, 0.0).to(AT) 47 | 48 | tl.store(o_ptr + offs, val.to(CT), mask=m) 49 | 50 | @triton.jit 51 | def ska_bwd_x( 52 | go_ptr, w_ptr, gi_ptr, 53 | n, ic, h, w, ks, pad, wc, 54 | BS: tl.constexpr, 55 | CT: tl.constexpr, AT: tl.constexpr 56 | ): 57 | pid = tl.program_id(0) 58 | start = pid * BS 59 | offs = start + tl.arange(0, BS) 60 | 61 | ni, ci, hi, wi, m = _idx(offs, n, ic, h, w) 62 | val = tl.zeros((BS,), dtype=AT) 63 | 64 | for kh in range(ks): 65 | ho = hi + pad - kh 66 | hb = (ho >= 0) & (ho < h) 67 | for kw in range(ks): 68 | wo = wi + pad - kw 69 | b = hb & (wo >= 0) & (wo < w) 70 | 71 | go_off = ((ni * ic + ci) * h + ho) * w + wo 72 | w_off = ((ni * wc + ci % wc) * ks * ks + (kh * ks + kw)) * h * w + ho * w + wo 73 | 74 
| go_val = tl.load(go_ptr + go_off, mask=m & b, other=0.0).to(CT) 75 | w_val = tl.load(w_ptr + w_off, mask=m, other=0.0).to(CT) 76 | val += tl.where(b & m, go_val * w_val, 0.0).to(AT) 77 | 78 | tl.store(gi_ptr + offs, val.to(CT), mask=m) 79 | 80 | @triton.jit 81 | def ska_bwd_w( 82 | go_ptr, x_ptr, gw_ptr, 83 | n, wc, h, w, ic, ks, pad, 84 | BS: tl.constexpr, 85 | CT: tl.constexpr, AT: tl.constexpr 86 | ): 87 | pid = tl.program_id(0) 88 | start = pid * BS 89 | offs = start + tl.arange(0, BS) 90 | 91 | ni, ci, hi, wi, m = _idx(offs, n, wc, h, w) 92 | 93 | for kh in range(ks): 94 | hin = hi - pad + kh 95 | hb = (hin >= 0) & (hin < h) 96 | for kw in range(ks): 97 | win = wi - pad + kw 98 | b = hb & (win >= 0) & (win < w) 99 | w_off = ((ni * wc + ci) * ks * ks + (kh * ks + kw)) * h * w + hi * w + wi 100 | 101 | val = tl.zeros((BS,), dtype=AT) 102 | steps = (ic - ci + wc - 1) // wc 103 | for s in range(tl.max(steps, axis=0)): 104 | cc = ci + s * wc 105 | cm = (cc < ic) & m & b 106 | 107 | x_off = ((ni * ic + cc) * h + hin) * w + win 108 | go_off = ((ni * ic + cc) * h + hi) * w + wi 109 | 110 | x_val = tl.load(x_ptr + x_off, mask=cm, other=0.0).to(CT) 111 | go_val = tl.load(go_ptr + go_off, mask=cm, other=0.0).to(CT) 112 | val += tl.where(cm, x_val * go_val, 0.0).to(AT) 113 | 114 | tl.store(gw_ptr + w_off, val.to(CT), mask=m) 115 | 116 | class SkaFn(Function): 117 | @staticmethod 118 | @custom_fwd(device_type='cuda') 119 | def forward(ctx, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor: 120 | ks = int(math.sqrt(w.shape[2])) 121 | pad = (ks - 1) // 2 122 | ctx.ks, ctx.pad = ks, pad 123 | n, ic, h, width = x.shape 124 | wc = w.shape[1] 125 | o = torch.empty(n, ic, h, width, device=x.device, dtype=x.dtype) 126 | numel = o.numel() 127 | 128 | x = x.contiguous() 129 | w = w.contiguous() 130 | 131 | grid = lambda meta: _grid(numel, meta["BS"]) 132 | 133 | ct = tl.float16 if x.dtype == torch.float16 else (tl.float32 if x.dtype == torch.float32 else tl.float64) 134 | at = tl.float32 if x.dtype == torch.float16 else ct 135 | 136 | ska_fwd[grid](x, w, o, n, ic, h, width, ks, pad, wc, BS=1024, CT=ct, AT=at) 137 | 138 | ctx.save_for_backward(x, w) 139 | ctx.ct, ctx.at = ct, at 140 | return o 141 | 142 | @staticmethod 143 | @custom_bwd(device_type='cuda') 144 | def backward(ctx, go: torch.Tensor) -> tuple: 145 | ks, pad = ctx.ks, ctx.pad 146 | x, w = ctx.saved_tensors 147 | n, ic, h, width = x.shape 148 | wc = w.shape[1] 149 | 150 | go = go.contiguous() 151 | gx = gw = None 152 | ct, at = ctx.ct, ctx.at 153 | 154 | if ctx.needs_input_grad[0]: 155 | gx = torch.empty_like(x) 156 | numel = gx.numel() 157 | ska_bwd_x[lambda meta: _grid(numel, meta["BS"])](go, w, gx, n, ic, h, width, ks, pad, wc, BS=1024, CT=ct, AT=at) 158 | 159 | if ctx.needs_input_grad[1]: 160 | gw = torch.empty_like(w) 161 | numel = gw.numel() // w.shape[2] 162 | ska_bwd_w[lambda meta: _grid(numel, meta["BS"])](go, x, gw, n, wc, h, width, ic, ks, pad, BS=1024, CT=ct, AT=at) 163 | 164 | return gx, gw, None, None 165 | 166 | class SKA(torch.nn.Module): 167 | def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor: 168 | return SkaFn.apply(x, w) # type: ignore 169 | -------------------------------------------------------------------------------- /detection/train.sh: -------------------------------------------------------------------------------- 1 | # For RetinaNet 2 | bash ./dist_train.sh configs/retinanet_lsnet_t_fpn_1x_coco.py 8 3 | 4 | # For Mask R-CNN 5 | bash ./dist_train.sh configs/mask_rcnn_lsnet_t_fpn_1x_coco.py 8 6 | 
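# The first positional argument is the config and the second the GPU count; any
# extra arguments (e.g. --resume-from) are forwarded to train.py through ${@:3}
# in dist_train.sh above.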
-------------------------------------------------------------------------------- /engine.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | 5 | import torch 6 | 7 | from timm.data import Mixup 8 | from timm.utils import accuracy, ModelEma 9 | 10 | from losses import DistillationLoss 11 | import utils 12 | 13 | def set_bn_state(model): 14 | for m in model.modules(): 15 | if isinstance(m, torch.nn.modules.batchnorm._BatchNorm): 16 | m.eval() 17 | 18 | def train_one_epoch(model: torch.nn.Module, criterion: DistillationLoss, 19 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 20 | device: torch.device, epoch: int, loss_scaler, 21 | clip_grad: float = 0, 22 | clip_mode: str = 'norm', 23 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 24 | set_training_mode=True, 25 | set_bn_eval=False,): 26 | model.train(set_training_mode) 27 | if set_bn_eval: 28 | set_bn_state(model) 29 | metric_logger = utils.MetricLogger(delimiter=" ") 30 | metric_logger.add_meter('lr', utils.SmoothedValue( 31 | window_size=1, fmt='{value:.6f}')) 32 | header = 'Epoch: [{}]'.format(epoch) 33 | print_freq = 100 34 | 35 | for samples, targets in metric_logger.log_every( 36 | data_loader, print_freq, header): 37 | samples = samples.to(device, non_blocking=True) 38 | targets = targets.to(device, non_blocking=True) 39 | 40 | if mixup_fn is not None: 41 | samples, targets = mixup_fn(samples, targets) 42 | 43 | with torch.amp.autocast(enabled=False, device_type="cuda"): 44 | outputs = model(samples) 45 | loss = criterion(samples, outputs, targets) 46 | 47 | loss_value = loss.item() 48 | 49 | if not math.isfinite(loss_value): 50 | print("Loss is {}, stopping training".format(loss_value)) 51 | sys.exit(1) 52 | 53 | optimizer.zero_grad() 54 | 55 | # this attribute is added by timm on one optimizer (adahessian) 56 | is_second_order = hasattr( 57 | optimizer, 'is_second_order') and optimizer.is_second_order 58 | loss_scaler(loss, optimizer, clip_grad=clip_grad, clip_mode=clip_mode, 59 | parameters=model.parameters(), create_graph=is_second_order) 60 | 61 | torch.cuda.synchronize() 62 | if model_ema is not None: 63 | model_ema.update(model) 64 | 65 | metric_logger.update(loss=loss_value) 66 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 67 | # gather the stats from all processes 68 | metric_logger.synchronize_between_processes() 69 | print("Averaged stats:", metric_logger) 70 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 71 | 72 | 73 | @torch.no_grad() 74 | def evaluate(data_loader, model, device): 75 | criterion = torch.nn.CrossEntropyLoss() 76 | 77 | metric_logger = utils.MetricLogger(delimiter=" ") 78 | header = 'Test:' 79 | 80 | # switch to evaluation mode 81 | model.eval() 82 | 83 | for images, target in metric_logger.log_every(data_loader, 10, header): 84 | images = images.to(device, non_blocking=True) 85 | target = target.to(device, non_blocking=True) 86 | 87 | # compute output 88 | with torch.amp.autocast(enabled=False, device_type="cuda"): 89 | output = model(images) 90 | loss = criterion(output, target) 91 | 92 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 93 | 94 | batch_size = images.shape[0] 95 | metric_logger.update(loss=loss.item()) 96 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 97 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 98 | # gather the stats from all processes 99 | 
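    # Under distributed evaluation each rank only sees its shard of the validation
    # set; synchronizing aggregates the meters across processes so global_avg
    # reflects the full dataset.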
metric_logger.synchronize_between_processes() 100 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 101 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 102 | 103 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 104 | -------------------------------------------------------------------------------- /eval.sh: -------------------------------------------------------------------------------- 1 | python main.py --eval --model lsnet_b --resume ./pretrain/lsnet_b_distill.pth --data-path ~/imagenet --distillation-type hard -------------------------------------------------------------------------------- /eval_robust.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | set -x 3 | 4 | MODEL=$1 5 | CKPT=$2 6 | INPUT=$3 7 | 8 | export HF_ENDPOINT=https://hf-mirror.com 9 | 10 | python main.py --eval --model ${MODEL} --resume ${CKPT} --data-path ~/imagenet \ 11 | --inc_path ~/datasets/OpenDataLab___ImageNet-C/raw \ 12 | --insk_path ~/datasets/OpenDataLab___ImageNet-Sketch/raw/sketch \ 13 | --ina_path ~/datasets/OpenDataLab___ImageNet-A/raw/imagenet-a \ 14 | --inr_path ~/datasets/OpenDataLab___ImageNet-R/raw/imagenet-r \ 15 | --batch-size 512 \ 16 | --input-size ${INPUT} -------------------------------------------------------------------------------- /flops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from timm import create_model 3 | import model.build 4 | import utils 5 | from fvcore.nn import FlopCountAnalysis 6 | from model.ska import SKA 7 | import torch.nn.functional as F 8 | from argparse import ArgumentParser 9 | 10 | def forward_flops(self, x: torch.Tensor, w: torch.Tensor): 11 | w = w.squeeze(0)[..., 0, 0].reshape(-1, 1, 3, 3) 12 | w = w.repeat(8, 1, 1, 1) 13 | return F.conv2d(x, w, None, 1, 1, 1, groups=w.shape[0]) 14 | 15 | SKA.forward = forward_flops 16 | 17 | torch.autograd.set_grad_enabled(False) 18 | 19 | if __name__ == "__main__": 20 | parser = ArgumentParser() 21 | parser.add_argument("--model", default="lsnet_t", type=str) 22 | parser.add_argument("--resolution", default=224, type=int) 23 | 24 | args = parser.parse_args() 25 | model = args.model 26 | resolution = args.resolution 27 | 28 | inputs = torch.randn(1, 3, resolution, 29 | resolution) 30 | model = create_model(model, num_classes=1000) 31 | utils.replace_batchnorm(model) 32 | model.cuda() 33 | n_parameters = sum(p.numel() 34 | for p in model.parameters() if p.requires_grad) 35 | print('Number of params:', n_parameters / 1e6) 36 | flops = FlopCountAnalysis(model, inputs.cuda()).unsupported_ops_warnings(False).uncalled_modules_warnings(False) 37 | print("Flops: ", flops.total() / 1e9) 38 | -------------------------------------------------------------------------------- /losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | 5 | class DistillationLoss(torch.nn.Module): 6 | """ 7 | This module wraps a standard criterion and adds an extra knowledge distillation loss by 8 | taking a teacher model prediction and using it as additional supervision. 
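    Two modes are implemented below: 'soft' distillation, a KL divergence between
    temperature-scaled student and teacher logits, and 'hard' distillation, a
    cross-entropy loss against the teacher's argmax predictions.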
9 | """ 10 | 11 | def __init__(self, base_criterion: torch.nn.Module, teacher_model: torch.nn.Module, 12 | distillation_type: str, alpha: float, tau: float): 13 | super().__init__() 14 | self.base_criterion = base_criterion 15 | self.teacher_model = teacher_model 16 | assert distillation_type in ['none', 'soft', 'hard'] 17 | self.distillation_type = distillation_type 18 | self.alpha = alpha 19 | self.tau = tau 20 | 21 | def forward(self, inputs, outputs, labels): 22 | """ 23 | Args: 24 | inputs: The original inputs that are feed to the teacher model 25 | outputs: the outputs of the model to be trained. It is expected to be 26 | either a Tensor, or a Tuple[Tensor, Tensor], with the original output 27 | in the first position and the distillation predictions as the second output 28 | labels: the labels for the base criterion 29 | """ 30 | outputs_kd = None 31 | if not isinstance(outputs, torch.Tensor): 32 | # assume that the model outputs a tuple of [outputs, outputs_kd] 33 | outputs, outputs_kd = outputs 34 | base_loss = self.base_criterion(outputs, labels) 35 | if self.distillation_type == 'none': 36 | return base_loss 37 | 38 | if outputs_kd is None: 39 | raise ValueError("When knowledge distillation is enabled, the model is " 40 | "expected to return a Tuple[Tensor, Tensor] with the output of the " 41 | "class_token and the dist_token") 42 | # don't backprop throught the teacher 43 | with torch.no_grad(): 44 | teacher_outputs = self.teacher_model(inputs) 45 | 46 | if self.distillation_type == 'soft': 47 | T = self.tau 48 | # taken from https://github.com/peterliht/knowledge-distillation-pytorch/blob/master/model/net.py#L100 49 | # with slight modifications 50 | distillation_loss = F.kl_div( 51 | F.log_softmax(outputs_kd / T, dim=1), 52 | F.log_softmax(teacher_outputs / T, dim=1), 53 | reduction='sum', 54 | log_target=True 55 | ) * (T * T) / outputs_kd.numel() 56 | elif self.distillation_type == 'hard': 57 | distillation_loss = F.cross_entropy( 58 | outputs_kd, teacher_outputs.argmax(dim=1)) 59 | 60 | loss = base_loss * (1 - self.alpha) + distillation_loss * self.alpha 61 | return loss 62 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THU-MIG/lsnet/cbe737c92b7c43ecf02d08545a07f03f1010177c/model/__init__.py -------------------------------------------------------------------------------- /model/build.py: -------------------------------------------------------------------------------- 1 | import model.lsnet -------------------------------------------------------------------------------- /model/ska.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | import triton 4 | import triton.language as tl 5 | from torch.amp import custom_fwd, custom_bwd 6 | import math 7 | 8 | def _grid(numel: int, bs: int) -> tuple: 9 | return (triton.cdiv(numel, bs),) 10 | 11 | @triton.jit 12 | def _idx(i, n: int, c: int, h: int, w: int): 13 | ni = i // (c * h * w) 14 | ci = (i // (h * w)) % c 15 | hi = (i // w) % h 16 | wi = i % w 17 | m = i < (n * c * h * w) 18 | return ni, ci, hi, wi, m 19 | 20 | @triton.jit 21 | def ska_fwd( 22 | x_ptr, w_ptr, o_ptr, 23 | n, ic, h, w, ks, pad, wc, 24 | BS: tl.constexpr, 25 | CT: tl.constexpr, AT: tl.constexpr 26 | ): 27 | pid = tl.program_id(0) 28 | start = pid * BS 29 | offs = start + tl.arange(0, BS) 30 | 31 | ni, ci, 
hi, wi, m = _idx(offs, n, ic, h, w) 32 | val = tl.zeros((BS,), dtype=AT) 33 | 34 | for kh in range(ks): 35 | hin = hi - pad + kh 36 | hb = (hin >= 0) & (hin < h) 37 | for kw in range(ks): 38 | win = wi - pad + kw 39 | b = hb & (win >= 0) & (win < w) 40 | 41 | x_off = ((ni * ic + ci) * h + hin) * w + win 42 | w_off = ((ni * wc + ci % wc) * ks * ks + (kh * ks + kw)) * h * w + hi * w + wi 43 | 44 | x_val = tl.load(x_ptr + x_off, mask=m & b, other=0.0).to(CT) 45 | w_val = tl.load(w_ptr + w_off, mask=m, other=0.0).to(CT) 46 | val += tl.where(b & m, x_val * w_val, 0.0).to(AT) 47 | 48 | tl.store(o_ptr + offs, val.to(CT), mask=m) 49 | 50 | @triton.jit 51 | def ska_bwd_x( 52 | go_ptr, w_ptr, gi_ptr, 53 | n, ic, h, w, ks, pad, wc, 54 | BS: tl.constexpr, 55 | CT: tl.constexpr, AT: tl.constexpr 56 | ): 57 | pid = tl.program_id(0) 58 | start = pid * BS 59 | offs = start + tl.arange(0, BS) 60 | 61 | ni, ci, hi, wi, m = _idx(offs, n, ic, h, w) 62 | val = tl.zeros((BS,), dtype=AT) 63 | 64 | for kh in range(ks): 65 | ho = hi + pad - kh 66 | hb = (ho >= 0) & (ho < h) 67 | for kw in range(ks): 68 | wo = wi + pad - kw 69 | b = hb & (wo >= 0) & (wo < w) 70 | 71 | go_off = ((ni * ic + ci) * h + ho) * w + wo 72 | w_off = ((ni * wc + ci % wc) * ks * ks + (kh * ks + kw)) * h * w + ho * w + wo 73 | 74 | go_val = tl.load(go_ptr + go_off, mask=m & b, other=0.0).to(CT) 75 | w_val = tl.load(w_ptr + w_off, mask=m, other=0.0).to(CT) 76 | val += tl.where(b & m, go_val * w_val, 0.0).to(AT) 77 | 78 | tl.store(gi_ptr + offs, val.to(CT), mask=m) 79 | 80 | @triton.jit 81 | def ska_bwd_w( 82 | go_ptr, x_ptr, gw_ptr, 83 | n, wc, h, w, ic, ks, pad, 84 | BS: tl.constexpr, 85 | CT: tl.constexpr, AT: tl.constexpr 86 | ): 87 | pid = tl.program_id(0) 88 | start = pid * BS 89 | offs = start + tl.arange(0, BS) 90 | 91 | ni, ci, hi, wi, m = _idx(offs, n, wc, h, w) 92 | 93 | for kh in range(ks): 94 | hin = hi - pad + kh 95 | hb = (hin >= 0) & (hin < h) 96 | for kw in range(ks): 97 | win = wi - pad + kw 98 | b = hb & (win >= 0) & (win < w) 99 | w_off = ((ni * wc + ci) * ks * ks + (kh * ks + kw)) * h * w + hi * w + wi 100 | 101 | val = tl.zeros((BS,), dtype=AT) 102 | steps = (ic - ci + wc - 1) // wc 103 | for s in range(tl.max(steps, axis=0)): 104 | cc = ci + s * wc 105 | cm = (cc < ic) & m & b 106 | 107 | x_off = ((ni * ic + cc) * h + hin) * w + win 108 | go_off = ((ni * ic + cc) * h + hi) * w + wi 109 | 110 | x_val = tl.load(x_ptr + x_off, mask=cm, other=0.0).to(CT) 111 | go_val = tl.load(go_ptr + go_off, mask=cm, other=0.0).to(CT) 112 | val += tl.where(cm, x_val * go_val, 0.0).to(AT) 113 | 114 | tl.store(gw_ptr + w_off, val.to(CT), mask=m) 115 | 116 | class SkaFn(Function): 117 | @staticmethod 118 | @custom_fwd(device_type='cuda') 119 | def forward(ctx, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor: 120 | ks = int(math.sqrt(w.shape[2])) 121 | pad = (ks - 1) // 2 122 | ctx.ks, ctx.pad = ks, pad 123 | n, ic, h, width = x.shape 124 | wc = w.shape[1] 125 | o = torch.empty(n, ic, h, width, device=x.device, dtype=x.dtype) 126 | numel = o.numel() 127 | 128 | x = x.contiguous() 129 | w = w.contiguous() 130 | 131 | grid = lambda meta: _grid(numel, meta["BS"]) 132 | 133 | ct = tl.float16 if x.dtype == torch.float16 else (tl.float32 if x.dtype == torch.float32 else tl.float64) 134 | at = tl.float32 if x.dtype == torch.float16 else ct 135 | 136 | ska_fwd[grid](x, w, o, n, ic, h, width, ks, pad, wc, BS=1024, CT=ct, AT=at) 137 | 138 | ctx.save_for_backward(x, w) 139 | ctx.ct, ctx.at = ct, at 140 | return o 141 | 142 | @staticmethod 143 | 
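# Gradient notes (a descriptive sketch of the two Triton kernels above):
# - ska_bwd_x forms dL/dx by gathering grad_output * w over the mirrored
#   window (output position ho = hi + pad - kh), i.e. the transpose of the
#   forward gather, with w read at the output location.
# - ska_bwd_w forms dL/dw by accumulating x * grad_output over every input
#   channel that shares a weight channel (channels ci, ci + wc, ci + 2*wc,
#   ... all map onto weight channel ci via the ci % wc indexing in ska_fwd).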
@custom_bwd(device_type='cuda') 144 | def backward(ctx, go: torch.Tensor) -> tuple: 145 | ks, pad = ctx.ks, ctx.pad 146 | x, w = ctx.saved_tensors 147 | n, ic, h, width = x.shape 148 | wc = w.shape[1] 149 | 150 | go = go.contiguous() 151 | gx = gw = None 152 | ct, at = ctx.ct, ctx.at 153 | 154 | if ctx.needs_input_grad[0]: 155 | gx = torch.empty_like(x) 156 | numel = gx.numel() 157 | ska_bwd_x[lambda meta: _grid(numel, meta["BS"])](go, w, gx, n, ic, h, width, ks, pad, wc, BS=1024, CT=ct, AT=at) 158 | 159 | if ctx.needs_input_grad[1]: 160 | gw = torch.empty_like(w) 161 | numel = gw.numel() // w.shape[2] 162 | ska_bwd_w[lambda meta: _grid(numel, meta["BS"])](go, x, gw, n, wc, h, width, ic, ks, pad, BS=1024, CT=ct, AT=at) 163 | 164 | return gx, gw, None, None 165 | 166 | class SKA(torch.nn.Module): 167 | def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor: 168 | return SkaFn.apply(x, w) # type: ignore 169 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | timm==0.5.4 2 | einops==0.4.1 3 | fvcore 4 | easydict 5 | matplotlib 6 | numpy==1.21.0 7 | yacs 8 | scikit-image==0.19.3 9 | pillow 10 | wandb 11 | torch==2.4.1 -------------------------------------------------------------------------------- /robust_utils.py: -------------------------------------------------------------------------------- 1 | data_loaders_names = { 2 | 'Brightness': 'brightness', 3 | 'Contrast': 'contrast', 4 | 'Defocus Blur': 'defocus_blur', 5 | 'Elastic Transform': 'elastic_transform', 6 | 'Fog': 'fog', 7 | 'Frost': 'frost', 8 | 'Gaussian Noise': 'gaussian_noise', 9 | 'Glass Blur': 'glass_blur', 10 | 'Impulse Noise': 'impulse_noise', 11 | 'JPEG Compression': 'jpeg_compression', 12 | 'Motion Blur': 'motion_blur', 13 | 'Pixelate': 'pixelate', 14 | 'Shot Noise': 'shot_noise', 15 | 'Snow': 'snow', 16 | 'Zoom Blur': 'zoom_blur' 17 | } 18 | 19 | def get_ce_alexnet(): 20 | """Returns Corruption Error values for AlexNet""" 21 | 22 | ce_alexnet = dict() 23 | ce_alexnet['Gaussian Noise'] = 0.886428 24 | ce_alexnet['Shot Noise'] = 0.894468 25 | ce_alexnet['Impulse Noise'] = 0.922640 26 | ce_alexnet['Defocus Blur'] = 0.819880 27 | ce_alexnet['Glass Blur'] = 0.826268 28 | ce_alexnet['Motion Blur'] = 0.785948 29 | ce_alexnet['Zoom Blur'] = 0.798360 30 | ce_alexnet['Snow'] = 0.866816 31 | ce_alexnet['Frost'] = 0.826572 32 | ce_alexnet['Fog'] = 0.819324 33 | ce_alexnet['Brightness'] = 0.564592 34 | ce_alexnet['Contrast'] = 0.853204 35 | ce_alexnet['Elastic Transform'] = 0.646056 36 | ce_alexnet['Pixelate'] = 0.717840 37 | ce_alexnet['JPEG Compression'] = 0.606500 38 | 39 | return ce_alexnet 40 | 41 | def get_mce_from_accuracy(accuracy, error_alexnet): 42 | """Computes mean Corruption Error from accuracy""" 43 | error = 100. - accuracy 44 | ce = error / (error_alexnet * 100.) 45 | 46 | return ce 47 | -------------------------------------------------------------------------------- /segmentation/.gitignore: -------------------------------------------------------------------------------- 1 | pretrain 2 | work_dirs 3 | data 4 | results -------------------------------------------------------------------------------- /segmentation/README.md: -------------------------------------------------------------------------------- 1 | # Semantic Segmentation 2 | 3 | Segmentation on ADE20K is implemented based on [MMSegmentation](https://github.com/open-mmlab/mmsegmentation). 
4 | 5 | ## Models 6 | | Model | mIoU | Log | 7 | |:-:|:-:|:-:| 8 | | [LSNet-T](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_t_semfpn.pth) | 40.1 | [lsnet_t_semfpn.json](./logs/lsnet_t_semfpn.json) | 9 | | [LSNet-S](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_s_semfpn.pth) | 41.6 | [lsnet_s_semfpn.json](./logs/lsnet_s_semfpn.json) | 10 | | [LSNet-B](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_b_semfpn.pth) | 43.1 | [lsnet_b_semfpn.json](./logs/lsnet_b_semfpn.json) | 11 | 12 | ## Requirements 13 | ```bash 14 | pip install mmsegmentation==0.30.0 15 | ``` 16 | 17 | ## Data preparation 18 | 19 | Please prepare the ADE20K dataset following the [instructions in MMSeg](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#prepare-datasets). 20 | The data should be organized as: 21 | ``` 22 | ├── segmentation 23 | │   ├── data 24 | │   │   ├── ade 25 | │   │   │   ├── ADEChallengeData2016 26 | │   │   │   │   ├── annotations 27 | │   │   │   │   │   ├── training 28 | │   │   │   │   │   ├── validation 29 | │   │   │   │   ├── images 30 | │   │   │   │   │   ├── training 31 | │   │   │   │   │   ├── validation 32 | 33 | ``` 34 | 35 | ## Testing 36 | ```bash 37 | ./tools/dist_test.sh configs/sem_fpn/fpn_lsnet_b_ade20k_40k.py pretrain/lsnet_b_semfpn.pth 8 --eval mIoU 38 | ``` 39 | 40 | ## Training 41 | Download the ImageNet-1K pretrained weights into `./pretrain` first. 42 | ```bash 43 | ./tools/dist_train.sh configs/sem_fpn/fpn_lsnet_b_ade20k_40k.py 8 --seed 0 --deterministic 44 | ``` 45 | 46 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = 'data/ade/ADEChallengeData2016' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='AlignResize', keep_ratio=True, size_divisor=32), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type='RepeatDataset', 39 | times=50, 40 | dataset=dict( 41 | type=dataset_type, 42 | data_root=data_root, 43 | img_dir='images/training', 44 | ann_dir='annotations/training', 45 | pipeline=train_pipeline)), 46 | val=dict( 47 | type=dataset_type, 48 | data_root=data_root, 49 | img_dir='images/validation', 50 | ann_dir='annotations/validation', 51 | pipeline=test_pipeline), 52 | test=dict( 53 | type=dataset_type, 54 | data_root=data_root, 55 | img_dir='images/validation', 56 | ann_dir='annotations/validation', 57 |
pipeline=test_pipeline)) 58 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # yapf:disable 2 | log_config = dict( 3 | interval=50, 4 | hooks=[ 5 | dict(type='TextLoggerHook', by_epoch=False), 6 | # dict(type='TensorboardLoggerHook') 7 | ]) 8 | # yapf:enable 9 | dist_params = dict(backend='nccl') 10 | log_level = 'INFO' 11 | load_from = None 12 | resume_from = None 13 | workflow = [('train', 1)] 14 | cudnn_benchmark = True 15 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fpn_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 1, 1), 12 | strides=(1, 2, 2, 2), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[256, 512, 1024, 2048], 20 | out_channels=256, 21 | num_outs=4), 22 | decode_head=dict( 23 | type='FPNHead', 24 | in_channels=[256, 256, 256, 256], 25 | in_index=[0, 1, 2, 3], 26 | feature_strides=[4, 8, 16, 32], 27 | channels=128, 28 | dropout_ratio=0.1, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict( 33 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 34 | # model training and testing settings 35 | train_cfg=dict(), 36 | test_cfg=dict(mode='whole')) 37 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=160000) 8 | checkpoint_config = dict(by_epoch=False, interval=16000) 9 | evaluation = dict(interval=16000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=20000) 8 | checkpoint_config = dict(by_epoch=False, interval=2000) 9 | evaluation = dict(interval=2000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | 
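# (with max_iters=40000 and interval=4000 below, this schedule checkpoints and evaluates mIoU at 10 evenly spaced points)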
runner = dict(type='IterBasedRunner', max_iters=40000) 8 | checkpoint_config = dict(by_epoch=False, interval=4000) 9 | evaluation = dict(interval=4000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=80000) 8 | checkpoint_config = dict(by_epoch=False, interval=8000) 9 | evaluation = dict(interval=8000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/sem_fpn/fpn_lsnet_b_ade20k_40k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/fpn_r50.py', 3 | '../_base_/datasets/ade20k.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | # model settings 7 | model = dict( 8 | pretrained=None, 9 | type='EncoderDecoder', 10 | backbone=dict( 11 | type='lsnet_b', 12 | style='pytorch', 13 | pretrained= 'pretrain/lsnet_b.pth', 14 | frozen_stages=-1, 15 | ), 16 | neck=dict( 17 | type='LSNetFPN', 18 | in_channels=[128, 256, 384, 512], 19 | out_channels=256, 20 | num_outs=4, 21 | # num_extra_trans_convs=1, 22 | ), 23 | decode_head=dict(num_classes=150)) 24 | 25 | gpu_multiples = 2 # we use 8 gpu instead of 4 in mmsegmentation, so lr*2 and max_iters/2 26 | # optimizer 27 | optimizer = dict(type='AdamW', lr=0.0001 * gpu_multiples, weight_decay=0.0001) 28 | optimizer_config = dict() 29 | # learning policy 30 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-6, by_epoch=False) 31 | # runtime settings 32 | runner = dict(type='IterBasedRunner', max_iters=80000 // gpu_multiples) 33 | checkpoint_config = dict(by_epoch=False, interval=8000 // gpu_multiples) 34 | evaluation = dict(interval=8000 // gpu_multiples, metric='mIoU') -------------------------------------------------------------------------------- /segmentation/configs/sem_fpn/fpn_lsnet_s_ade20k_40k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/fpn_r50.py', 3 | '../_base_/datasets/ade20k.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | # model settings 7 | model = dict( 8 | pretrained=None, 9 | type='EncoderDecoder', 10 | backbone=dict( 11 | type='lsnet_s', 12 | style='pytorch', 13 | pretrained= 'pretrain/lsnet_s.pth', 14 | frozen_stages=-1, 15 | ), 16 | neck=dict( 17 | type='LSNetFPN', 18 | in_channels=[96, 192, 320, 448], 19 | out_channels=256, 20 | num_outs=4 21 | ), 22 | decode_head=dict(num_classes=150)) 23 | 24 | gpu_multiples = 2 # we use 8 gpu instead of 4 in mmsegmentation, so lr*2 and max_iters/2 25 | # optimizer 26 | optimizer = dict(type='AdamW', lr=0.0001 * gpu_multiples, weight_decay=0.0001) 27 | optimizer_config = dict() 28 | # learning policy 29 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-6, by_epoch=False) 30 | # runtime settings 31 | runner = dict(type='IterBasedRunner', max_iters=80000 // gpu_multiples) 32 | checkpoint_config = dict(by_epoch=False, interval=8000 // gpu_multiples) 33 | evaluation = dict(interval=8000 // gpu_multiples, metric='mIoU') 34 | -------------------------------------------------------------------------------- 
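All three `sem_fpn` configs (fpn_lsnet_b/s above, fpn_lsnet_t below) follow the same `gpu_multiples` convention noted in their comments; a minimal sketch of the arithmetic, assuming (as those comments state) a 4-GPU, 80k-iteration baseline:

```python
# Linear scaling used by the fpn_lsnet_{t,s,b} configs (sketch; the
# 4-GPU / 80k-iter baseline is the one the config comments assume).
gpu_multiples = 2                        # 8 GPUs instead of 4

lr = 0.0001 * gpu_multiples              # AdamW lr: 1e-4 -> 2e-4
max_iters = 80000 // gpu_multiples       # 80k -> 40k iterations
interval = 8000 // gpu_multiples         # checkpoint/eval every 4k iters

# The total number of training samples seen is unchanged
# (samples_per_gpu=4 comes from the ade20k.py dataset config):
assert 4 * 4 * 80000 == 4 * 8 * max_iters  # 1,280,000 images either way
```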
/segmentation/configs/sem_fpn/fpn_lsnet_t_ade20k_40k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/fpn_r50.py', 3 | '../_base_/datasets/ade20k.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | # model settings 7 | model = dict( 8 | pretrained=None, 9 | type='EncoderDecoder', 10 | backbone=dict( 11 | type='lsnet_t', 12 | style='pytorch', 13 | pretrained= 'pretrain/lsnet_t.pth', 14 | frozen_stages=-1, 15 | ), 16 | neck=dict( 17 | type='LSNetFPN', 18 | in_channels=[64, 128, 256, 384], 19 | out_channels=256, 20 | num_outs=4 21 | ), 22 | decode_head=dict(num_classes=150)) 23 | 24 | gpu_multiples = 2 # we use 8 gpu instead of 4 in mmsegmentation, so lr*2 and max_iters/2 25 | # optimizer 26 | optimizer = dict(type='AdamW', lr=0.0001 * gpu_multiples, weight_decay=0.0001) 27 | optimizer_config = dict() 28 | # learning policy 29 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-6, by_epoch=False) 30 | # runtime settings 31 | runner = dict(type='IterBasedRunner', max_iters=80000 // gpu_multiples) 32 | checkpoint_config = dict(by_epoch=False, interval=8000 // gpu_multiples) 33 | evaluation = dict(interval=8000 // gpu_multiples, metric='mIoU') 34 | -------------------------------------------------------------------------------- /segmentation/eval.sh: -------------------------------------------------------------------------------- 1 | PORT=12345 ./tools/dist_test.sh configs/sem_fpn/fpn_lsnet_t_ade20k_40k.py pretrain/lsnet_t_semfpn.pth 8 --eval mIoU -------------------------------------------------------------------------------- /segmentation/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .checkpoint import load_checkpoint, load_state_dict, _load_checkpoint 4 | 5 | __all__ = ['load_checkpoint', 'load_state_dict', '_load_checkpoint'] 6 | -------------------------------------------------------------------------------- /segmentation/mmcv_custom/runner/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | from .checkpoint import save_checkpoint 3 | from .epoch_based_runner import EpochBasedRunnerAmp 4 | 5 | 6 | __all__ = [ 7 | 'EpochBasedRunnerAmp', 'save_checkpoint' 8 | ] 9 | -------------------------------------------------------------------------------- /segmentation/mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | try: import apex  # apex is optional; it is only needed to save AMP state 13 | except ImportError: apex = None 14 | def save_checkpoint(model, filename, optimizer=None, meta=None): 15 | """Save checkpoint to file. 16 | 17 | The checkpoint will have four fields: ``meta``, ``state_dict``, 18 | ``optimizer`` and ``amp``. By default ``meta`` will contain version 19 | and time info. 20 | 21 | Args: 22 | model (Module): Module whose params are to be saved. 23 | filename (str): Checkpoint filename. 24 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 25 | meta (dict, optional): Metadata to be saved in checkpoint.
26 | """ 27 | if meta is None: 28 | meta = {} 29 | elif not isinstance(meta, dict): 30 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 31 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 32 | 33 | if is_module_wrapper(model): 34 | model = model.module 35 | 36 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 37 | # save class name to the meta 38 | meta.update(CLASSES=model.CLASSES) 39 | 40 | checkpoint = { 41 | 'meta': meta, 42 | 'state_dict': weights_to_cpu(get_state_dict(model)) 43 | } 44 | # save optimizer state dict in the checkpoint 45 | if isinstance(optimizer, Optimizer): 46 | checkpoint['optimizer'] = optimizer.state_dict() 47 | elif isinstance(optimizer, dict): 48 | checkpoint['optimizer'] = {} 49 | for name, optim in optimizer.items(): 50 | checkpoint['optimizer'][name] = optim.state_dict() 51 | 52 | # save amp state dict in the checkpoint 53 | checkpoint['amp'] = apex.amp.state_dict() 54 | 55 | if filename.startswith('pavi://'): 56 | try: 57 | from pavi import modelcloud 58 | from pavi.exception import NodeNotFoundError 59 | except ImportError: 60 | raise ImportError( 61 | 'Please install pavi to load checkpoint from modelcloud.') 62 | model_path = filename[7:] 63 | root = modelcloud.Folder() 64 | model_dir, model_name = osp.split(model_path) 65 | try: 66 | model = modelcloud.get(model_dir) 67 | except NodeNotFoundError: 68 | model = root.create_training_model(model_dir) 69 | with TemporaryDirectory() as tmp_dir: 70 | checkpoint_file = osp.join(tmp_dir, model_name) 71 | with open(checkpoint_file, 'wb') as f: 72 | torch.save(checkpoint, f) 73 | f.flush() 74 | model.create_file(checkpoint_file, name=model_name) 75 | else: 76 | mmcv.mkdir_or_exist(osp.dirname(filename)) 77 | # immediately flush buffer 78 | with open(filename, 'wb') as f: 79 | torch.save(checkpoint, f) 80 | f.flush() 81 | -------------------------------------------------------------------------------- /segmentation/mmcv_custom/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import platform 4 | import shutil 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.runner import RUNNERS, EpochBasedRunner 11 | from .checkpoint import save_checkpoint 12 | 13 | 14 | @RUNNERS.register_module() 15 | class EpochBasedRunnerAmp(EpochBasedRunner): 16 | """Epoch-based Runner with AMP support. 17 | 18 | This runner train models epoch by epoch. 19 | """ 20 | 21 | def save_checkpoint(self, 22 | out_dir, 23 | filename_tmpl='epoch_{}.pth', 24 | save_optimizer=True, 25 | meta=None, 26 | create_symlink=True): 27 | """Save the checkpoint. 28 | 29 | Args: 30 | out_dir (str): The directory that checkpoints are saved. 31 | filename_tmpl (str, optional): The checkpoint filename template, 32 | which contains a placeholder for the epoch number. 33 | Defaults to 'epoch_{}.pth'. 34 | save_optimizer (bool, optional): Whether to save the optimizer to 35 | the checkpoint. Defaults to True. 36 | meta (dict, optional): The meta information to be saved in the 37 | checkpoint. Defaults to None. 38 | create_symlink (bool, optional): Whether to create a symlink 39 | "latest.pth" to point to the latest checkpoint. 40 | Defaults to True. 
41 | """ 42 | if meta is None: 43 | meta = dict(epoch=self.epoch + 1, iter=self.iter) 44 | elif isinstance(meta, dict): 45 | meta.update(epoch=self.epoch + 1, iter=self.iter) 46 | else: 47 | raise TypeError( 48 | f'meta should be a dict or None, but got {type(meta)}') 49 | if self.meta is not None: 50 | meta.update(self.meta) 51 | 52 | filename = filename_tmpl.format(self.epoch + 1) 53 | filepath = osp.join(out_dir, filename) 54 | optimizer = self.optimizer if save_optimizer else None 55 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) 56 | # in some environments, `os.symlink` is not supported, you may need to 57 | # set `create_symlink` to False 58 | if create_symlink: 59 | dst_file = osp.join(out_dir, 'latest.pth') 60 | if platform.system() != 'Windows': 61 | mmcv.symlink(filename, dst_file) 62 | else: 63 | shutil.copy(filepath, dst_file) 64 | 65 | def resume(self, 66 | checkpoint, 67 | resume_optimizer=True, 68 | map_location='default'): 69 | if map_location == 'default': 70 | if torch.cuda.is_available(): 71 | device_id = torch.cuda.current_device() 72 | checkpoint = self.load_checkpoint( 73 | checkpoint, 74 | map_location=lambda storage, loc: storage.cuda(device_id)) 75 | else: 76 | checkpoint = self.load_checkpoint(checkpoint) 77 | else: 78 | checkpoint = self.load_checkpoint( 79 | checkpoint, map_location=map_location) 80 | 81 | self._epoch = checkpoint['meta']['epoch'] 82 | self._iter = checkpoint['meta']['iter'] 83 | if 'optimizer' in checkpoint and resume_optimizer: 84 | if isinstance(self.optimizer, Optimizer): 85 | self.optimizer.load_state_dict(checkpoint['optimizer']) 86 | elif isinstance(self.optimizer, dict): 87 | for k in self.optimizer.keys(): 88 | self.optimizer[k].load_state_dict( 89 | checkpoint['optimizer'][k]) 90 | else: 91 | raise TypeError( 92 | 'Optimizer should be dict or torch.optim.Optimizer ' 93 | f'but got {type(self.optimizer)}') 94 | 95 | if 'amp' in checkpoint: 96 | apex.amp.load_state_dict(checkpoint['amp']) 97 | self.logger.info('load amp state dict') 98 | 99 | self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) 100 | -------------------------------------------------------------------------------- /segmentation/mmcv_custom/runner/optimizer.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import OptimizerHook, HOOKS 2 | 3 | @HOOKS.register_module() 4 | class DistOptimizerHook(OptimizerHook): 5 | """Optimizer hook for distributed training.""" 6 | 7 | def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False): 8 | self.grad_clip = grad_clip 9 | self.coalesce = coalesce 10 | self.bucket_size_mb = bucket_size_mb 11 | self.update_interval = update_interval 12 | self.use_fp16 = use_fp16 13 | 14 | def before_run(self, runner): 15 | runner.optimizer.zero_grad() 16 | 17 | def after_train_iter(self, runner): 18 | runner.outputs['loss'] /= self.update_interval 19 | if self.use_fp16: 20 | with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss: 21 | scaled_loss.backward() 22 | else: 23 | runner.outputs['loss'].backward() 24 | if self.every_n_iters(runner, self.update_interval): 25 | if self.grad_clip is not None: 26 | self.clip_grads(runner.model.parameters()) 27 | runner.optimizer.step() 28 | runner.optimizer.zero_grad() -------------------------------------------------------------------------------- /segmentation/model/ska.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | import triton 4 | import triton.language as tl 5 | from torch.amp import custom_fwd, custom_bwd 6 | import math 7 | 8 | def _grid(numel: int, bs: int) -> tuple: 9 | return (triton.cdiv(numel, bs),) 10 | 11 | @triton.jit 12 | def _idx(i, n: int, c: int, h: int, w: int): 13 | ni = i // (c * h * w) 14 | ci = (i // (h * w)) % c 15 | hi = (i // w) % h 16 | wi = i % w 17 | m = i < (n * c * h * w) 18 | return ni, ci, hi, wi, m 19 | 20 | @triton.jit 21 | def ska_fwd( 22 | x_ptr, w_ptr, o_ptr, 23 | n, ic, h, w, ks, pad, wc, 24 | BS: tl.constexpr, 25 | CT: tl.constexpr, AT: tl.constexpr 26 | ): 27 | pid = tl.program_id(0) 28 | start = pid * BS 29 | offs = start + tl.arange(0, BS) 30 | 31 | ni, ci, hi, wi, m = _idx(offs, n, ic, h, w) 32 | val = tl.zeros((BS,), dtype=AT) 33 | 34 | for kh in range(ks): 35 | hin = hi - pad + kh 36 | hb = (hin >= 0) & (hin < h) 37 | for kw in range(ks): 38 | win = wi - pad + kw 39 | b = hb & (win >= 0) & (win < w) 40 | 41 | x_off = ((ni * ic + ci) * h + hin) * w + win 42 | w_off = ((ni * wc + ci % wc) * ks * ks + (kh * ks + kw)) * h * w + hi * w + wi 43 | 44 | x_val = tl.load(x_ptr + x_off, mask=m & b, other=0.0).to(CT) 45 | w_val = tl.load(w_ptr + w_off, mask=m, other=0.0).to(CT) 46 | val += tl.where(b & m, x_val * w_val, 0.0).to(AT) 47 | 48 | tl.store(o_ptr + offs, val.to(CT), mask=m) 49 | 50 | @triton.jit 51 | def ska_bwd_x( 52 | go_ptr, w_ptr, gi_ptr, 53 | n, ic, h, w, ks, pad, wc, 54 | BS: tl.constexpr, 55 | CT: tl.constexpr, AT: tl.constexpr 56 | ): 57 | pid = tl.program_id(0) 58 | start = pid * BS 59 | offs = start + tl.arange(0, BS) 60 | 61 | ni, ci, hi, wi, m = _idx(offs, n, ic, h, w) 62 | val = tl.zeros((BS,), dtype=AT) 63 | 64 | for kh in range(ks): 65 | ho = hi + pad - kh 66 | hb = (ho >= 0) & (ho < h) 67 | for kw in range(ks): 68 | wo = wi + pad - kw 69 | b = hb & (wo >= 0) & (wo < w) 70 | 71 | go_off = ((ni * ic + ci) * h + ho) * w + wo 72 | w_off = ((ni * wc + ci % wc) * ks * ks + (kh * ks + kw)) * h * w + ho * w + wo 73 | 74 | go_val = tl.load(go_ptr + go_off, mask=m & b, other=0.0).to(CT) 75 | w_val = tl.load(w_ptr + w_off, mask=m, other=0.0).to(CT) 76 | val += tl.where(b & m, go_val * w_val, 0.0).to(AT) 77 | 78 | tl.store(gi_ptr + offs, val.to(CT), mask=m) 79 | 80 | @triton.jit 81 | def ska_bwd_w( 82 | go_ptr, x_ptr, gw_ptr, 83 | n, wc, h, w, ic, ks, pad, 84 | BS: tl.constexpr, 85 | CT: tl.constexpr, AT: tl.constexpr 86 | ): 87 | pid = tl.program_id(0) 88 | start = pid * BS 89 | offs = start + tl.arange(0, BS) 90 | 91 | ni, ci, hi, wi, m = _idx(offs, n, wc, h, w) 92 | 93 | for kh in range(ks): 94 | hin = hi - pad + kh 95 | hb = (hin >= 0) & (hin < h) 96 | for kw in range(ks): 97 | win = wi - pad + kw 98 | b = hb & (win >= 0) & (win < w) 99 | w_off = ((ni * wc + ci) * ks * ks + (kh * ks + kw)) * h * w + hi * w + wi 100 | 101 | val = tl.zeros((BS,), dtype=AT) 102 | steps = (ic - ci + wc - 1) // wc 103 | for s in range(tl.max(steps, axis=0)): 104 | cc = ci + s * wc 105 | cm = (cc < ic) & m & b 106 | 107 | x_off = ((ni * ic + cc) * h + hin) * w + win 108 | go_off = ((ni * ic + cc) * h + hi) * w + wi 109 | 110 | x_val = tl.load(x_ptr + x_off, mask=cm, other=0.0).to(CT) 111 | go_val = tl.load(go_ptr + go_off, mask=cm, other=0.0).to(CT) 112 | val += tl.where(cm, x_val * go_val, 0.0).to(AT) 113 | 114 | tl.store(gw_ptr + w_off, val.to(CT), mask=m) 115 | 116 | class SkaFn(Function): 117 | @staticmethod 118 | 
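# Tensor layouts, as inferred from the pointer arithmetic in the kernels:
#   x: (N, C, H, W) contiguous feature map
#   w: (N, wc, ks*ks, H, W) contiguous per-pixel kernels, with wc <= C;
#      input channel ci uses weight channel ci % wc, and the spatial
#      kernel size ks is recovered as sqrt(w.shape[2]) below.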
@custom_fwd(device_type='cuda') 119 | def forward(ctx, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor: 120 | ks = int(math.sqrt(w.shape[2])) 121 | pad = (ks - 1) // 2 122 | ctx.ks, ctx.pad = ks, pad 123 | n, ic, h, width = x.shape 124 | wc = w.shape[1] 125 | o = torch.empty(n, ic, h, width, device=x.device, dtype=x.dtype) 126 | numel = o.numel() 127 | 128 | x = x.contiguous() 129 | w = w.contiguous() 130 | 131 | grid = lambda meta: _grid(numel, meta["BS"]) 132 | 133 | ct = tl.float16 if x.dtype == torch.float16 else (tl.float32 if x.dtype == torch.float32 else tl.float64) 134 | at = tl.float32 if x.dtype == torch.float16 else ct 135 | 136 | ska_fwd[grid](x, w, o, n, ic, h, width, ks, pad, wc, BS=1024, CT=ct, AT=at) 137 | 138 | ctx.save_for_backward(x, w) 139 | ctx.ct, ctx.at = ct, at 140 | return o 141 | 142 | @staticmethod 143 | @custom_bwd(device_type='cuda') 144 | def backward(ctx, go: torch.Tensor) -> tuple: 145 | ks, pad = ctx.ks, ctx.pad 146 | x, w = ctx.saved_tensors 147 | n, ic, h, width = x.shape 148 | wc = w.shape[1] 149 | 150 | go = go.contiguous() 151 | gx = gw = None 152 | ct, at = ctx.ct, ctx.at 153 | 154 | if ctx.needs_input_grad[0]: 155 | gx = torch.empty_like(x) 156 | numel = gx.numel() 157 | ska_bwd_x[lambda meta: _grid(numel, meta["BS"])](go, w, gx, n, ic, h, width, ks, pad, wc, BS=1024, CT=ct, AT=at) 158 | 159 | if ctx.needs_input_grad[1]: 160 | gw = torch.empty_like(w) 161 | numel = gw.numel() // w.shape[2] 162 | ska_bwd_w[lambda meta: _grid(numel, meta["BS"])](go, x, gw, n, wc, h, width, ic, ks, pad, BS=1024, CT=ct, AT=at) 163 | 164 | return gx, gw, None, None 165 | 166 | class SKA(torch.nn.Module): 167 | def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor: 168 | return SkaFn.apply(x, w) # type: ignore 169 | -------------------------------------------------------------------------------- /segmentation/tools/analyze_logs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | """Modified from https://github.com/open- 3 | mmlab/mmdetection/blob/master/tools/analysis_tools/analyze_logs.py.""" 4 | import argparse 5 | import json 6 | from collections import defaultdict 7 | 8 | import matplotlib.pyplot as plt 9 | import seaborn as sns 10 | 11 | 12 | def plot_curve(log_dicts, args): 13 | if args.backend is not None: 14 | plt.switch_backend(args.backend) 15 | sns.set_style(args.style) 16 | # if legend is None, use {filename}_{key} as legend 17 | legend = args.legend 18 | if legend is None: 19 | legend = [] 20 | for json_log in args.json_logs: 21 | for metric in args.keys: 22 | legend.append(f'{json_log}_{metric}') 23 | assert len(legend) == (len(args.json_logs) * len(args.keys)) 24 | metrics = args.keys 25 | 26 | num_metrics = len(metrics) 27 | for i, log_dict in enumerate(log_dicts): 28 | epochs = list(log_dict.keys()) 29 | for j, metric in enumerate(metrics): 30 | print(f'plot curve of {args.json_logs[i]}, metric is {metric}') 31 | plot_epochs = [] 32 | plot_iters = [] 33 | plot_values = [] 34 | # In some log files, the iter number is not correct; `pre_iter` is 35 | # used to prevent generating wrong lines.
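# Entries whose recorded iter decreases (e.g. when a run restarts) are
# skipped by the `pre_iter` check below.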
36 | pre_iter = -1 37 | for epoch in epochs: 38 | epoch_logs = log_dict[epoch] 39 | if metric not in epoch_logs.keys(): 40 | continue 41 | if metric in ['mIoU', 'mAcc', 'aAcc']: 42 | plot_epochs.append(epoch) 43 | plot_values.append(epoch_logs[metric][0]) 44 | else: 45 | for idx in range(len(epoch_logs[metric])): 46 | if pre_iter > epoch_logs['iter'][idx]: 47 | continue 48 | pre_iter = epoch_logs['iter'][idx] 49 | plot_iters.append(epoch_logs['iter'][idx]) 50 | plot_values.append(epoch_logs[metric][idx]) 51 | ax = plt.gca() 52 | label = legend[i * num_metrics + j] 53 | if metric in ['mIoU', 'mAcc', 'aAcc']: 54 | ax.set_xticks(plot_epochs) 55 | plt.xlabel('epoch') 56 | plt.plot(plot_epochs, plot_values, label=label, marker='o') 57 | else: 58 | plt.xlabel('iter') 59 | plt.plot(plot_iters, plot_values, label=label, linewidth=0.5) 60 | plt.legend() 61 | if args.title is not None: 62 | plt.title(args.title) 63 | if args.out is None: 64 | plt.show() 65 | else: 66 | print(f'save curve to: {args.out}') 67 | plt.savefig(args.out) 68 | plt.cla() 69 | 70 | 71 | def parse_args(): 72 | parser = argparse.ArgumentParser(description='Analyze Json Log') 73 | parser.add_argument( 74 | 'json_logs', 75 | type=str, 76 | nargs='+', 77 | help='path of train log in json format') 78 | parser.add_argument( 79 | '--keys', 80 | type=str, 81 | nargs='+', 82 | default=['mIoU'], 83 | help='the metric that you want to plot') 84 | parser.add_argument('--title', type=str, help='title of figure') 85 | parser.add_argument( 86 | '--legend', 87 | type=str, 88 | nargs='+', 89 | default=None, 90 | help='legend of each plot') 91 | parser.add_argument( 92 | '--backend', type=str, default=None, help='backend of plt') 93 | parser.add_argument( 94 | '--style', type=str, default='dark', help='style of plt') 95 | parser.add_argument('--out', type=str, default=None) 96 | args = parser.parse_args() 97 | return args 98 | 99 | 100 | def load_json_logs(json_logs): 101 | # load and convert json_logs to log_dict, key is epoch, value is a sub dict 102 | # keys of sub dict are different metrics 103 | # value of sub dict is a list of corresponding values of all iterations 104 | log_dicts = [dict() for _ in json_logs] 105 | for json_log, log_dict in zip(json_logs, log_dicts): 106 | with open(json_log, 'r') as log_file: 107 | for line in log_file: 108 | log = json.loads(line.strip()) 109 | # skip lines without `epoch` field 110 | if 'epoch' not in log: 111 | continue 112 | epoch = log.pop('epoch') 113 | if epoch not in log_dict: 114 | log_dict[epoch] = defaultdict(list) 115 | for k, v in log.items(): 116 | log_dict[epoch][k].append(v) 117 | return log_dicts 118 | 119 | 120 | def main(): 121 | args = parse_args() 122 | json_logs = args.json_logs 123 | for json_log in json_logs: 124 | assert json_log.endswith('.json') 125 | log_dicts = load_json_logs(json_logs) 126 | plot_curve(log_dicts, args) 127 | 128 | 129 | if __name__ == '__main__': 130 | main() 131 | -------------------------------------------------------------------------------- /segmentation/tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
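# Measures pure inference speed: the test dataloader is built with one
# image per GPU, the first 5 iterations are discarded as warm-up, and the
# remaining forward passes are timed between torch.cuda.synchronize()
# calls until 200 images have been processed, reporting the average FPS.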
2 | import argparse 3 | import time 4 | 5 | import torch 6 | from mmcv import Config 7 | from mmcv.parallel import MMDataParallel 8 | from mmcv.runner import load_checkpoint, wrap_fp16_model 9 | 10 | from mmseg.datasets import build_dataloader, build_dataset 11 | from mmseg.models import build_segmentor 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='MMSeg benchmark a model') 16 | parser.add_argument('config', help='test config file path') 17 | parser.add_argument('checkpoint', help='checkpoint file') 18 | parser.add_argument( 19 | '--log-interval', type=int, default=50, help='interval of logging') 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def main(): 25 | args = parse_args() 26 | 27 | cfg = Config.fromfile(args.config) 28 | # set cudnn_benchmark 29 | torch.backends.cudnn.benchmark = False 30 | cfg.model.pretrained = None 31 | cfg.data.test.test_mode = True 32 | 33 | # build the dataloader 34 | # TODO: support multiple images per gpu (only minor changes are needed) 35 | dataset = build_dataset(cfg.data.test) 36 | data_loader = build_dataloader( 37 | dataset, 38 | samples_per_gpu=1, 39 | workers_per_gpu=cfg.data.workers_per_gpu, 40 | dist=False, 41 | shuffle=False) 42 | 43 | # build the model and load checkpoint 44 | cfg.model.train_cfg = None 45 | model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg')) 46 | fp16_cfg = cfg.get('fp16', None) 47 | if fp16_cfg is not None: 48 | wrap_fp16_model(model) 49 | load_checkpoint(model, args.checkpoint, map_location='cpu') 50 | 51 | model = MMDataParallel(model, device_ids=[0]) 52 | 53 | model.eval() 54 | 55 | # the first several iterations may be very slow, so skip them 56 | num_warmup = 5 57 | pure_inf_time = 0 58 | total_iters = 200 59 | 60 | # benchmark with 200 images and take the average 61 | for i, data in enumerate(data_loader): 62 | 63 | torch.cuda.synchronize() 64 | start_time = time.perf_counter() 65 | 66 | with torch.no_grad(): 67 | model(return_loss=False, rescale=True, **data) 68 | 69 | torch.cuda.synchronize() 70 | elapsed = time.perf_counter() - start_time 71 | 72 | if i >= num_warmup: 73 | pure_inf_time += elapsed 74 | if (i + 1) % args.log_interval == 0: 75 | fps = (i + 1 - num_warmup) / pure_inf_time 76 | print(f'Done image [{i + 1:<3}/ {total_iters}], ' 77 | f'fps: {fps:.2f} img / s') 78 | 79 | if (i + 1) == total_iters: 80 | fps = (i + 1 - num_warmup) / pure_inf_time 81 | print(f'Overall fps: {fps:.2f} img / s') 82 | break 83 | 84 | 85 | if __name__ == '__main__': 86 | main() 87 | -------------------------------------------------------------------------------- /segmentation/tools/browse_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import warnings 4 | from pathlib import Path 5 | 6 | import mmcv 7 | import numpy as np 8 | from mmcv import Config 9 | 10 | from mmseg.datasets.builder import build_dataset 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Browse a dataset') 15 | parser.add_argument('config', help='train config file path') 16 | parser.add_argument( 17 | '--show-origin', 18 | default=False, 19 | action='store_true', 20 | help='if True, omit all augmentation in pipeline,' 21 | ' show origin image and seg map') 22 | parser.add_argument( 23 | '--skip-type', 24 | type=str, 25 | nargs='+', 26 | default=['DefaultFormatBundle', 'Normalize', 'Collect'], 27 | help='skip some useless pipeline steps; if `show-origin` is true, ' 28 | 'all pipeline steps except
`Load` will be skipped') 29 | parser.add_argument( 30 | '--output-dir', 31 | default='./output', 32 | type=str, 33 | help='If there is no display interface, you can save the results here') 34 | parser.add_argument('--show', default=False, action='store_true') 35 | parser.add_argument( 36 | '--show-interval', 37 | type=int, 38 | default=999, 39 | help='the interval of show (ms)') 40 | parser.add_argument( 41 | '--opacity', 42 | type=float, 43 | default=0.5, 44 | help='the opacity of semantic map') 45 | args = parser.parse_args() 46 | return args 47 | 48 | 49 | def imshow_semantic(img, 50 | seg, 51 | class_names, 52 | palette=None, 53 | win_name='', 54 | show=False, 55 | wait_time=0, 56 | out_file=None, 57 | opacity=0.5): 58 | """Draw `result` over `img`. 59 | 60 | Args: 61 | img (str or Tensor): The image to be displayed. 62 | seg (Tensor): The semantic segmentation results to draw over 63 | `img`. 64 | class_names (list[str]): Names of each class. 65 | palette (list[list[int]] | np.ndarray | None): The palette of 66 | segmentation map. If None is given, random palette will be 67 | generated. Default: None 68 | win_name (str): The window name. 69 | wait_time (int): Value of waitKey param. 70 | Default: 0. 71 | show (bool): Whether to show the image. 72 | Default: False. 73 | out_file (str or None): The filename to write the image. 74 | Default: None. 75 | opacity (float): Opacity of painted segmentation map. 76 | Default: 0.5. 77 | Must be in (0, 1] range. 78 | Returns: 79 | img (Tensor): The drawn image; returned only when neither `show` nor `out_file` is set. 80 | """ 81 | img = mmcv.imread(img) 82 | img = img.copy() 83 | if palette is None: 84 | palette = np.random.randint(0, 255, size=(len(class_names), 3)) 85 | palette = np.array(palette) 86 | assert palette.shape[0] == len(class_names) 87 | assert palette.shape[1] == 3 88 | assert len(palette.shape) == 2 89 | assert 0 < opacity <= 1.0 90 | color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) 91 | for label, color in enumerate(palette): 92 | color_seg[seg == label, :] = color 93 | # convert to BGR 94 | color_seg = color_seg[..., ::-1] 95 | 96 | img = img * (1 - opacity) + color_seg * opacity 97 | img = img.astype(np.uint8) 98 | # if out_file specified, do not show image in window 99 | if out_file is not None: 100 | show = False 101 | 102 | if show: 103 | mmcv.imshow(img, win_name, wait_time) 104 | if out_file is not None: 105 | mmcv.imwrite(img, out_file) 106 | 107 | if not (show or out_file): 108 | warnings.warn('show==False and out_file is not specified, only ' 109 | 'the result image will be returned') 110 | return img 111 | 112 | 113 | def _retrieve_data_cfg(_data_cfg, skip_type, show_origin): 114 | if show_origin is True: 115 | # only keep pipeline of Loading data and ann 116 | _data_cfg['pipeline'] = [ 117 | x for x in _data_cfg.pipeline if 'Load' in x['type'] 118 | ] 119 | else: 120 | _data_cfg['pipeline'] = [ 121 | x for x in _data_cfg.pipeline if x['type'] not in skip_type 122 | ] 123 | 124 | 125 | def retrieve_data_cfg(config_path, skip_type, show_origin=False): 126 | cfg = Config.fromfile(config_path) 127 | train_data_cfg = cfg.data.train 128 | if isinstance(train_data_cfg, list): 129 | for _data_cfg in train_data_cfg: 130 | if 'pipeline' in _data_cfg: 131 | _retrieve_data_cfg(_data_cfg, skip_type, show_origin) 132 | elif 'dataset' in _data_cfg: 133 | _retrieve_data_cfg(_data_cfg['dataset'], skip_type, 134 | show_origin) 135 | else: 136 | raise ValueError 137 | elif 'dataset' in train_data_cfg: 138 | _retrieve_data_cfg(train_data_cfg['dataset'], skip_type,
show_origin) 139 | else: 140 | _retrieve_data_cfg(train_data_cfg, skip_type, show_origin) 141 | return cfg 142 | 143 | 144 | def main(): 145 | args = parse_args() 146 | cfg = retrieve_data_cfg(args.config, args.skip_type, args.show_origin) 147 | dataset = build_dataset(cfg.data.train) 148 | progress_bar = mmcv.ProgressBar(len(dataset)) 149 | for item in dataset: 150 | filename = os.path.join(args.output_dir, 151 | Path(item['filename']).name 152 | ) if args.output_dir is not None else None 153 | imshow_semantic( 154 | item['img'], 155 | item['gt_semantic_seg'], 156 | dataset.CLASSES, 157 | dataset.PALETTE, 158 | show=args.show, 159 | wait_time=args.show_interval, 160 | out_file=filename, 161 | opacity=args.opacity, 162 | ) 163 | progress_bar.update() 164 | 165 | 166 | if __name__ == '__main__': 167 | main() 168 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/chase_db1.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | import os.path as osp 5 | import tempfile 6 | import zipfile 7 | 8 | import mmcv 9 | 10 | CHASE_DB1_LEN = 28 * 3 11 | TRAINING_LEN = 60 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser( 16 | description='Convert CHASE_DB1 dataset to mmsegmentation format') 17 | parser.add_argument('dataset_path', help='path of CHASEDB1.zip') 18 | parser.add_argument('--tmp_dir', help='path of the temporary directory') 19 | parser.add_argument('-o', '--out_dir', help='output path') 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def main(): 25 | args = parse_args() 26 | dataset_path = args.dataset_path 27 | if args.out_dir is None: 28 | out_dir = osp.join('data', 'CHASE_DB1') 29 | else: 30 | out_dir = args.out_dir 31 | 32 | print('Making directories...') 33 | mmcv.mkdir_or_exist(out_dir) 34 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) 35 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) 36 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) 37 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) 38 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) 39 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) 40 | 41 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 42 | print('Extracting CHASEDB1.zip...') 43 | zip_file = zipfile.ZipFile(dataset_path) 44 | zip_file.extractall(tmp_dir) 45 | 46 | print('Generating training dataset...') 47 | 48 | assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \ 49 | 'len(os.listdir(tmp_dir)) != {}'.format(CHASE_DB1_LEN) 50 | 51 | for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: 52 | img = mmcv.imread(osp.join(tmp_dir, img_name)) 53 | if osp.splitext(img_name)[1] == '.jpg': 54 | mmcv.imwrite( 55 | img, 56 | osp.join(out_dir, 'images', 'training', 57 | osp.splitext(img_name)[0] + '.png')) 58 | else: 59 | # The annotation img should be divided by 128, because some of 60 | # the annotation imgs are not standard. We should set a 61 | # threshold to convert the nonstandard annotation imgs. 
The 62 | # value divided by 128 is equivalent to '1 if value >= 128 63 | # else 0' 64 | mmcv.imwrite( 65 | img[:, :, 0] // 128, 66 | osp.join(out_dir, 'annotations', 'training', 67 | osp.splitext(img_name)[0] + '.png')) 68 | 69 | for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: 70 | img = mmcv.imread(osp.join(tmp_dir, img_name)) 71 | if osp.splitext(img_name)[1] == '.jpg': 72 | mmcv.imwrite( 73 | img, 74 | osp.join(out_dir, 'images', 'validation', 75 | osp.splitext(img_name)[0] + '.png')) 76 | else: 77 | mmcv.imwrite( 78 | img[:, :, 0] // 128, 79 | osp.join(out_dir, 'annotations', 'validation', 80 | osp.splitext(img_name)[0] + '.png')) 81 | 82 | print('Removing the temporary files...') 83 | 84 | print('Done!') 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | 5 | import mmcv 6 | from cityscapesscripts.preparation.json2labelImg import json2labelImg 7 | 8 | 9 | def convert_json_to_label(json_file): 10 | label_file = json_file.replace('_polygons.json', '_labelTrainIds.png') 11 | json2labelImg(json_file, label_file, 'trainIds') 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser( 16 | description='Convert Cityscapes annotations to TrainIds') 17 | parser.add_argument('cityscapes_path', help='cityscapes data path') 18 | parser.add_argument('--gt-dir', default='gtFine', type=str) 19 | parser.add_argument('-o', '--out-dir', help='output path') 20 | parser.add_argument( 21 | '--nproc', default=1, type=int, help='number of processes') 22 | args = parser.parse_args() 23 | return args 24 | 25 | 26 | def main(): 27 | args = parse_args() 28 | cityscapes_path = args.cityscapes_path 29 | out_dir = args.out_dir if args.out_dir else cityscapes_path 30 | mmcv.mkdir_or_exist(out_dir) 31 | 32 | gt_dir = osp.join(cityscapes_path, args.gt_dir) 33 | 34 | poly_files = [] 35 | for poly in mmcv.scandir(gt_dir, '_polygons.json', recursive=True): 36 | poly_file = osp.join(gt_dir, poly) 37 | poly_files.append(poly_file) 38 | if args.nproc > 1: 39 | mmcv.track_parallel_progress(convert_json_to_label, poly_files, 40 | args.nproc) 41 | else: 42 | mmcv.track_progress(convert_json_to_label, poly_files) 43 | 44 | split_names = ['train', 'val', 'test'] 45 | 46 | for split in split_names: 47 | filenames = [] 48 | for poly in mmcv.scandir( 49 | osp.join(gt_dir, split), '_polygons.json', recursive=True): 50 | filenames.append(poly.replace('_gtFine_polygons.json', '')) 51 | with open(osp.join(out_dir, f'{split}.txt'), 'w') as f: 52 | f.writelines(name + '\n' for name in filenames) 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/drive.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
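# Converts the DRIVE retinal-vessel dataset into the images/annotations
# layout expected by mmseg. Note that the manual annotations are decoded
# via cv2.VideoCapture rather than mmcv.imread, presumably because they
# ship as GIF files.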
2 | import argparse 3 | import os 4 | import os.path as osp 5 | import tempfile 6 | import zipfile 7 | 8 | import cv2 9 | import mmcv 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser( 14 | description='Convert DRIVE dataset to mmsegmentation format') 15 | parser.add_argument( 16 | 'training_path', help='the training part of DRIVE dataset') 17 | parser.add_argument( 18 | 'testing_path', help='the testing part of DRIVE dataset') 19 | parser.add_argument('--tmp_dir', help='path of the temporary directory') 20 | parser.add_argument('-o', '--out_dir', help='output path') 21 | args = parser.parse_args() 22 | return args 23 | 24 | 25 | def main(): 26 | args = parse_args() 27 | training_path = args.training_path 28 | testing_path = args.testing_path 29 | if args.out_dir is None: 30 | out_dir = osp.join('data', 'DRIVE') 31 | else: 32 | out_dir = args.out_dir 33 | 34 | print('Making directories...') 35 | mmcv.mkdir_or_exist(out_dir) 36 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) 37 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) 38 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) 39 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) 40 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) 41 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) 42 | 43 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 44 | print('Extracting training.zip...') 45 | zip_file = zipfile.ZipFile(training_path) 46 | zip_file.extractall(tmp_dir) 47 | 48 | print('Generating training dataset...') 49 | now_dir = osp.join(tmp_dir, 'training', 'images') 50 | for img_name in os.listdir(now_dir): 51 | img = mmcv.imread(osp.join(now_dir, img_name)) 52 | mmcv.imwrite( 53 | img, 54 | osp.join( 55 | out_dir, 'images', 'training', 56 | osp.splitext(img_name)[0].replace('_training', '') + 57 | '.png')) 58 | 59 | now_dir = osp.join(tmp_dir, 'training', '1st_manual') 60 | for img_name in os.listdir(now_dir): 61 | cap = cv2.VideoCapture(osp.join(now_dir, img_name)) 62 | ret, img = cap.read() 63 | mmcv.imwrite( 64 | img[:, :, 0] // 128, 65 | osp.join(out_dir, 'annotations', 'training', 66 | osp.splitext(img_name)[0] + '.png')) 67 | 68 | print('Extracting test.zip...') 69 | zip_file = zipfile.ZipFile(testing_path) 70 | zip_file.extractall(tmp_dir) 71 | 72 | print('Generating validation dataset...') 73 | now_dir = osp.join(tmp_dir, 'test', 'images') 74 | for img_name in os.listdir(now_dir): 75 | img = mmcv.imread(osp.join(now_dir, img_name)) 76 | mmcv.imwrite( 77 | img, 78 | osp.join( 79 | out_dir, 'images', 'validation', 80 | osp.splitext(img_name)[0].replace('_test', '') + '.png')) 81 | 82 | now_dir = osp.join(tmp_dir, 'test', '1st_manual') 83 | if osp.exists(now_dir): 84 | for img_name in os.listdir(now_dir): 85 | cap = cv2.VideoCapture(osp.join(now_dir, img_name)) 86 | ret, img = cap.read() 87 | # The annotation img should be divided by 128, because some of 88 | # the annotation imgs are not standard. We should set a 89 | # threshold to convert the nonstandard annotation imgs. 
The 90 | # value divided by 128 is equivalent to '1 if value >= 128 91 | # else 0' 92 | mmcv.imwrite( 93 | img[:, :, 0] // 128, 94 | osp.join(out_dir, 'annotations', 'validation', 95 | osp.splitext(img_name)[0] + '.png')) 96 | 97 | now_dir = osp.join(tmp_dir, 'test', '2nd_manual') 98 | if osp.exists(now_dir): 99 | for img_name in os.listdir(now_dir): 100 | cap = cv2.VideoCapture(osp.join(now_dir, img_name)) 101 | ret, img = cap.read() 102 | mmcv.imwrite( 103 | img[:, :, 0] // 128, 104 | osp.join(out_dir, 'annotations', 'validation', 105 | osp.splitext(img_name)[0] + '.png')) 106 | 107 | print('Removing the temporary files...') 108 | 109 | print('Done!') 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/hrf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | import os.path as osp 5 | import tempfile 6 | import zipfile 7 | 8 | import mmcv 9 | 10 | HRF_LEN = 15 11 | TRAINING_LEN = 5 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser( 16 | description='Convert HRF dataset to mmsegmentation format') 17 | parser.add_argument('healthy_path', help='the path of healthy.zip') 18 | parser.add_argument( 19 | 'healthy_manualsegm_path', help='the path of healthy_manualsegm.zip') 20 | parser.add_argument('glaucoma_path', help='the path of glaucoma.zip') 21 | parser.add_argument( 22 | 'glaucoma_manualsegm_path', help='the path of glaucoma_manualsegm.zip') 23 | parser.add_argument( 24 | 'diabetic_retinopathy_path', 25 | help='the path of diabetic_retinopathy.zip') 26 | parser.add_argument( 27 | 'diabetic_retinopathy_manualsegm_path', 28 | help='the path of diabetic_retinopathy_manualsegm.zip') 29 | parser.add_argument('--tmp_dir', help='path of the temporary directory') 30 | parser.add_argument('-o', '--out_dir', help='output path') 31 | args = parser.parse_args() 32 | return args 33 | 34 | 35 | def main(): 36 | args = parse_args() 37 | images_path = [ 38 | args.healthy_path, args.glaucoma_path, args.diabetic_retinopathy_path 39 | ] 40 | annotations_path = [ 41 | args.healthy_manualsegm_path, args.glaucoma_manualsegm_path, 42 | args.diabetic_retinopathy_manualsegm_path 43 | ] 44 | if args.out_dir is None: 45 | out_dir = osp.join('data', 'HRF') 46 | else: 47 | out_dir = args.out_dir 48 | 49 | print('Making directories...') 50 | mmcv.mkdir_or_exist(out_dir) 51 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) 52 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) 53 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) 54 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) 55 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) 56 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) 57 | 58 | print('Generating images...') 59 | for now_path in images_path: 60 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 61 | zip_file = zipfile.ZipFile(now_path) 62 | zip_file.extractall(tmp_dir) 63 | 64 | assert len(os.listdir(tmp_dir)) == HRF_LEN, \ 65 | 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN) 66 | 67 | for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: 68 | img = mmcv.imread(osp.join(tmp_dir, filename)) 69 | mmcv.imwrite( 70 | img, 71 | osp.join(out_dir, 'images', 'training', 72 | osp.splitext(filename)[0] + '.png')) 73 | for filename in 
sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: 74 | img = mmcv.imread(osp.join(tmp_dir, filename)) 75 | mmcv.imwrite( 76 | img, 77 | osp.join(out_dir, 'images', 'validation', 78 | osp.splitext(filename)[0] + '.png')) 79 | 80 | print('Generating annotations...') 81 | for now_path in annotations_path: 82 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 83 | zip_file = zipfile.ZipFile(now_path) 84 | zip_file.extractall(tmp_dir) 85 | 86 | assert len(os.listdir(tmp_dir)) == HRF_LEN, \ 87 | 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN) 88 | 89 | for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: 90 | img = mmcv.imread(osp.join(tmp_dir, filename)) 91 | # The annotation img should be divided by 128, because some of 92 | # the annotation imgs are not standard. We should set a 93 | # threshold to convert the nonstandard annotation imgs. The 94 | # value divided by 128 is equivalent to '1 if value >= 128 95 | # else 0' 96 | mmcv.imwrite( 97 | img[:, :, 0] // 128, 98 | osp.join(out_dir, 'annotations', 'training', 99 | osp.splitext(filename)[0] + '.png')) 100 | for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: 101 | img = mmcv.imread(osp.join(tmp_dir, filename)) 102 | mmcv.imwrite( 103 | img[:, :, 0] // 128, 104 | osp.join(out_dir, 'annotations', 'validation', 105 | osp.splitext(filename)[0] + '.png')) 106 | 107 | print('Done!') 108 | 109 | 110 | if __name__ == '__main__': 111 | main() 112 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/pascal_context.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from functools import partial 5 | 6 | import mmcv 7 | import numpy as np 8 | from detail import Detail 9 | from PIL import Image 10 | 11 | _mapping = np.sort( 12 | np.array([ 13 | 0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284, 14 | 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59, 15 | 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355, 16 | 85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115 17 | ])) 18 | _key = np.array(range(len(_mapping))).astype('uint8') 19 | 20 | 21 | def generate_labels(img_id, detail, out_dir): 22 | 23 | def _class_to_index(mask, _mapping, _key): 24 | # assert the values 25 | values = np.unique(mask) 26 | for i in range(len(values)): 27 | assert (values[i] in _mapping) 28 | index = np.digitize(mask.ravel(), _mapping, right=True) 29 | return _key[index].reshape(mask.shape) 30 | 31 | mask = Image.fromarray( 32 | _class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key)) 33 | filename = img_id['file_name'] 34 | mask.save(osp.join(out_dir, filename.replace('jpg', 'png'))) 35 | return osp.splitext(osp.basename(filename))[0] 36 | 37 | 38 | def parse_args(): 39 | parser = argparse.ArgumentParser( 40 | description='Convert PASCAL VOC annotations to mmsegmentation format') 41 | parser.add_argument('devkit_path', help='pascal voc devkit path') 42 | parser.add_argument('json_path', help='annotation json filepath') 43 | parser.add_argument('-o', '--out_dir', help='output path') 44 | args = parser.parse_args() 45 | return args 46 | 47 | 48 | def main(): 49 | args = parse_args() 50 | devkit_path = args.devkit_path 51 | if args.out_dir is None: 52 | out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext') 53 | else: 54 | out_dir = args.out_dir 55 | json_path =
args.json_path 56 | mmcv.mkdir_or_exist(out_dir) 57 | img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages') 58 | 59 | train_detail = Detail(json_path, img_dir, 'train') 60 | train_ids = train_detail.getImgs() 61 | 62 | val_detail = Detail(json_path, img_dir, 'val') 63 | val_ids = val_detail.getImgs() 64 | 65 | mmcv.mkdir_or_exist( 66 | osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext')) 67 | 68 | train_list = mmcv.track_progress( 69 | partial(generate_labels, detail=train_detail, out_dir=out_dir), 70 | train_ids) 71 | with open( 72 | osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', 73 | 'train.txt'), 'w') as f: 74 | f.writelines(line + '\n' for line in sorted(train_list)) 75 | 76 | val_list = mmcv.track_progress( 77 | partial(generate_labels, detail=val_detail, out_dir=out_dir), val_ids) 78 | with open( 79 | osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', 80 | 'val.txt'), 'w') as f: 81 | f.writelines(line + '\n' for line in sorted(val_list)) 82 | 83 | print('Done!') 84 | 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/voc_aug.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from functools import partial 5 | 6 | import mmcv 7 | import numpy as np 8 | from PIL import Image 9 | from scipy.io import loadmat 10 | 11 | AUG_LEN = 10582 12 | 13 | 14 | def convert_mat(mat_file, in_dir, out_dir): 15 | data = loadmat(osp.join(in_dir, mat_file)) 16 | mask = data['GTcls'][0]['Segmentation'][0].astype(np.uint8) 17 | seg_filename = osp.join(out_dir, mat_file.replace('.mat', '.png')) 18 | Image.fromarray(mask).save(seg_filename, 'PNG') 19 | 20 | 21 | def generate_aug_list(merged_list, excluded_list): 22 | return list(set(merged_list) - set(excluded_list)) 23 | 24 | 25 | def parse_args(): 26 | parser = argparse.ArgumentParser( 27 | description='Convert PASCAL VOC annotations to mmsegmentation format') 28 | parser.add_argument('devkit_path', help='pascal voc devkit path') 29 | parser.add_argument('aug_path', help='pascal voc aug path') 30 | parser.add_argument('-o', '--out_dir', help='output path') 31 | parser.add_argument( 32 | '--nproc', default=1, type=int, help='number of processes') 33 | args = parser.parse_args() 34 | return args 35 | 36 | 37 | def main(): 38 | args = parse_args() 39 | devkit_path = args.devkit_path 40 | aug_path = args.aug_path 41 | nproc = args.nproc 42 | if args.out_dir is None: 43 | out_dir = osp.join(devkit_path, 'VOC2012', 'SegmentationClassAug') 44 | else: 45 | out_dir = args.out_dir 46 | mmcv.mkdir_or_exist(out_dir) 47 | in_dir = osp.join(aug_path, 'dataset', 'cls') 48 | 49 | mmcv.track_parallel_progress( 50 | partial(convert_mat, in_dir=in_dir, out_dir=out_dir), 51 | list(mmcv.scandir(in_dir, suffix='.mat')), 52 | nproc=nproc) 53 | 54 | full_aug_list = [] 55 | with open(osp.join(aug_path, 'dataset', 'train.txt')) as f: 56 | full_aug_list += [line.strip() for line in f] 57 | with open(osp.join(aug_path, 'dataset', 'val.txt')) as f: 58 | full_aug_list += [line.strip() for line in f] 59 | 60 | with open( 61 | osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 62 | 'train.txt')) as f: 63 | ori_train_list = [line.strip() for line in f] 64 | with open( 65 | osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 66 | 'val.txt')) as f: 67 | val_list = [line.strip() for line in f]
68 | 69 | aug_train_list = generate_aug_list(ori_train_list + full_aug_list, 70 | val_list) 71 | assert len(aug_train_list) == AUG_LEN, 'len(aug_train_list) != {}'.format( 72 | AUG_LEN) 73 | 74 | with open( 75 | osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 76 | 'trainaug.txt'), 'w') as f: 77 | f.writelines(line + '\n' for line in aug_train_list) 78 | 79 | aug_list = generate_aug_list(full_aug_list, ori_train_list + val_list) 80 | assert len(aug_list) == AUG_LEN - len( 81 | ori_train_list), 'len(aug_list) != {}'.format(AUG_LEN - 82 | len(ori_train_list)) 83 | with open( 84 | osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 'aug.txt'), 85 | 'w') as f: 86 | f.writelines(line + '\n' for line in aug_list) 87 | 88 | print('Done!') 89 | 90 | 91 | if __name__ == '__main__': 92 | main() 93 | -------------------------------------------------------------------------------- /segmentation/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | NCCL_P2P_DISABLE=1 \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /segmentation/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | NCCL_P2P_DISABLE=1 \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/train.py \ 19 | $CONFIG \ 20 | --launcher pytorch ${@:3} 21 | -------------------------------------------------------------------------------- /segmentation/tools/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | from mmcv import Config 5 | from mmcv.cnn import get_model_complexity_info 6 | 7 | from mmseg.models import build_segmentor 8 | import sys 9 | sys.path.append("..") 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Get the FLOPs of a segmentor') 13 | parser.add_argument('config', help='train config file path') 14 | parser.add_argument( 15 | '--shape', 16 | type=int, 17 | nargs='+', 18 | default=[512, 512], 19 | help='input image size') 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def main(): 25 | 26 | args = parse_args() 27 | 28 | if len(args.shape) == 1: 29 | input_shape = (3, args.shape[0], args.shape[0]) 30 | elif len(args.shape) == 2: 31 | input_shape = (3, ) + tuple(args.shape) 32 | else: 33 | raise ValueError('invalid input shape') 34 | 35 | cfg = Config.fromfile(args.config) 36 | cfg.model.pretrained = None 37 | model = build_segmentor( 38 | cfg.model, 39 | train_cfg=cfg.get('train_cfg'), 40 | test_cfg=cfg.get('test_cfg')).cuda() 41 | model.eval() 42 | 43 | if hasattr(model, 'forward_dummy'): 44 | model.forward = model.forward_dummy 45 | else: 46 | raise NotImplementedError( 47 | 'FLOPs counter is currently not supported with {}'.
48 | format(model.__class__.__name__)) 49 | 50 | flops, params = get_model_complexity_info(model, input_shape) 51 | split_line = '=' * 30 52 | print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format( 53 | split_line, input_shape, flops, params)) 54 | print('!!!Please be cautious if you use the results in papers. ' 55 | 'You may need to check if all ops are supported and verify that the ' 56 | 'flops computation is correct.') 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /segmentation/tools/model_converters/mit2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmcv 7 | import torch 8 | from mmcv.runner import CheckpointLoader 9 | 10 | 11 | def convert_mit(ckpt): 12 | new_ckpt = OrderedDict() 13 | # Process the concat between q linear weights and kv linear weights 14 | for k, v in ckpt.items(): 15 | if k.startswith('head'): 16 | continue 17 | # patch embedding conversion 18 | elif k.startswith('patch_embed'): 19 | stage_i = int(k.split('.')[0].replace('patch_embed', '')) 20 | new_k = k.replace(f'patch_embed{stage_i}', f'layers.{stage_i-1}.0') 21 | new_v = v 22 | if 'proj.' in new_k: 23 | new_k = new_k.replace('proj.', 'projection.') 24 | # transformer encoder layer conversion 25 | elif k.startswith('block'): 26 | stage_i = int(k.split('.')[0].replace('block', '')) 27 | new_k = k.replace(f'block{stage_i}', f'layers.{stage_i-1}.1') 28 | new_v = v 29 | if 'attn.q.' in new_k: 30 | sub_item_k = k.replace('q.', 'kv.') 31 | new_k = new_k.replace('q.', 'attn.in_proj_') 32 | new_v = torch.cat([v, ckpt[sub_item_k]], dim=0) 33 | elif 'attn.kv.' in new_k: 34 | continue 35 | elif 'attn.proj.' in new_k: 36 | new_k = new_k.replace('proj.', 'attn.out_proj.') 37 | elif 'mlp.' in new_k: 38 | new_k = new_k.replace('mlp.', 'ffn.layers.') 39 | if 'fc1.weight' in new_k or 'fc2.weight' in new_k: 40 | new_v = v.reshape((*v.shape, 1, 1)) 41 | new_k = new_k.replace('fc1.', '0.') 42 | new_k = new_k.replace('dwconv.dwconv.', '1.') 43 | new_k = new_k.replace('fc2.', '4.') 44 | # norm layer conversion 45 | elif k.startswith('norm'): 46 | stage_i = int(k.split('.')[0].replace('norm', '')) 47 | new_k = k.replace(f'norm{stage_i}', f'layers.{stage_i-1}.2') 48 | new_v = v 49 | else: 50 | new_k = k 51 | new_v = v 52 | new_ckpt[new_k] = new_v 53 | return new_ckpt 54 | 55 | 56 | def main(): 57 | parser = argparse.ArgumentParser( 58 | description='Convert keys in official pretrained segformer to ' 59 | 'MMSegmentation style.') 60 | parser.add_argument('src', help='src model path or url') 61 | # The dst path must be a full path of the new checkpoint.
62 | parser.add_argument('dst', help='save path') 63 | args = parser.parse_args() 64 | 65 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 66 | if 'state_dict' in checkpoint: 67 | state_dict = checkpoint['state_dict'] 68 | elif 'model' in checkpoint: 69 | state_dict = checkpoint['model'] 70 | else: 71 | state_dict = checkpoint 72 | weight = convert_mit(state_dict) 73 | mmcv.mkdir_or_exist(osp.dirname(args.dst)) 74 | torch.save(weight, args.dst) 75 | 76 | 77 | if __name__ == '__main__': 78 | main() 79 | -------------------------------------------------------------------------------- /segmentation/tools/model_converters/swin2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmcv 7 | import torch 8 | from mmcv.runner import CheckpointLoader 9 | 10 | 11 | def convert_swin(ckpt): 12 | new_ckpt = OrderedDict() 13 | 14 | def correct_unfold_reduction_order(x): 15 | out_channel, in_channel = x.shape 16 | x = x.reshape(out_channel, 4, in_channel // 4) 17 | x = x[:, [0, 2, 1, 3], :].transpose(1, 18 | 2).reshape(out_channel, in_channel) 19 | return x 20 | 21 | def correct_unfold_norm_order(x): 22 | in_channel = x.shape[0] 23 | x = x.reshape(4, in_channel // 4) 24 | x = x[[0, 2, 1, 3], :].transpose(0, 1).reshape(in_channel) 25 | return x 26 | 27 | for k, v in ckpt.items(): 28 | if k.startswith('head'): 29 | continue 30 | elif k.startswith('layers'): 31 | new_v = v 32 | if 'attn.' in k: 33 | new_k = k.replace('attn.', 'attn.w_msa.') 34 | elif 'mlp.' in k: 35 | if 'mlp.fc1.' in k: 36 | new_k = k.replace('mlp.fc1.', 'ffn.layers.0.0.') 37 | elif 'mlp.fc2.' in k: 38 | new_k = k.replace('mlp.fc2.', 'ffn.layers.1.') 39 | else: 40 | new_k = k.replace('mlp.', 'ffn.') 41 | elif 'downsample' in k: 42 | new_k = k 43 | if 'reduction.' in k: 44 | new_v = correct_unfold_reduction_order(v) 45 | elif 'norm.' in k: 46 | new_v = correct_unfold_norm_order(v) 47 | else: 48 | new_k = k 49 | new_k = new_k.replace('layers', 'stages', 1) 50 | elif k.startswith('patch_embed'): 51 | new_v = v 52 | if 'proj' in k: 53 | new_k = k.replace('proj', 'projection') 54 | else: 55 | new_k = k 56 | else: 57 | new_v = v 58 | new_k = k 59 | 60 | new_ckpt[new_k] = new_v 61 | 62 | return new_ckpt 63 | 64 | 65 | def main(): 66 | parser = argparse.ArgumentParser( 67 | description='Convert keys in official pretrained swin models to ' 68 | 'MMSegmentation style.') 69 | parser.add_argument('src', help='src model path or url') 70 | # The dst path must be a full path of the new checkpoint. 71 | parser.add_argument('dst', help='save path') 72 | args = parser.parse_args() 73 | 74 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 75 | if 'state_dict' in checkpoint: 76 | state_dict = checkpoint['state_dict'] 77 | elif 'model' in checkpoint: 78 | state_dict = checkpoint['model'] 79 | else: 80 | state_dict = checkpoint 81 | weight = convert_swin(state_dict) 82 | mmcv.mkdir_or_exist(osp.dirname(args.dst)) 83 | torch.save(weight, args.dst) 84 | 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /segmentation/tools/model_converters/vit2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmcv 7 | import torch 8 | from mmcv.runner import CheckpointLoader 9 | 10 | 11 | def convert_vit(ckpt): 12 | 13 | new_ckpt = OrderedDict() 14 | 15 | for k, v in ckpt.items(): 16 | if k.startswith('head'): 17 | continue 18 | if k.startswith('norm'): 19 | new_k = k.replace('norm.', 'ln1.') 20 | elif k.startswith('patch_embed'): 21 | if 'proj' in k: 22 | new_k = k.replace('proj', 'projection') 23 | else: 24 | new_k = k 25 | elif k.startswith('blocks'): 26 | if 'norm' in k: 27 | new_k = k.replace('norm', 'ln') 28 | elif 'mlp.fc1' in k: 29 | new_k = k.replace('mlp.fc1', 'ffn.layers.0.0') 30 | elif 'mlp.fc2' in k: 31 | new_k = k.replace('mlp.fc2', 'ffn.layers.1') 32 | elif 'attn.qkv' in k: 33 | new_k = k.replace('attn.qkv.', 'attn.attn.in_proj_') 34 | elif 'attn.proj' in k: 35 | new_k = k.replace('attn.proj', 'attn.attn.out_proj') 36 | else: 37 | new_k = k 38 | new_k = new_k.replace('blocks.', 'layers.') 39 | else: 40 | new_k = k 41 | new_ckpt[new_k] = v 42 | 43 | return new_ckpt 44 | 45 | 46 | def main(): 47 | parser = argparse.ArgumentParser( 48 | description='Convert keys in timm pretrained vit models to ' 49 | 'MMSegmentation style.') 50 | parser.add_argument('src', help='src model path or url') 51 | # The dst path must be a full path of the new checkpoint. 52 | parser.add_argument('dst', help='save path') 53 | args = parser.parse_args() 54 | 55 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 56 | if 'state_dict' in checkpoint: 57 | # timm checkpoint 58 | state_dict = checkpoint['state_dict'] 59 | elif 'model' in checkpoint: 60 | # deit checkpoint 61 | state_dict = checkpoint['model'] 62 | else: 63 | state_dict = checkpoint 64 | weight = convert_vit(state_dict) 65 | mmcv.mkdir_or_exist(osp.dirname(args.dst)) 66 | torch.save(weight, args.dst) 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /segmentation/tools/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | from mmcv import Config, DictAction 5 | 6 | from mmseg.apis import init_segmentor 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='Print the whole config') 11 | parser.add_argument('config', help='config file path') 12 | parser.add_argument( 13 | '--graph', action='store_true', help='print the model graph') 14 | parser.add_argument( 15 | '--options', nargs='+', action=DictAction, help='arguments in dict') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | cfg = Config.fromfile(args.config) 25 | if args.options is not None: 26 | cfg.merge_from_dict(args.options) 27 | print(f'Config:\n{cfg.pretty_text}') 28 | # dump config 29 | cfg.dump('example.py') 30 | # dump model graph 31 | if args.graph: 32 | model = init_segmentor(args.config, device='cpu') 33 | print(f'Model graph:\n{str(model)}') 34 | with open('example-graph.txt', 'w') as f: 35 | f.writelines(str(model)) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /segmentation/tools/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse 3 | import subprocess 4 | 5 | import torch 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser( 10 | description='Process a checkpoint to be published') 11 | parser.add_argument('in_file', help='input checkpoint filename') 12 | parser.add_argument('out_file', help='output checkpoint filename') 13 | args = parser.parse_args() 14 | return args 15 | 16 | 17 | def process_checkpoint(in_file, out_file): 18 | checkpoint = torch.load(in_file, map_location='cpu') 19 | # remove optimizer for smaller file size 20 | if 'optimizer' in checkpoint: 21 | del checkpoint['optimizer'] 22 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 23 | # add the code here. 24 | torch.save(checkpoint, out_file) 25 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 26 | # str.rstrip('.pth') strips any trailing '.', 'p', 't' or 'h' characters 27 | # rather than the suffix, so slice the extension off explicitly. 28 | out_file_name = out_file[:-4] if out_file.endswith('.pth') else out_file 29 | final_file = out_file_name + '-{}.pth'.format(sha[:8]) 30 | subprocess.Popen(['mv', out_file, final_file]) 31 | 32 | 33 | def main(): 34 | args = parse_args() 35 | process_checkpoint(args.in_file, args.out_file) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /segmentation/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-4} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /segmentation/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-8} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-12} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | PY_ARGS=${@:4} 13 | 14 | export NCCL_P2P_DISABLE=1 15 | export MASTER_PORT=13579 16 | 17 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 18 | srun -p ${PARTITION} \ 19 | --job-name=${JOB_NAME} \ 20 | --gres=gpu:${GPUS_PER_NODE} \ 21 | --ntasks=${GPUS} \ 22 | --ntasks-per-node=${GPUS_PER_NODE} \ 23 | --cpus-per-task=${CPUS_PER_TASK} \ 24 | --kill-on-bad-exit=1 \ 25 | --mem 250G \ 26 | ${SRUN_ARGS} \ 27 | python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 28 | -------------------------------------------------------------------------------- /segmentation/tools/torchserve/mmseg2torchserve.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from argparse import ArgumentParser, Namespace 3 | from pathlib import Path 4 | from tempfile import TemporaryDirectory 5 | 6 | import mmcv 7 | 8 | try: 9 | from model_archiver.model_packaging import package_model 10 | from model_archiver.model_packaging_utils import ModelExportUtils 11 | except ImportError: 12 | package_model = None 13 | 14 | 15 | def mmseg2torchserve( 16 | config_file: str, 17 | checkpoint_file: str, 18 | output_folder: str, 19 | model_name: str, 20 | model_version: str = '1.0', 21 | force: bool = False, 22 | ): 23 | """Converts mmsegmentation model (config + checkpoint) to TorchServe 24 | `.mar`. 25 | 26 | Args: 27 | config_file: 28 | In MMSegmentation config format. 29 | The contents vary for each task repository. 30 | checkpoint_file: 31 | In MMSegmentation checkpoint format. 32 | The contents vary for each task repository. 33 | output_folder: 34 | Folder where `{model_name}.mar` will be created. 35 | The file created will be in TorchServe archive format. 36 | model_name: 37 | If not None, used for naming the `{model_name}.mar` file 38 | that will be created under `output_folder`. 39 | If None, `{Path(checkpoint_file).stem}` will be used. 40 | model_version: 41 | Model's version. 42 | force: 43 | If True, if there is an existing `{model_name}.mar` 44 | file under `output_folder` it will be overwritten. 45 | """ 46 | mmcv.mkdir_or_exist(output_folder) 47 | 48 | config = mmcv.Config.fromfile(config_file) 49 | 50 | with TemporaryDirectory() as tmpdir: 51 | config.dump(f'{tmpdir}/config.py') 52 | 53 | args = Namespace( 54 | **{ 55 | 'model_file': f'{tmpdir}/config.py', 56 | 'serialized_file': checkpoint_file, 57 | 'handler': f'{Path(__file__).parent}/mmseg_handler.py', 58 | 'model_name': model_name or Path(checkpoint_file).stem, 59 | 'version': model_version, 60 | 'export_path': output_folder, 61 | 'force': force, 62 | 'requirements_file': None, 63 | 'extra_files': None, 64 | 'runtime': 'python', 65 | 'archive_format': 'default' 66 | }) 67 | manifest = ModelExportUtils.generate_manifest_json(args) 68 | package_model(args, manifest) 69 | 70 | 71 | def parse_args(): 72 | parser = ArgumentParser( 73 | description='Convert mmseg models to TorchServe `.mar` format.') 74 | parser.add_argument('config', type=str, help='config file path') 75 | parser.add_argument('checkpoint', type=str, help='checkpoint file path') 76 | parser.add_argument( 77 | '--output-folder', 78 | type=str, 79 | required=True, 80 | help='Folder where `{model_name}.mar` will be created.') 81 | parser.add_argument( 82 | '--model-name', 83 | type=str, 84 | default=None, 85 | help='If not None, used for naming the `{model_name}.mar` ' 86 | 'file that will be created under `output_folder`. ' 87 | 'If None, `{Path(checkpoint_file).stem}` will be used.') 88 | parser.add_argument( 89 | '--model-version', 90 | type=str, 91 | default='1.0', 92 | help='Number used for versioning.') 93 | parser.add_argument( 94 | '-f', 95 | '--force', 96 | action='store_true', 97 | help='overwrite the existing `{model_name}.mar`') 98 | args = parser.parse_args() 99 | 100 | return args 101 | 102 | 103 | if __name__ == '__main__': 104 | args = parse_args() 105 | 106 | if package_model is None: 107 | raise ImportError('`torch-model-archiver` is required. '
108 | 'Try: pip install torch-model-archiver') 109 | 110 | mmseg2torchserve(args.config, args.checkpoint, args.output_folder, 111 | args.model_name, args.model_version, args.force) 112 | -------------------------------------------------------------------------------- /segmentation/tools/torchserve/mmseg_handler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import base64 3 | import os 4 | 5 | import cv2 6 | import mmcv 7 | import torch 8 | from mmcv.cnn.utils.sync_bn import revert_sync_batchnorm 9 | from ts.torch_handler.base_handler import BaseHandler 10 | 11 | from mmseg.apis import inference_segmentor, init_segmentor 12 | 13 | 14 | class MMsegHandler(BaseHandler): 15 | 16 | def initialize(self, context): 17 | properties = context.system_properties 18 | self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu' 19 | self.device = torch.device(self.map_location + ':' + 20 | str(properties.get('gpu_id')) if torch.cuda. 21 | is_available() else self.map_location) 22 | self.manifest = context.manifest 23 | 24 | model_dir = properties.get('model_dir') 25 | serialized_file = self.manifest['model']['serializedFile'] 26 | checkpoint = os.path.join(model_dir, serialized_file) 27 | self.config_file = os.path.join(model_dir, 'config.py') 28 | 29 | self.model = init_segmentor(self.config_file, checkpoint, self.device) 30 | self.model = revert_sync_batchnorm(self.model) 31 | self.initialized = True 32 | 33 | def preprocess(self, data): 34 | images = [] 35 | 36 | for row in data: 37 | image = row.get('data') or row.get('body') 38 | if isinstance(image, str): 39 | image = base64.b64decode(image) 40 | image = mmcv.imfrombytes(image) 41 | images.append(image) 42 | 43 | return images 44 | 45 | def inference(self, data, *args, **kwargs): 46 | results = [inference_segmentor(self.model, img) for img in data] 47 | return results 48 | 49 | def postprocess(self, data): 50 | output = [] 51 | 52 | for image_result in data: 53 | _, buffer = cv2.imencode('.png', image_result[0].astype('uint8')) 54 | content = buffer.tobytes() 55 | output.append(content) 56 | return output 57 | -------------------------------------------------------------------------------- /segmentation/tools/torchserve/test_torchserve.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | from io import BytesIO 3 | 4 | import matplotlib.pyplot as plt 5 | import mmcv 6 | import requests 7 | 8 | from mmseg.apis import inference_segmentor, init_segmentor 9 | 10 | 11 | def parse_args(): 12 | parser = ArgumentParser( 13 | description='Compare results of torchserve and pytorch, ' 14 | 'and visualize them.') 15 | parser.add_argument('img', help='Image file') 16 | parser.add_argument('config', help='Config file') 17 | parser.add_argument('checkpoint', help='Checkpoint file') 18 | parser.add_argument('model_name', help='The model name in the server') 19 | parser.add_argument( 20 | '--inference-addr', 21 | default='127.0.0.1:8080', 22 | help='Address and port of the inference server') 23 | parser.add_argument( 24 | '--result-image', 25 | type=str, 26 | default=None, 27 | help='save server output in result-image') 28 | parser.add_argument( 29 | '--device', default='cuda:0', help='Device used for inference') 30 | 31 | args = parser.parse_args() 32 | return args 33 | 34 | 35 | def main(args): 36 | url = 'http://' + args.inference_addr + '/predictions/' + args.model_name 37 | with
open(args.img, 'rb') as image: 38 | tmp_res = requests.post(url, image) 39 | content = tmp_res.content 40 | if args.result_image: 41 | with open(args.result_image, 'wb') as out_image: 42 | out_image.write(content) 43 | plt.imshow(mmcv.imread(args.result_image, 'grayscale')) 44 | plt.show() 45 | else: 46 | plt.imshow(plt.imread(BytesIO(content))) 47 | plt.show() 48 | model = init_segmentor(args.config, args.checkpoint, args.device) 49 | image = mmcv.imread(args.img) 50 | result = inference_segmentor(model, image) 51 | plt.imshow(result[0]) 52 | plt.show() 53 | 54 | 55 | if __name__ == '__main__': 56 | args = parse_args() 57 | main(args) 58 | -------------------------------------------------------------------------------- /segmentation/train.sh: -------------------------------------------------------------------------------- 1 | ./tools/dist_train.sh configs/sem_fpn/fpn_lsnet_t_ade20k_40k.py 8 --seed 0 --deterministic 2 | -------------------------------------------------------------------------------- /speed.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | from timm import create_model 4 | import model.build 5 | import utils 6 | from argparse import ArgumentParser 7 | 8 | torch.autograd.set_grad_enabled(False) 9 | 10 | T0 = 5 11 | T1 = 10 12 | 13 | def compute_throughput(model, device, batch_size, resolution=224): 14 | inputs = torch.randn(batch_size, 3, resolution, resolution, device=device) 15 | torch.cuda.empty_cache() 16 | torch.cuda.synchronize() 17 | start = time.time() 18 | while time.time() - start < T0: 19 | model(inputs) 20 | timing = [] 21 | torch.cuda.synchronize() 22 | while sum(timing) < T1: 23 | start = time.time() 24 | model(inputs) 25 | torch.cuda.synchronize() 26 | timing.append(time.time() - start) 27 | timing = torch.as_tensor(timing, dtype=torch.float32) 28 | print(batch_size / timing.mean().item(), 29 | 'images/s @ batch size', batch_size) 30 | 31 | if __name__ == "__main__": 32 | parser = ArgumentParser() 33 | parser.add_argument("--model", default="lsnet_t", type=str) 34 | parser.add_argument("--batch-size", default=2048, type=int) 35 | parser.add_argument("--resolution", default=224, type=int) 36 | parser.add_argument("--device", default=0, type=int) 37 | 38 | args = parser.parse_args() 39 | model = args.model 40 | batch_size = args.batch_size 41 | resolution = args.resolution 42 | device = args.device 43 | torch.cuda.set_device(device) 44 | 45 | torch.cuda.empty_cache() 46 | model = create_model(model, num_classes=1000) 47 | utils.replace_batchnorm(model) 48 | model.to(device) 49 | model.eval() 50 | compute_throughput(model, device, 51 | batch_size, resolution=resolution) 52 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | NCCL_P2P_DISABLE=1 python -m torch.distributed.launch --nproc_per_node=8 --master_port 12345 --use_env main.py --model lsnet_t --data-path ~/imagenet --dist-eval --------------------------------------------------------------------------------
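A quick sanity check on the '// 128' binarization used by the DRIVE and HRF converters above: for uint8 pixel values in [0, 255], integer division by 128 can only yield 0 or 1, and it yields 1 exactly when the value is at least 128, so a single division implements the threshold described in the comments. A minimal standalone sketch (not part of the repository; assumes only that numpy is installed):

import numpy as np

# A toy 'annotation' row: standard masks contain only {0, 255}, but the
# non-standard ones may contain arbitrary intermediate values.
row = np.array([0, 5, 127, 128, 200, 255], dtype=np.uint8)

binary = row // 128                        # integer division thresholds at 128
reference = (row >= 128).astype(np.uint8)  # explicit '1 if value >= 128 else 0'

assert (binary == reference).all()
print(binary)  # [0 0 0 1 1 1]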