├── .gitignore ├── README.md ├── README_robustness.md ├── data ├── __init__.py ├── datasets.py ├── samplers.py └── threeaugment.py ├── detection ├── .gitignore ├── README.md ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── cityscapes_detection.py │ │ │ ├── cityscapes_instance.py │ │ │ ├── coco_detection.py │ │ │ ├── coco_instance.py │ │ │ ├── coco_instance_semantic.py │ │ │ ├── deepfashion.py │ │ │ ├── lvis_v0.5_instance.py │ │ │ ├── lvis_v1_instance.py │ │ │ ├── voc0712.py │ │ │ └── wider_face.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ ├── cascade_mask_rcnn_swin_fpn.py │ │ │ ├── cascade_rcnn_r50_fpn.py │ │ │ ├── fast_rcnn_r50_fpn.py │ │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ │ ├── faster_rcnn_r50_fpn.py │ │ │ ├── mask_rcnn_lsnet_fpn.py │ │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ ├── mask_rcnn_swin_fpn.py │ │ │ ├── mask_reppointsv2_swin_bifpn.py │ │ │ ├── reppointsv2_swin_bifpn.py │ │ │ ├── retinanet_lsnet_fpn.py │ │ │ ├── retinanet_r50_fpn.py │ │ │ ├── rpn_r50_caffe_c4.py │ │ │ ├── rpn_r50_fpn.py │ │ │ └── ssd300.py │ │ └── schedules │ │ │ ├── schedule_1x.py │ │ │ ├── schedule_20e.py │ │ │ └── schedule_2x.py │ ├── mask_rcnn_lsnet_b_fpn_1x_coco.py │ ├── mask_rcnn_lsnet_s_fpn_1x_coco.py │ ├── mask_rcnn_lsnet_t_fpn_1x_coco.py │ ├── retinanet_lsnet_b_fpn_1x_coco.py │ ├── retinanet_lsnet_s_fpn_1x_coco.py │ └── retinanet_lsnet_t_fpn_1x_coco.py ├── dist_test.sh ├── dist_train.sh ├── eval.sh ├── logs │ ├── lsnet_b_maskrcnn.json │ ├── lsnet_b_retinanet.json │ ├── lsnet_s_maskrcnn.json │ ├── lsnet_s_retinanet.json │ ├── lsnet_t_maskrcnn.json │ └── lsnet_t_retinanet.json ├── mmcv_custom │ ├── __init__.py │ ├── checkpoint.py │ └── runner │ │ ├── __init__.py │ │ ├── checkpoint.py │ │ ├── epoch_based_runner.py │ │ └── optimizer.py ├── mmdet_custom │ └── apis │ │ └── train.py ├── model │ ├── lsnet.py │ ├── lsnet_fpn.py │ └── ska.py ├── test.py ├── train.py └── train.sh ├── engine.py ├── eval.sh ├── eval_robust.sh ├── figures └── throughput.svg ├── flops.py ├── logs ├── lsnet_b.log ├── lsnet_b_distill.log ├── lsnet_s.log ├── lsnet_s_distill.log ├── lsnet_t.log └── lsnet_t_distill.log ├── losses.py ├── main.py ├── model ├── __init__.py ├── build.py ├── lsnet.py └── ska.py ├── requirements.txt ├── robust.py ├── robust_utils.py ├── segmentation ├── .gitignore ├── README.md ├── align_resize.py ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ └── ade20k.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ └── fpn_r50.py │ │ └── schedules │ │ │ ├── schedule_160k.py │ │ │ ├── schedule_20k.py │ │ │ ├── schedule_40k.py │ │ │ └── schedule_80k.py │ └── sem_fpn │ │ ├── fpn_lsnet_b_ade20k_40k.py │ │ ├── fpn_lsnet_s_ade20k_40k.py │ │ └── fpn_lsnet_t_ade20k_40k.py ├── eval.sh ├── logs │ ├── lsnet_b_semfpn.json │ ├── lsnet_s_semfpn.json │ └── lsnet_t_semfpn.json ├── mmcv_custom │ ├── __init__.py │ ├── checkpoint.py │ └── runner │ │ ├── __init__.py │ │ ├── checkpoint.py │ │ ├── epoch_based_runner.py │ │ └── optimizer.py ├── model │ ├── lsnet.py │ ├── lsnet_fpn.py │ └── ska.py ├── tools │ ├── analyze_logs.py │ ├── benchmark.py │ ├── browse_dataset.py │ ├── convert_datasets │ │ ├── chase_db1.py │ │ ├── cityscapes.py │ │ ├── coco_stuff10k.py │ │ ├── coco_stuff164k.py │ │ ├── drive.py │ │ ├── hrf.py │ │ ├── pascal_context.py │ │ ├── stare.py │ │ └── voc_aug.py │ ├── deploy_test.py │ ├── dist_test.sh │ ├── dist_train.sh │ ├── get_flops.py │ ├── model_converters │ │ ├── mit2mmseg.py │ │ ├── swin2mmseg.py │ │ └── 
vit2mmseg.py │ ├── onnx2tensorrt.py │ ├── print_config.py │ ├── publish_model.py │ ├── pytorch2onnx.py │ ├── pytorch2torchscript.py │ ├── slurm_test.sh │ ├── slurm_train.sh │ ├── test.py │ ├── torchserve │ │ ├── mmseg2torchserve.py │ │ ├── mmseg_handler.py │ │ └── test_torchserve.py │ ├── train.py │ └── vis.py └── train.sh ├── speed.py ├── train.sh └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .test/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | **/checkpoints 141 | 142 | wandb 143 | pretrain 144 | *.whl 145 | ignore -------------------------------------------------------------------------------- /README_robustness.md: -------------------------------------------------------------------------------- 1 | # Robustness Evaluation 2 | 3 | ## Models 4 | | Model | ImageNet-C | ImageNet-A | ImageNet-R | ImageNet-Sketch | 5 | |:-:|:-:|:-:|:-:|:-:| 6 | | [LSNet-T](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_t.pth) | 68.2 | 6.7 | 38.5 | 25.5 | 7 | | [LSNet-S](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_s.pth) | 65.7 | 9.6 | 39.4 | 27.5 | 8 | | [LSNet-B](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_b.pth) | 59.3 | 17.3 | 43.1 | 30.7 | 9 | 10 | ## Data preparation 11 | 12 | Please download and prepare the ImageNet-C, ImageNet-A, ImageNet-R, and ImageNet-Sketch datasets. 13 | 14 | ## Testing 15 | ```bash 16 | set -e 17 | set -x 18 | 19 | MODEL=lsnet_t 20 | CKPT=pretrain/lsnet_t.pth 21 | INPUT=224 22 | 23 | # Optional: use an HF mirror 24 | # export HF_ENDPOINT=https://hf-mirror.com 25 | 26 | python main.py --eval --model ${MODEL} --resume ${CKPT} --data-path ~/imagenet \ 27 | --inc_path ~/datasets/OpenDataLab___ImageNet-C/raw \ 28 | --insk_path ~/datasets/OpenDataLab___ImageNet-Sketch/raw/sketch \ 29 | --ina_path ~/datasets/OpenDataLab___ImageNet-A/raw/imagenet-a \ 30 | --inr_path ~/datasets/OpenDataLab___ImageNet-R/raw/imagenet-r \ 31 | --batch-size 512 \ 32 | --input-size ${INPUT} 33 | ``` 34 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THU-MIG/lsnet/cbe737c92b7c43ecf02d08545a07f03f1010177c/data/__init__.py -------------------------------------------------------------------------------- /data/datasets.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Build training/testing datasets 3 | ''' 4 | import os 5 | import json 6 | 7 | from torchvision import datasets, transforms 8 | from torchvision.datasets.folder import ImageFolder, default_loader 9 | import torch 10 | 11 | from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 12 | from timm.data import create_transform 13 | 14 | try: 15 | from timm.data import TimmDatasetTar 16 | except ImportError: 17 | # for newer versions of timm 18 | from timm.data import ImageDataset as TimmDatasetTar 19 | 20 | class INatDataset(ImageFolder): 21 | def __init__(self, root, train=True, year=2018, transform=None, target_transform=None, 22 | category='name', loader=default_loader): 23 | self.transform = transform 24 | self.loader = loader 25 | self.target_transform = target_transform 26 | self.year = year 27 | # assert category in 
['kingdom','phylum','class','order','supercategory','family','genus','name'] 28 | path_json = os.path.join( 29 | root, f'{"train" if train else "val"}{year}.json') 30 | with open(path_json) as json_file: 31 | data = json.load(json_file) 32 | 33 | with open(os.path.join(root, 'categories.json')) as json_file: 34 | data_catg = json.load(json_file) 35 | 36 | path_json_for_targeter = os.path.join(root, f"train{year}.json") 37 | 38 | with open(path_json_for_targeter) as json_file: 39 | data_for_targeter = json.load(json_file) 40 | 41 | targeter = {} 42 | indexer = 0 43 | for elem in data_for_targeter['annotations']: 44 | king = [] 45 | king.append(data_catg[int(elem['category_id'])][category]) 46 | if king[0] not in targeter.keys(): 47 | targeter[king[0]] = indexer 48 | indexer += 1 49 | self.nb_classes = len(targeter) 50 | 51 | self.samples = [] 52 | for elem in data['images']: 53 | cut = elem['file_name'].split('/') 54 | target_current = int(cut[2]) 55 | path_current = os.path.join(root, cut[0], cut[2], cut[3]) 56 | 57 | categors = data_catg[target_current] 58 | target_current_true = targeter[categors[category]] 59 | self.samples.append((path_current, target_current_true)) 60 | 61 | # __getitem__ and __len__ inherited from ImageFolder 62 | 63 | 64 | def build_dataset(is_train, args): 65 | transform = build_transform(is_train, args) 66 | 67 | if args.data_set == 'CIFAR': 68 | dataset = datasets.CIFAR100( 69 | args.data_path, train=is_train, transform=transform) 70 | nb_classes = 100 71 | elif args.data_set == 'IMNET': 72 | prefix = 'train' if is_train else 'val' 73 | data_dir = os.path.join(args.data_path, f'{prefix}.tar') 74 | if os.path.exists(data_dir): 75 | dataset = TimmDatasetTar(data_dir, transform=transform) 76 | else: 77 | root = os.path.join(args.data_path, 'train' if is_train else 'val') 78 | dataset = datasets.ImageFolder(root, transform=transform) 79 | nb_classes = 1000 80 | elif args.data_set == 'IMNETEE': 81 | root = os.path.join(args.data_path, 'train' if is_train else 'val') 82 | dataset = datasets.ImageFolder(root, transform=transform) 83 | nb_classes = 10 84 | elif args.data_set == 'FLOWERS': 85 | root = os.path.join(args.data_path, 'train' if is_train else 'test') 86 | dataset = datasets.ImageFolder(root, transform=transform) 87 | if is_train: 88 | dataset = torch.utils.data.ConcatDataset( 89 | [dataset for _ in range(100)]) 90 | nb_classes = 102 91 | elif args.data_set == 'INAT': 92 | dataset = INatDataset(args.data_path, train=is_train, year=2018, 93 | category=args.inat_category, transform=transform) 94 | nb_classes = dataset.nb_classes 95 | elif args.data_set == 'INAT19': 96 | dataset = INatDataset(args.data_path, train=is_train, year=2019, 97 | category=args.inat_category, transform=transform) 98 | nb_classes = dataset.nb_classes 99 | return dataset, nb_classes 100 | 101 | 102 | def build_transform(is_train, args): 103 | resize_im = args.input_size > 32 104 | if is_train: 105 | # this should always dispatch to transforms_imagenet_train 106 | transform = create_transform( 107 | input_size=args.input_size, 108 | is_training=True, 109 | color_jitter=args.color_jitter, 110 | auto_augment=args.aa, 111 | interpolation=args.train_interpolation, 112 | re_prob=args.reprob, 113 | re_mode=args.remode, 114 | re_count=args.recount, 115 | ) 116 | if not resize_im: 117 | # replace RandomResizedCropAndInterpolation with 118 | # RandomCrop 119 | transform.transforms[0] = transforms.RandomCrop( 120 | args.input_size, padding=4) 121 | return transform 122 | 123 | t = [] 124 | if 
args.finetune: 125 | t.append( 126 | transforms.Resize((args.input_size, args.input_size), 127 | interpolation=3) 128 | ) 129 | else: 130 | if resize_im: 131 | size = int((256 / 224) * args.input_size) 132 | t.append( 133 | # to maintain same ratio w.r.t. 224 images 134 | transforms.Resize(size, interpolation=3), 135 | ) 136 | t.append(transforms.CenterCrop(args.input_size)) 137 | 138 | t.append(transforms.ToTensor()) 139 | t.append(transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)) 140 | return transforms.Compose(t) 141 | -------------------------------------------------------------------------------- /data/samplers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Build samplers for data loading 3 | ''' 4 | import torch 5 | import torch.distributed as dist 6 | import math 7 | 8 | 9 | class RASampler(torch.utils.data.Sampler): 10 | """Sampler that restricts data loading to a subset of the dataset for distributed training, 11 | with repeated augmentation. 12 | It ensures that each augmented version of a sample will be visible to a 13 | different process (GPU). 14 | Heavily based on torch.utils.data.DistributedSampler 15 | """ 16 | 17 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 18 | if num_replicas is None: 19 | if not dist.is_available(): 20 | raise RuntimeError( 21 | "Requires distributed package to be available") 22 | num_replicas = dist.get_world_size() 23 | if rank is None: 24 | if not dist.is_available(): 25 | raise RuntimeError( 26 | "Requires distributed package to be available") 27 | rank = dist.get_rank() 28 | self.dataset = dataset 29 | self.num_replicas = num_replicas 30 | self.rank = rank 31 | self.epoch = 0 32 | self.num_samples = int( 33 | math.ceil(len(self.dataset) * 3.0 / self.num_replicas)) 34 | self.total_size = self.num_samples * self.num_replicas 35 | # self.num_selected_samples = int(math.ceil(len(self.dataset) / self.num_replicas)) 36 | self.num_selected_samples = int(math.floor( 37 | len(self.dataset) // 256 * 256 / self.num_replicas)) 38 | self.shuffle = shuffle 39 | 40 | def __iter__(self): 41 | # deterministically shuffle based on epoch 42 | g = torch.Generator() 43 | g.manual_seed(self.epoch) 44 | if self.shuffle: 45 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 46 | else: 47 | indices = list(range(len(self.dataset))) 48 | 49 | # repeat each sample 3 times (repeated augmentation), then pad to make it evenly divisible 50 | indices = [ele for ele in indices for i in range(3)] 51 | indices += indices[:(self.total_size - len(indices))] 52 | assert len(indices) == self.total_size 53 | 54 | # subsample 55 | indices = indices[self.rank:self.total_size:self.num_replicas] 56 | assert len(indices) == self.num_samples 57 | 58 | return iter(indices[:self.num_selected_samples]) 59 | 60 | def __len__(self): 61 | return self.num_selected_samples 62 | 63 | def set_epoch(self, epoch): 64 | self.epoch = epoch 65 | -------------------------------------------------------------------------------- /data/threeaugment.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3Augment implementation from (https://github.com/facebookresearch/deit/blob/main/augment.py) 3 | Data-augmentation (DA) based on dino DA (https://github.com/facebookresearch/dino) 4 | and timm DA (https://github.com/rwightman/pytorch-image-models) 5 | Can be called by adding "--ThreeAugment" to the command line 6 | """ 7 | import torch 8 | from torchvision import transforms 9 | 10 | from 
timm.data.transforms import str_to_pil_interp, RandomResizedCropAndInterpolation, ToNumpy, ToTensor 11 | 12 | import numpy as np 13 | from torchvision import datasets, transforms 14 | import random 15 | 16 | 17 | 18 | from PIL import ImageFilter, ImageOps 19 | import torchvision.transforms.functional as TF 20 | 21 | 22 | class GaussianBlur(object): 23 | """ 24 | Apply Gaussian Blur to the PIL image. 25 | """ 26 | def __init__(self, p=0.1, radius_min=0.1, radius_max=2.): 27 | self.prob = p 28 | self.radius_min = radius_min 29 | self.radius_max = radius_max 30 | 31 | def __call__(self, img): 32 | do_it = random.random() <= self.prob 33 | if not do_it: 34 | return img 35 | 36 | img = img.filter( 37 | ImageFilter.GaussianBlur( 38 | radius=random.uniform(self.radius_min, self.radius_max) 39 | ) 40 | ) 41 | return img 42 | 43 | class Solarization(object): 44 | """ 45 | Apply Solarization to the PIL image. 46 | """ 47 | def __init__(self, p=0.2): 48 | self.p = p 49 | 50 | def __call__(self, img): 51 | if random.random() < self.p: 52 | return ImageOps.solarize(img) 53 | else: 54 | return img 55 | 56 | class gray_scale(object): 57 | """ 58 | Apply grayscale to the PIL image. 59 | """ 60 | def __init__(self, p=0.2): 61 | self.p = p 62 | self.transf = transforms.Grayscale(3) 63 | 64 | def __call__(self, img): 65 | if random.random() < self.p: 66 | return self.transf(img) 67 | else: 68 | return img 69 | 70 | 71 | 72 | class horizontal_flip(object): 73 | """ 74 | Apply a horizontal flip to the PIL image. 75 | """ 76 | def __init__(self, p=0.2,activate_pred=False): 77 | self.p = p 78 | self.transf = transforms.RandomHorizontalFlip(p=1.0) 79 | 80 | def __call__(self, img): 81 | if random.random() < self.p: 82 | return self.transf(img) 83 | else: 84 | return img 85 | 86 | 87 | 88 | def new_data_aug_generator(args = None): 89 | img_size = args.input_size 90 | remove_random_resized_crop = False 91 | mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225] 92 | primary_tfl = [] 93 | scale=(0.08, 1.0) 94 | interpolation='bicubic' 95 | if remove_random_resized_crop: 96 | primary_tfl = [ 97 | transforms.Resize(img_size, interpolation=3), 98 | transforms.RandomCrop(img_size, padding=4,padding_mode='reflect'), 99 | transforms.RandomHorizontalFlip() 100 | ] 101 | else: 102 | primary_tfl = [ 103 | RandomResizedCropAndInterpolation( 104 | img_size, scale=scale, interpolation=interpolation), 105 | transforms.RandomHorizontalFlip() 106 | ] 107 | 108 | 109 | secondary_tfl = [transforms.RandomChoice([gray_scale(p=1.0), 110 | Solarization(p=1.0), 111 | GaussianBlur(p=1.0)])] 112 | 113 | if args.color_jitter is not None and not args.color_jitter==0: 114 | secondary_tfl.append(transforms.ColorJitter(args.color_jitter, args.color_jitter, args.color_jitter)) 115 | final_tfl = [ 116 | transforms.ToTensor(), 117 | transforms.Normalize( 118 | mean=torch.tensor(mean), 119 | std=torch.tensor(std)) 120 | ] 121 | return transforms.Compose(primary_tfl+secondary_tfl+final_tfl) 122 | -------------------------------------------------------------------------------- /detection/.gitignore: -------------------------------------------------------------------------------- 1 | data 2 | pretrain 3 | work_dirs 4 | results.pkl -------------------------------------------------------------------------------- /detection/README.md: -------------------------------------------------------------------------------- 1 | # Object Detection and Instance Segmentation 2 | 3 | Detection and instance segmentation on MS COCO 2017 are implemented based on 
[MMDetection](https://github.com/open-mmlab/mmdetection). 4 | 5 | ## Models 6 | Results with RetinaNet 7 | | Model | $AP$ | $AP_{50}$ | $AP_{75}$ | $AP_S$ | $AP_M$ | $AP_L$ | Log | 8 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 9 | | [LSNet-T](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_t_retinanet.pth) | 34.2 | 54.6 | 35.2 | 17.8 | 37.1 | 48.5 | [lsnet_t_retinanet.json](./logs/lsnet_t_retinanet.json) | 10 | | [LSNet-S](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_s_retinanet.pth) | 36.5 | 57.3 | 38.1 | 20.3 | 39.5 | 51.0 | [lsnet_s_retinanet.json](./logs/lsnet_s_retinanet.json) | 11 | | [LSNet-B](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_b_retinanet.pth) | 39.2 | 60.0 | 41.5 | 22.1 | 43.0 | 52.9 | [lsnet_b_retinanet.json](./logs/lsnet_b_retinanet.json) | 12 | 13 | Results with Mask R-CNN 14 | | Model | $AP^b$ | $AP_{50}^b$ | $AP_{75}^b$ | $AP^m$ | $AP_{50}^m$ | $AP_{75}^m$ | Log | 15 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 16 | | [LSNet-T](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_t_maskrcnn.pth) | 35.0 | 57.0 | 37.3 | 32.7 | 53.8 | 34.3 | [lsnet_t_maskrcnn.json](./logs/lsnet_t_maskrcnn.json) | 17 | | [LSNet-S](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_s_maskrcnn.pth) | 37.4 | 59.9 | 39.8 | 34.8 | 56.8 | 36.6 | [lsnet_s_maskrcnn.json](./logs/lsnet_s_maskrcnn.json) | 18 | | [LSNet-B](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_b_maskrcnn.pth) | 40.8 | 63.4 | 44.0 | 37.8 | 60.5 | 40.1 | [lsnet_b_maskrcnn.json](./logs/lsnet_b_maskrcnn.json) | 19 | 20 | ## Installation 21 | ```bash 22 | pip install mmcv-full==1.7.2 23 | pip install mmdet==2.28.2 24 | # Please replace line 160 of anaconda3/envs/seg/lib/python3.10/site-packages/mmcv/parallel/distributed.py with: module_to_run = self.module 25 | # Please patch mmcv following https://github.com/HarborYuan/mmcv_16/commit/ad1a72fe0cbeead2716706ff618dfa0269d2cf4c 26 | ``` 27 | 28 | ## Data preparation 29 | 30 | Please prepare the COCO 2017 dataset according to the [instructions in MMDetection](https://github.com/open-mmlab/mmdetection/blob/master/docs/en/1_exist_data_model.md#test-existing-models-on-standard-datasets). 
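If COCO 2017 is already downloaded elsewhere, symlinking it into the expected `data/` directory avoids a second copy (a minimal sketch; `/path/to/coco2017` is a placeholder for your local dataset root):

```bash
# run from the detection/ directory; adjust the source path to your setup
mkdir -p data
ln -s /path/to/coco2017 data/coco
```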
31 | The dataset should be organized as 32 | ``` 33 | detection 34 | ├── data 35 | │ ├── coco 36 | │ │ ├── annotations 37 | │ │ ├── train2017 38 | │ │ ├── val2017 39 | │ │ ├── test2017 40 | ``` 41 | 42 | ## Testing 43 | For RetinaNet 44 | ```bash 45 | bash ./dist_test.sh configs/retinanet_lsnet_b_fpn_1x_coco.py pretrain/lsnet_b_retinanet.pth 8 --eval bbox --out results.pkl 46 | ``` 47 | For Mask R-CNN 48 | ```bash 49 | bash ./dist_test.sh configs/mask_rcnn_lsnet_b_fpn_1x_coco.py pretrain/lsnet_b_maskrcnn.pth 8 --eval bbox segm --out results.pkl 50 | ``` 51 | 52 | ## Training 53 | Download ImageNet-1K pretrained weights into `./pretrain` 54 | For RetinaNet 55 | ```bash 56 | bash ./dist_train.sh configs/retinanet_lsnet_b_fpn_1x_coco.py 8 57 | ``` 58 | For Mask R-CNN 59 | ```bash 60 | bash ./dist_train.sh configs/mask_rcnn_lsnet_b_fpn_1x_coco.py 8 61 | ``` 62 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/cityscapes_detection.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'data/cityscapes/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict( 9 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(2048, 1024), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=1, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=8, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 40 | 'annotations/instancesonly_filtered_gtFine_train.json', 41 | img_prefix=data_root + 'leftImg8bit/train/', 42 | pipeline=train_pipeline)), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 46 | 'annotations/instancesonly_filtered_gtFine_val.json', 47 | img_prefix=data_root + 'leftImg8bit/val/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | ann_file=data_root + 52 | 'annotations/instancesonly_filtered_gtFine_test.json', 53 | img_prefix=data_root + 'leftImg8bit/test/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(interval=1, metric='bbox') 56 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/cityscapes_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'data/cityscapes/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict( 9 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 10 | 
dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(2048, 1024), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=1, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=8, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 40 | 'annotations/instancesonly_filtered_gtFine_train.json', 41 | img_prefix=data_root + 'leftImg8bit/train/', 42 | pipeline=train_pipeline)), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 46 | 'annotations/instancesonly_filtered_gtFine_val.json', 47 | img_prefix=data_root + 'leftImg8bit/val/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | ann_file=data_root + 52 | 'annotations/instancesonly_filtered_gtFine_test.json', 53 | img_prefix=data_root + 'leftImg8bit/test/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(metric=['bbox', 'segm']) 56 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(interval=1, metric='bbox') 49 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_instance.py: 
-------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_instance_semantic.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict( 8 | type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='SegRescale', scale_factor=1 / 8), 14 | dict(type='DefaultFormatBundle'), 15 | dict( 16 | type='Collect', 17 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1333, 800), 24 | flip=False, 25 | transforms=[ 26 | dict(type='Resize', keep_ratio=True), 27 | dict(type='RandomFlip', flip_ratio=0.5), 28 | dict(type='Normalize', **img_norm_cfg), 29 | dict(type='Pad', size_divisor=32), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=2, 36 | workers_per_gpu=2, 37 | train=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 'annotations/instances_train2017.json', 40 | img_prefix=data_root + 'train2017/', 41 | seg_prefix=data_root + 'stuffthingmaps/train2017/', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | 
img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline), 48 | test=dict( 49 | type=dataset_type, 50 | ann_file=data_root + 'annotations/instances_val2017.json', 51 | img_prefix=data_root + 'val2017/', 52 | pipeline=test_pipeline)) 53 | evaluation = dict(metric=['bbox', 'segm']) 54 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'DeepFashionDataset' 3 | data_root = 'data/DeepFashion/In-shop/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(750, 1101), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(750, 1101), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | imgs_per_gpu=2, 33 | workers_per_gpu=1, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 37 | img_prefix=data_root + 'Img/', 38 | pipeline=train_pipeline, 39 | data_root=data_root), 40 | val=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 43 | img_prefix=data_root + 'Img/', 44 | pipeline=test_pipeline, 45 | data_root=data_root), 46 | test=dict( 47 | type=dataset_type, 48 | ann_file=data_root + 49 | 'annotations/DeepFashion_segmentation_gallery.json', 50 | img_prefix=data_root + 'Img/', 51 | pipeline=test_pipeline, 52 | data_root=data_root)) 53 | evaluation = dict(interval=5, metric=['bbox', 'segm']) 54 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/lvis_v0.5_instance.py: -------------------------------------------------------------------------------- 1 | _base_ = 'coco_instance.py' 2 | dataset_type = 'LVISV05Dataset' 3 | data_root = 'data/lvis_v0.5/' 4 | data = dict( 5 | samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict( 8 | _delete_=True, 9 | type='ClassBalancedDataset', 10 | oversample_thr=1e-3, 11 | dataset=dict( 12 | type=dataset_type, 13 | ann_file=data_root + 'annotations/lvis_v0.5_train.json', 14 | img_prefix=data_root + 'train2017/')), 15 | val=dict( 16 | type=dataset_type, 17 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 18 | img_prefix=data_root + 'val2017/'), 19 | test=dict( 20 | type=dataset_type, 21 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 22 | img_prefix=data_root + 'val2017/')) 23 | evaluation = dict(metric=['bbox', 'segm']) 24 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/lvis_v1_instance.py: -------------------------------------------------------------------------------- 1 | _base_ = 
'coco_instance.py' 2 | dataset_type = 'LVISV1Dataset' 3 | data_root = 'data/lvis_v1/' 4 | data = dict( 5 | samples_per_gpu=2, 6 | workers_per_gpu=2, 7 | train=dict( 8 | _delete_=True, 9 | type='ClassBalancedDataset', 10 | oversample_thr=1e-3, 11 | dataset=dict( 12 | type=dataset_type, 13 | ann_file=data_root + 'annotations/lvis_v1_train.json', 14 | img_prefix=data_root)), 15 | val=dict( 16 | type=dataset_type, 17 | ann_file=data_root + 'annotations/lvis_v1_val.json', 18 | img_prefix=data_root), 19 | test=dict( 20 | type=dataset_type, 21 | ann_file=data_root + 'annotations/lvis_v1_val.json', 22 | img_prefix=data_root)) 23 | evaluation = dict(metric=['bbox', 'segm']) 24 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/voc0712.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'VOCDataset' 3 | data_root = 'data/VOCdevkit/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1000, 600), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=3, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=[ 40 | data_root + 'VOC2007/ImageSets/Main/trainval.txt', 41 | data_root + 'VOC2012/ImageSets/Main/trainval.txt' 42 | ], 43 | img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], 44 | pipeline=train_pipeline)), 45 | val=dict( 46 | type=dataset_type, 47 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 48 | img_prefix=data_root + 'VOC2007/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 53 | img_prefix=data_root + 'VOC2007/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(interval=1, metric='mAP') 56 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'WIDERFaceDataset' 3 | data_root = 'data/WIDERFace/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile', to_float32=True), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict( 9 | type='PhotoMetricDistortion', 10 | brightness_delta=32, 11 | contrast_range=(0.5, 1.5), 12 | saturation_range=(0.5, 1.5), 13 | hue_delta=18), 14 | dict( 15 | type='Expand', 16 | mean=img_norm_cfg['mean'], 17 | to_rgb=img_norm_cfg['to_rgb'], 18 | ratio_range=(1, 4)), 19 | dict( 20 | 
type='MinIoURandomCrop', 21 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 22 | min_crop_size=0.3), 23 | dict(type='Resize', img_scale=(300, 300), keep_ratio=False), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='RandomFlip', flip_ratio=0.5), 26 | dict(type='DefaultFormatBundle'), 27 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 28 | ] 29 | test_pipeline = [ 30 | dict(type='LoadImageFromFile'), 31 | dict( 32 | type='MultiScaleFlipAug', 33 | img_scale=(300, 300), 34 | flip=False, 35 | transforms=[ 36 | dict(type='Resize', keep_ratio=False), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='ImageToTensor', keys=['img']), 39 | dict(type='Collect', keys=['img']), 40 | ]) 41 | ] 42 | data = dict( 43 | samples_per_gpu=60, 44 | workers_per_gpu=2, 45 | train=dict( 46 | type='RepeatDataset', 47 | times=2, 48 | dataset=dict( 49 | type=dataset_type, 50 | ann_file=data_root + 'train.txt', 51 | img_prefix=data_root + 'WIDER_train/', 52 | min_size=17, 53 | pipeline=train_pipeline)), 54 | val=dict( 55 | type=dataset_type, 56 | ann_file=data_root + 'val.txt', 57 | img_prefix=data_root + 'WIDER_val/', 58 | pipeline=test_pipeline), 59 | test=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 'val.txt', 62 | img_prefix=data_root + 'WIDER_val/', 63 | pipeline=test_pipeline)) 64 | -------------------------------------------------------------------------------- /detection/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | roi_head=dict( 20 | type='StandardRoIHead', 21 | bbox_roi_extractor=dict( 22 | type='SingleRoIExtractor', 23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 24 | out_channels=256, 25 | featmap_strides=[4, 8, 16, 32]), 26 | bbox_head=dict( 27 | type='Shared2FCBBoxHead', 28 | in_channels=256, 29 | fc_out_channels=1024, 30 | roi_feat_size=7, 31 | num_classes=80, 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[0., 0., 0., 0.], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | reg_class_agnostic=False, 37 | loss_cls=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 40 | # model training and testing settings 41 | train_cfg=dict( 42 | rcnn=dict( 43 | assigner=dict( 44 | type='MaxIoUAssigner', 45 | pos_iou_thr=0.5, 46 | neg_iou_thr=0.5, 47 | min_pos_iou=0.5, 48 | match_low_quality=False, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 
| type='RandomSampler', 52 | num=512, 53 | pos_fraction=0.25, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=True), 56 | pos_weight=-1, 57 | debug=False)), 58 | test_cfg=dict( 59 | rcnn=dict( 60 | score_thr=0.05, 61 | nms=dict(type='nms', iou_threshold=0.5), 62 | max_per_img=100))) 63 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | rpn=dict( 66 | assigner=dict( 67 | type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict( 74 | type='RandomSampler', 75 | num=256, 76 | pos_fraction=0.5, 77 | neg_pos_ub=-1, 78 | add_gt_as_proposals=False), 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | rpn_proposal=dict( 83 | nms_pre=12000, 84 | max_per_img=2000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict( 88 | assigner=dict( 89 | type='MaxIoUAssigner', 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.5, 92 | min_pos_iou=0.5, 93 | match_low_quality=False, 94 | ignore_iof_thr=-1), 95 | sampler=dict( 96 | type='RandomSampler', 97 | num=512, 98 | pos_fraction=0.25, 99 | neg_pos_ub=-1, 100 | add_gt_as_proposals=True), 101 | pos_weight=-1, 102 | debug=False)), 103 | test_cfg=dict( 104 | rpn=dict( 105 | nms_pre=6000, 106 | max_per_img=1000, 107 | nms=dict(type='nms', iou_threshold=0.7), 108 | min_bbox_size=0), 109 | rcnn=dict( 110 | score_thr=0.05, 111 | nms=dict(type='nms', iou_threshold=0.5), 112 | max_per_img=100))) 113 | -------------------------------------------------------------------------------- 
/detection/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | strides=(1, 2, 2, 1), 11 | dilations=(1, 1, 1, 2), 12 | out_indices=(3, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=2048, 20 | feat_channels=2048, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=2048, 39 | featmap_strides=[16]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=2048, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | rpn=dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.7, 60 | neg_iou_thr=0.3, 61 | min_pos_iou=0.3, 62 | match_low_quality=True, 63 | ignore_iof_thr=-1), 64 | sampler=dict( 65 | type='RandomSampler', 66 | num=256, 67 | pos_fraction=0.5, 68 | neg_pos_ub=-1, 69 | add_gt_as_proposals=False), 70 | allowed_border=0, 71 | pos_weight=-1, 72 | debug=False), 73 | rpn_proposal=dict( 74 | nms_pre=12000, 75 | max_per_img=2000, 76 | nms=dict(type='nms', iou_threshold=0.7), 77 | min_bbox_size=0), 78 | rcnn=dict( 79 | assigner=dict( 80 | type='MaxIoUAssigner', 81 | pos_iou_thr=0.5, 82 | neg_iou_thr=0.5, 83 | min_pos_iou=0.5, 84 | match_low_quality=False, 85 | ignore_iof_thr=-1), 86 | sampler=dict( 87 | type='RandomSampler', 88 | num=512, 89 | pos_fraction=0.25, 90 | neg_pos_ub=-1, 91 | add_gt_as_proposals=True), 92 | pos_weight=-1, 93 | debug=False)), 94 | test_cfg=dict( 95 | rpn=dict( 96 | nms=dict(type='nms', iou_threshold=0.7), 97 | nms_pre=6000, 98 | max_per_img=1000, 99 | min_bbox_size=0), 100 | rcnn=dict( 101 | score_thr=0.05, 102 | nms=dict(type='nms', iou_threshold=0.5), 103 | max_per_img=100))) 104 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FasterRCNN', 3 | pretrained='torchvision://resnet50', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch'), 13 | neck=dict( 14 | type='FPN', 15 | in_channels=[256, 512, 1024, 2048], 16 | out_channels=256, 17 | 
num_outs=5), 18 | rpn_head=dict( 19 | type='RPNHead', 20 | in_channels=256, 21 | feat_channels=256, 22 | anchor_generator=dict( 23 | type='AnchorGenerator', 24 | scales=[8], 25 | ratios=[0.5, 1.0, 2.0], 26 | strides=[4, 8, 16, 32, 64]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[1.0, 1.0, 1.0, 1.0]), 31 | loss_cls=dict( 32 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 33 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 34 | roi_head=dict( 35 | type='StandardRoIHead', 36 | bbox_roi_extractor=dict( 37 | type='SingleRoIExtractor', 38 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 39 | out_channels=256, 40 | featmap_strides=[4, 8, 16, 32]), 41 | bbox_head=dict( 42 | type='Shared2FCBBoxHead', 43 | in_channels=256, 44 | fc_out_channels=1024, 45 | roi_feat_size=7, 46 | num_classes=80, 47 | bbox_coder=dict( 48 | type='DeltaXYWHBBoxCoder', 49 | target_means=[0., 0., 0., 0.], 50 | target_stds=[0.1, 0.1, 0.2, 0.2]), 51 | reg_class_agnostic=False, 52 | loss_cls=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 54 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 55 | # model training and testing settings 56 | train_cfg=dict( 57 | rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | rpn_proposal=dict( 75 | nms_pre=2000, 76 | max_per_img=1000, 77 | nms=dict(type='nms', iou_threshold=0.7), 78 | min_bbox_size=0), 79 | rcnn=dict( 80 | assigner=dict( 81 | type='MaxIoUAssigner', 82 | pos_iou_thr=0.5, 83 | neg_iou_thr=0.5, 84 | min_pos_iou=0.5, 85 | match_low_quality=False, 86 | ignore_iof_thr=-1), 87 | sampler=dict( 88 | type='RandomSampler', 89 | num=512, 90 | pos_fraction=0.25, 91 | neg_pos_ub=-1, 92 | add_gt_as_proposals=True), 93 | pos_weight=-1, 94 | debug=False)), 95 | test_cfg=dict( 96 | rpn=dict( 97 | nms_pre=1000, 98 | max_per_img=1000, 99 | nms=dict(type='nms', iou_threshold=0.7), 100 | min_bbox_size=0), 101 | rcnn=dict( 102 | score_thr=0.05, 103 | nms=dict(type='nms', iou_threshold=0.5), 104 | max_per_img=100) 105 | # soft-nms is also supported for rcnn testing 106 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 107 | )) 108 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_lsnet_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='lsnet_t', 7 | pretrained="",), 8 | neck=dict( 9 | type='FPN', 10 | in_channels=[256, 512, 1024, 2048], 11 | out_channels=256, 12 | num_outs=5), 13 | rpn_head=dict( 14 | type='RPNHead', 15 | in_channels=256, 16 | feat_channels=256, 17 | anchor_generator=dict( 18 | type='AnchorGenerator', 19 | scales=[8], 20 | ratios=[0.5, 1.0, 2.0], 21 | strides=[4, 8, 16, 32, 64]), 22 | bbox_coder=dict( 23 | type='DeltaXYWHBBoxCoder', 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0]), 26 | loss_cls=dict( 27 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 28 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 29 | roi_head=dict( 30 | 
type='StandardRoIHead', 31 | bbox_roi_extractor=dict( 32 | type='SingleRoIExtractor', 33 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 34 | out_channels=256, 35 | featmap_strides=[4, 8, 16, 32]), 36 | bbox_head=dict( 37 | type='Shared2FCBBoxHead', 38 | in_channels=256, 39 | fc_out_channels=1024, 40 | roi_feat_size=7, 41 | num_classes=80, 42 | bbox_coder=dict( 43 | type='DeltaXYWHBBoxCoder', 44 | target_means=[0., 0., 0., 0.], 45 | target_stds=[0.1, 0.1, 0.2, 0.2]), 46 | reg_class_agnostic=False, 47 | loss_cls=dict( 48 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 49 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 50 | mask_roi_extractor=dict( 51 | type='SingleRoIExtractor', 52 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 53 | out_channels=256, 54 | featmap_strides=[4, 8, 16, 32]), 55 | mask_head=dict( 56 | type='FCNMaskHead', 57 | num_convs=4, 58 | in_channels=256, 59 | conv_out_channels=256, 60 | num_classes=80, 61 | loss_mask=dict( 62 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | rpn=dict( 66 | assigner=dict( 67 | type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict( 74 | type='RandomSampler', 75 | num=256, 76 | pos_fraction=0.5, 77 | neg_pos_ub=-1, 78 | add_gt_as_proposals=False), 79 | allowed_border=-1, 80 | pos_weight=-1, 81 | debug=False), 82 | rpn_proposal=dict( 83 | nms_pre=2000, 84 | max_per_img=1000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict( 88 | assigner=dict( 89 | type='MaxIoUAssigner', 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.5, 92 | min_pos_iou=0.5, 93 | match_low_quality=True, 94 | ignore_iof_thr=-1), 95 | sampler=dict( 96 | type='RandomSampler', 97 | num=512, 98 | pos_fraction=0.25, 99 | neg_pos_ub=-1, 100 | add_gt_as_proposals=True), 101 | mask_size=28, 102 | pos_weight=-1, 103 | debug=False)), 104 | test_cfg=dict( 105 | rpn=dict( 106 | nms_pre=1000, 107 | max_per_img=1000, 108 | nms=dict(type='nms', iou_threshold=0.7), 109 | min_bbox_size=0), 110 | rcnn=dict( 111 | score_thr=0.05, 112 | nms=dict(type='nms', iou_threshold=0.5), 113 | max_per_img=100, 114 | mask_thr_binary=0.5))) 115 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 
37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 63 | mask_roi_extractor=None, 64 | mask_head=dict( 65 | type='FCNMaskHead', 66 | num_convs=0, 67 | in_channels=2048, 68 | conv_out_channels=256, 69 | num_classes=80, 70 | loss_mask=dict( 71 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 72 | # model training and testing settings 73 | train_cfg=dict( 74 | rpn=dict( 75 | assigner=dict( 76 | type='MaxIoUAssigner', 77 | pos_iou_thr=0.7, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | match_low_quality=True, 81 | ignore_iof_thr=-1), 82 | sampler=dict( 83 | type='RandomSampler', 84 | num=256, 85 | pos_fraction=0.5, 86 | neg_pos_ub=-1, 87 | add_gt_as_proposals=False), 88 | allowed_border=0, 89 | pos_weight=-1, 90 | debug=False), 91 | rpn_proposal=dict( 92 | nms_pre=12000, 93 | max_per_img=2000, 94 | nms=dict(type='nms', iou_threshold=0.7), 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | assigner=dict( 98 | type='MaxIoUAssigner', 99 | pos_iou_thr=0.5, 100 | neg_iou_thr=0.5, 101 | min_pos_iou=0.5, 102 | match_low_quality=False, 103 | ignore_iof_thr=-1), 104 | sampler=dict( 105 | type='RandomSampler', 106 | num=512, 107 | pos_fraction=0.25, 108 | neg_pos_ub=-1, 109 | add_gt_as_proposals=True), 110 | mask_size=14, 111 | pos_weight=-1, 112 | debug=False)), 113 | test_cfg=dict( 114 | rpn=dict( 115 | nms_pre=6000, 116 | nms=dict(type='nms', iou_threshold=0.7), 117 | max_per_img=1000, 118 | min_bbox_size=0), 119 | rcnn=dict( 120 | score_thr=0.05, 121 | nms=dict(type='nms', iou_threshold=0.5), 122 | max_per_img=100, 123 | mask_thr_binary=0.5))) 124 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | 
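            # RoIAlign with sampling_ratio=0 samples bin points adaptively; the box
            # branch pools 7x7 features, while the mask branch below pools 14x14.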
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_pre=2000, 90 | max_per_img=1000, 91 | nms=dict(type='nms', iou_threshold=0.7), 92 | min_bbox_size=0), 93 | rcnn=dict( 94 | assigner=dict( 95 | type='MaxIoUAssigner', 96 | pos_iou_thr=0.5, 97 | neg_iou_thr=0.5, 98 | min_pos_iou=0.5, 99 | match_low_quality=True, 100 | ignore_iof_thr=-1), 101 | sampler=dict( 102 | type='RandomSampler', 103 | num=512, 104 | pos_fraction=0.25, 105 | neg_pos_ub=-1, 106 | add_gt_as_proposals=True), 107 | mask_size=28, 108 | pos_weight=-1, 109 | debug=False)), 110 | test_cfg=dict( 111 | rpn=dict( 112 | nms_pre=1000, 113 | max_per_img=1000, 114 | nms=dict(type='nms', iou_threshold=0.7), 115 | min_bbox_size=0), 116 | rcnn=dict( 117 | score_thr=0.05, 118 | nms=dict(type='nms', iou_threshold=0.5), 119 | max_per_img=100, 120 | mask_thr_binary=0.5))) 121 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_swin_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained=None, 5 | backbone=dict( 6 | type='SwinTransformer', 7 | embed_dim=96, 8 | depths=[2, 2, 6, 2], 9 | num_heads=[3, 6, 12, 24], 10 | window_size=7, 11 | mlp_ratio=4., 12 | qkv_bias=True, 13 | qk_scale=None, 14 | drop_rate=0., 15 | attn_drop_rate=0., 16 | drop_path_rate=0.2, 17 | ape=False, 18 | patch_norm=True, 19 | out_indices=(0, 1, 2, 3), 20 | use_checkpoint=False), 21 | neck=dict( 22 | type='FPN', 23 | in_channels=[96, 192, 384, 768], 24 | out_channels=256, 25 | num_outs=5), 26 | rpn_head=dict( 27 | type='RPNHead', 28 | in_channels=256, 29 | feat_channels=256, 30 | anchor_generator=dict( 31 | type='AnchorGenerator', 32 | scales=[8], 33 | ratios=[0.5, 1.0, 2.0], 34 | strides=[4, 8, 16, 32, 64]), 35 | bbox_coder=dict( 36 | type='DeltaXYWHBBoxCoder', 37 | target_means=[.0, .0, .0, .0], 38 | target_stds=[1.0, 1.0, 1.0, 1.0]), 39 | loss_cls=dict( 40 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 41 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 42 | roi_head=dict( 43 | 
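        # Apart from the Swin backbone and its [96, 192, 384, 768] feature pyramid,
        # the RPN/RoI heads and the train/test settings below are identical to the
        # mask_rcnn_r50_fpn baseline above.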
type='StandardRoIHead', 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 47 | out_channels=256, 48 | featmap_strides=[4, 8, 16, 32]), 49 | bbox_head=dict( 50 | type='Shared2FCBBoxHead', 51 | in_channels=256, 52 | fc_out_channels=1024, 53 | roi_feat_size=7, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 63 | mask_roi_extractor=dict( 64 | type='SingleRoIExtractor', 65 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 66 | out_channels=256, 67 | featmap_strides=[4, 8, 16, 32]), 68 | mask_head=dict( 69 | type='FCNMaskHead', 70 | num_convs=4, 71 | in_channels=256, 72 | conv_out_channels=256, 73 | num_classes=80, 74 | loss_mask=dict( 75 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 76 | # model training and testing settings 77 | train_cfg=dict( 78 | rpn=dict( 79 | assigner=dict( 80 | type='MaxIoUAssigner', 81 | pos_iou_thr=0.7, 82 | neg_iou_thr=0.3, 83 | min_pos_iou=0.3, 84 | match_low_quality=True, 85 | ignore_iof_thr=-1), 86 | sampler=dict( 87 | type='RandomSampler', 88 | num=256, 89 | pos_fraction=0.5, 90 | neg_pos_ub=-1, 91 | add_gt_as_proposals=False), 92 | allowed_border=-1, 93 | pos_weight=-1, 94 | debug=False), 95 | rpn_proposal=dict( 96 | nms_pre=2000, 97 | max_per_img=1000, 98 | nms=dict(type='nms', iou_threshold=0.7), 99 | min_bbox_size=0), 100 | rcnn=dict( 101 | assigner=dict( 102 | type='MaxIoUAssigner', 103 | pos_iou_thr=0.5, 104 | neg_iou_thr=0.5, 105 | min_pos_iou=0.5, 106 | match_low_quality=True, 107 | ignore_iof_thr=-1), 108 | sampler=dict( 109 | type='RandomSampler', 110 | num=512, 111 | pos_fraction=0.25, 112 | neg_pos_ub=-1, 113 | add_gt_as_proposals=True), 114 | mask_size=28, 115 | pos_weight=-1, 116 | debug=False)), 117 | test_cfg=dict( 118 | rpn=dict( 119 | nms_pre=1000, 120 | max_per_img=1000, 121 | nms=dict(type='nms', iou_threshold=0.7), 122 | min_bbox_size=0), 123 | rcnn=dict( 124 | score_thr=0.05, 125 | nms=dict(type='nms', iou_threshold=0.5), 126 | max_per_img=100, 127 | mask_thr_binary=0.5))) 128 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_reppointsv2_swin_bifpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) 3 | model = dict( 4 | type='RepPointsV2MaskDetector', 5 | pretrained=None, 6 | backbone=dict( 7 | type='SwinTransformer', 8 | embed_dim=96, 9 | depths=[2, 2, 6, 2], 10 | num_heads=[3, 6, 12, 24], 11 | window_size=7, 12 | mlp_ratio=4., 13 | qkv_bias=True, 14 | qk_scale=None, 15 | drop_rate=0., 16 | attn_drop_rate=0., 17 | drop_path_rate=0.2, 18 | ape=False, 19 | patch_norm=True, 20 | out_indices=(1, 2, 3), 21 | use_checkpoint=False), 22 | neck=dict( 23 | type='BiFPN', 24 | in_channels=[192, 384, 768], 25 | out_channels=256, 26 | start_level=0, 27 | add_extra_convs=False, 28 | num_outs=5, 29 | no_norm_on_lateral=False, 30 | num_repeat=2, 31 | norm_cfg=norm_cfg 32 | ), 33 | bbox_head=dict( 34 | type='RepPointsV2Head', 35 | num_classes=80, 36 | in_channels=256, 37 | feat_channels=256, 38 | point_feat_channels=256, 39 | stacked_convs=3, 40 | shared_stacked_convs=1, 41 | 
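        # The BiFPN neck above takes only the stride-8/16/32 Swin stages
        # (out_indices=(1, 2, 3)), repeats its fusion block twice (num_repeat=2),
        # and expands to num_outs=5 pyramid levels.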
first_kernel_size=3, 42 | kernel_size=1, 43 | corner_dim=64, 44 | num_points=9, 45 | gradient_mul=0.1, 46 | point_strides=[8, 16, 32, 64, 128], 47 | point_base_scale=4, 48 | norm_cfg=norm_cfg, 49 | loss_cls=dict( 50 | type='RPDQualityFocalLoss', 51 | use_sigmoid=True, 52 | beta=2.0, 53 | loss_weight=1.0), 54 | loss_bbox_init=dict(type='RPDGIoULoss', loss_weight=1.0), 55 | loss_bbox_refine=dict(type='RPDGIoULoss', loss_weight=2.0), 56 | loss_heatmap=dict( 57 | type='GaussianFocalLoss', 58 | alpha=2.0, 59 | gamma=4.0, 60 | loss_weight=0.25), 61 | loss_offset=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 62 | loss_sem=dict( 63 | type='SEPFocalLoss', 64 | gamma=2.0, 65 | alpha=0.25, 66 | loss_weight=0.1), 67 | transform_method='exact_minmax', 68 | # new for condconv 69 | coord_pos='center', 70 | mask_head=dict( 71 | type='CondConvMaskHead', 72 | branch_cfg=dict( 73 | in_channels=256, # == neck out channels 74 | channels=128, 75 | in_features=[0,1,2], 76 | out_stride=[8,16,32], # p3, p4, p5 77 | norm=dict(type='BN', requires_grad=True), 78 | num_convs=4, 79 | out_channels=8, 80 | semantic_loss_on=False, 81 | num_classes=80, 82 | loss_sem=dict( 83 | type='FocalLoss', 84 | use_sigmoid=True, 85 | gamma=2.0, 86 | alpha=0.25, 87 | loss_weight=1.0, 88 | prior_prob=0.01) 89 | ), 90 | head_cfg=dict( 91 | channels=8, 92 | disable_rel_coords=False, 93 | num_layers=3, 94 | use_fp16=False, 95 | mask_out_stride=4, 96 | max_proposals=500, 97 | aux_loss=True, 98 | mask_loss_weight=[0.,0.6,1.], 99 | sizes_of_interest=[64, 128, 256, 512, 1024] 100 | ), 101 | )), 102 | train_cfg = dict( 103 | init=dict( 104 | assigner=dict(type='PointAssignerV2', scale=4, pos_num=1, mask_center_sample=True, use_center=True), 105 | allowed_border=-1, 106 | pos_weight=-1, 107 | debug=False), 108 | heatmap=dict( 109 | assigner=dict(type='PointHMAssigner', gaussian_bump=True, gaussian_iou=0.7), 110 | allowed_border=-1, 111 | pos_weight=-1, 112 | debug=False), 113 | refine=dict( 114 | assigner=dict(type='ATSSAssignerV2', topk=9, mask_center_sample=True), 115 | allowed_border=-1, 116 | pos_weight=-1, 117 | debug=False)), 118 | test_cfg = dict( 119 | nms_pre=1000, 120 | min_bbox_size=0, 121 | score_thr=0.05, 122 | nms=dict(type='nms', iou_threshold=0.6), 123 | max_per_img=100) 124 | ) -------------------------------------------------------------------------------- /detection/configs/_base_/models/reppointsv2_swin_bifpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) 3 | model = dict( 4 | type='RepPointsV2Detector', 5 | pretrained=None, 6 | backbone=dict( 7 | type='SwinTransformer', 8 | embed_dim=96, 9 | depths=[2, 2, 6, 2], 10 | num_heads=[3, 6, 12, 24], 11 | window_size=7, 12 | mlp_ratio=4., 13 | qkv_bias=True, 14 | qk_scale=None, 15 | drop_rate=0., 16 | attn_drop_rate=0., 17 | drop_path_rate=0.2, 18 | ape=False, 19 | patch_norm=True, 20 | out_indices=(1, 2, 3), 21 | use_checkpoint=False), 22 | neck=dict( 23 | type='BiFPN', 24 | in_channels=[192, 384, 768], 25 | out_channels=256, 26 | start_level=0, 27 | add_extra_convs=False, 28 | num_outs=5, 29 | no_norm_on_lateral=False, 30 | num_repeat=2, 31 | norm_cfg=norm_cfg 32 | ), 33 | bbox_head=dict( 34 | type='RepPointsV2Head', 35 | num_classes=80, 36 | in_channels=256, 37 | feat_channels=256, 38 | point_feat_channels=256, 39 | stacked_convs=3, 40 | shared_stacked_convs=1, 41 | first_kernel_size=3, 42 | kernel_size=1, 43 | corner_dim=64, 44 | 
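        # RepPointsV2 represents each object as a point set (num_points below) that is
        # decoded into a box via transform_method='exact_minmax'; the heatmap/offset/sem
        # losses further down supervise its auxiliary verification branches.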
num_points=9, 45 | gradient_mul=0.1, 46 | point_strides=[8, 16, 32, 64, 128], 47 | point_base_scale=4, 48 | norm_cfg=norm_cfg, 49 | loss_cls=dict( 50 | type='RPDQualityFocalLoss', 51 | use_sigmoid=True, 52 | beta=2.0, 53 | loss_weight=1.0), 54 | loss_bbox_init=dict(type='RPDGIoULoss', loss_weight=1.0), 55 | loss_bbox_refine=dict(type='RPDGIoULoss', loss_weight=2.0), 56 | loss_heatmap=dict( 57 | type='GaussianFocalLoss', 58 | alpha=2.0, 59 | gamma=4.0, 60 | loss_weight=0.25), 61 | loss_offset=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 62 | loss_sem=dict( 63 | type='SEPFocalLoss', 64 | gamma=2.0, 65 | alpha=0.25, 66 | loss_weight=0.1), 67 | transform_method='exact_minmax'), 68 | # training and testing settings 69 | train_cfg = dict( 70 | init=dict( 71 | assigner=dict(type='PointAssignerV2', scale=4, pos_num=1), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | heatmap=dict( 76 | assigner=dict(type='PointHMAssigner', gaussian_bump=True, gaussian_iou=0.7), 77 | allowed_border=-1, 78 | pos_weight=-1, 79 | debug=False), 80 | refine=dict( 81 | assigner=dict(type='ATSSAssignerV2', topk=9), 82 | allowed_border=-1, 83 | pos_weight=-1, 84 | debug=False)), 85 | test_cfg = dict( 86 | nms_pre=1000, 87 | min_bbox_size=0, 88 | score_thr=0.05, 89 | nms=dict(type='nms', iou_threshold=0.6), 90 | max_per_img=100), 91 | ) -------------------------------------------------------------------------------- /detection/configs/_base_/models/retinanet_lsnet_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='lsnet_t', 7 | pretrained="",), 8 | neck=dict( 9 | type='FPN', 10 | in_channels=[256, 512, 1024, 2048], 11 | out_channels=256, 12 | start_level=1, 13 | add_extra_convs='on_input', 14 | num_outs=5), 15 | bbox_head=dict( 16 | type='RetinaHead', 17 | num_classes=80, 18 | in_channels=256, 19 | stacked_convs=4, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | octave_base_scale=4, 24 | scales_per_octave=3, 25 | ratios=[0.5, 1.0, 2.0], 26 | strides=[8, 16, 32, 64, 128]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[1.0, 1.0, 1.0, 1.0]), 31 | loss_cls=dict( 32 | type='FocalLoss', 33 | use_sigmoid=True, 34 | gamma=2.0, 35 | alpha=0.25, 36 | loss_weight=1.0), 37 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 38 | # training and testing settings 39 | train_cfg=dict( 40 | assigner=dict( 41 | type='MaxIoUAssigner', 42 | pos_iou_thr=0.5, 43 | neg_iou_thr=0.4, 44 | min_pos_iou=0, 45 | ignore_iof_thr=-1), 46 | allowed_border=-1, 47 | pos_weight=-1, 48 | debug=False), 49 | test_cfg=dict( 50 | nms_pre=1000, 51 | min_bbox_size=0, 52 | score_thr=0.05, 53 | nms=dict(type='nms', iou_threshold=0.5), 54 | max_per_img=100)) 55 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | 
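        # start_level=1 with add_extra_convs='on_input' (just below) builds the P3-P7
        # pyramid RetinaNet uses (head strides [8, 16, 32, 64, 128]), rather than the
        # P2-P6 pyramid of the two-stage configs above.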
out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=dict(type='BN', requires_grad=False), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict( 35 | rpn=dict( 36 | assigner=dict( 37 | type='MaxIoUAssigner', 38 | pos_iou_thr=0.7, 39 | neg_iou_thr=0.3, 40 | min_pos_iou=0.3, 41 | ignore_iof_thr=-1), 42 | sampler=dict( 43 | type='RandomSampler', 44 | num=256, 45 | pos_fraction=0.5, 46 | neg_pos_ub=-1, 47 | add_gt_as_proposals=False), 48 | allowed_border=0, 49 | pos_weight=-1, 50 | debug=False)), 51 | test_cfg=dict( 52 | rpn=dict( 53 | nms_pre=12000, 54 | max_per_img=2000, 55 | nms=dict(type='nms', iou_threshold=0.7), 56 | min_bbox_size=0))) 57 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | 3 | model = dict( 4 | type='RPN', 5 | pretrained='torchvision://resnet50', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | norm_eval=True, 14 | style='pytorch'), 15 | neck=dict( 16 | type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | num_outs=5), 20 | rpn_head=dict( 21 | type='RPNHead', 22 | in_channels=256, 23 | feat_channels=256, 24 | anchor_generator=dict( 25 | 
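        # A single base scale (scales=[8]) with three aspect ratios yields one anchor
        # size per FPN level, i.e. 3 anchors per location.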
type='AnchorGenerator', 26 | scales=[8], 27 | ratios=[0.5, 1.0, 2.0], 28 | strides=[4, 8, 16, 32, 64]), 29 | bbox_coder=dict( 30 | type='DeltaXYWHBBoxCoder', 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0]), 33 | loss_cls=dict( 34 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 35 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 36 | # model training and testing settings 37 | train_cfg=dict( 38 | rpn=dict( 39 | assigner=dict( 40 | type='MaxIoUAssigner', 41 | pos_iou_thr=0.7, 42 | neg_iou_thr=0.3, 43 | min_pos_iou=0.3, 44 | ignore_iof_thr=-1), 45 | sampler=dict( 46 | type='RandomSampler', 47 | num=256, 48 | pos_fraction=0.5, 49 | neg_pos_ub=-1, 50 | add_gt_as_proposals=False), 51 | allowed_border=0, 52 | pos_weight=-1, 53 | debug=False)), 54 | test_cfg=dict( 55 | rpn=dict( 56 | nms_pre=2000, 57 | max_per_img=1000, 58 | nms=dict(type='nms', iou_threshold=0.7), 59 | min_bbox_size=0))) 60 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | in_channels=(512, 1024, 512, 256, 256, 256), 19 | num_classes=80, 20 | anchor_generator=dict( 21 | type='SSDAnchorGenerator', 22 | scale_major=False, 23 | input_size=input_size, 24 | basesize_ratio_range=(0.15, 0.9), 25 | strides=[8, 16, 32, 64, 100, 300], 26 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[0.1, 0.1, 0.2, 0.2])), 31 | train_cfg=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.5, 35 | neg_iou_thr=0.5, 36 | min_pos_iou=0., 37 | ignore_iof_thr=-1, 38 | gt_max_assign_all=False), 39 | smoothl1_beta=1., 40 | allowed_border=-1, 41 | pos_weight=-1, 42 | neg_pos_ratio=3, 43 | debug=False), 44 | test_cfg=dict( 45 | nms_pre=1000, 46 | nms=dict(type='nms', iou_threshold=0.45), 47 | min_bbox_size=0, 48 | score_thr=0.02, 49 | max_per_img=200)) 50 | cudnn_benchmark = True 51 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | 
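# Step policy with the default gamma of 0.1: the base LR of 0.02 is decayed tenfold
# at epochs 16 and 19, after a 500-iteration linear warmup starting from
# warmup_ratio * lr.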
runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | -------------------------------------------------------------------------------- /detection/configs/mask_rcnn_lsnet_b_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './_base_/models/mask_rcnn_lsnet_fpn.py', 3 | './_base_/datasets/coco_instance.py', 4 | './_base_/schedules/schedule_1x.py', 5 | './_base_/default_runtime.py' 6 | ] 7 | 8 | model = dict( 9 | pretrained=None, 10 | backbone=dict( 11 | type='lsnet_b', 12 | pretrained="pretrain/lsnet_b.pth", 13 | frozen_stages=-1, 14 | ), 15 | neck=dict( 16 | type='LSNetFPN', 17 | in_channels=[128, 256, 384, 512], 18 | out_channels=256, 19 | start_level=0, 20 | num_outs=5, 21 | num_extra_trans_convs=1, 22 | )) 23 | 24 | # optimizer 25 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 26 | paramwise_cfg=dict(custom_keys={'attention_biases': dict(decay_mult=0.), 27 | 'attention_bias_idxs': dict(decay_mult=0.), 28 | })) 29 | # optimizer_config = dict(grad_clip=None) 30 | # do not use mmdet version fp16 31 | # fp16 = None 32 | optimizer_config = dict(grad_clip=None) 33 | # learning policy 34 | lr_config = dict( 35 | policy='step', 36 | warmup='linear', 37 | warmup_iters=500, 38 | warmup_ratio=0.001, 39 | step=[8, 11]) 40 | total_epochs = 12 41 | -------------------------------------------------------------------------------- /detection/configs/mask_rcnn_lsnet_s_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './_base_/models/mask_rcnn_lsnet_fpn.py', 3 | './_base_/datasets/coco_instance.py', 4 | './_base_/schedules/schedule_1x.py', 5 | './_base_/default_runtime.py' 6 | ] 7 | 8 | model = dict( 9 | pretrained=None, 10 | backbone=dict( 11 | type='lsnet_s', 12 | pretrained="pretrain/lsnet_s.pth", 13 | frozen_stages=-1, 14 | ), 15 | neck=dict( 16 | type='LSNetFPN', 17 | in_channels=[96, 192, 320, 448], 18 | out_channels=256, 19 | start_level=0, 20 | num_outs=5, 21 | num_extra_trans_convs=1, 22 | )) 23 | 24 | # optimizer 25 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 26 | paramwise_cfg=dict(custom_keys={'attention_biases': dict(decay_mult=0.), 27 | 'attention_bias_idxs': dict(decay_mult=0.), 28 | })) 29 | # optimizer_config = dict(grad_clip=None) 30 | # do not use mmdet version fp16 31 | # fp16 = None 32 | optimizer_config = dict(grad_clip=None) 33 | # learning policy 34 | lr_config = dict( 35 | policy='step', 36 | warmup='linear', 37 | warmup_iters=500, 38 | warmup_ratio=0.001, 39 | step=[8, 11]) 40 | total_epochs = 12 41 | -------------------------------------------------------------------------------- /detection/configs/mask_rcnn_lsnet_t_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './_base_/models/mask_rcnn_lsnet_fpn.py', 3 | 
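    # The t/s/b variants share this skeleton and differ only in backbone type,
    # pretrained checkpoint and LSNetFPN in_channels; AdamW (via _delete_=True)
    # replaces the SGD inherited from schedule_1x, with weight decay disabled on
    # the attention-bias parameters.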
'./_base_/datasets/coco_instance.py', 4 | './_base_/schedules/schedule_1x.py', 5 | './_base_/default_runtime.py' 6 | ] 7 | 8 | model = dict( 9 | pretrained=None, 10 | backbone=dict( 11 | type='lsnet_t', 12 | pretrained="pretrain/lsnet_t.pth", 13 | frozen_stages=-1, 14 | ), 15 | neck=dict( 16 | type='LSNetFPN', 17 | in_channels=[64, 128, 256, 384], 18 | out_channels=256, 19 | start_level=0, 20 | num_outs=5, 21 | num_extra_trans_convs=1, 22 | )) 23 | 24 | # optimizer 25 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 26 | paramwise_cfg=dict(custom_keys={'attention_biases': dict(decay_mult=0.), 27 | 'attention_bias_idxs': dict(decay_mult=0.), 28 | })) 29 | # optimizer_config = dict(grad_clip=None) 30 | # do not use mmdet version fp16 31 | # fp16 = None 32 | optimizer_config = dict(grad_clip=None) 33 | # learning policy 34 | lr_config = dict( 35 | policy='step', 36 | warmup='linear', 37 | warmup_iters=500, 38 | warmup_ratio=0.001, 39 | step=[8, 11]) 40 | total_epochs = 12 41 | -------------------------------------------------------------------------------- /detection/configs/retinanet_lsnet_b_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './_base_/models/retinanet_lsnet_fpn.py', 3 | './_base_/datasets/coco_detection.py', 4 | './_base_/schedules/schedule_1x.py', 5 | './_base_/default_runtime.py' 6 | ] 7 | 8 | model = dict( 9 | pretrained=None, 10 | backbone=dict( 11 | type='lsnet_b', 12 | pretrained="pretrain/lsnet_b.pth", 13 | frozen_stages=-1, 14 | ), 15 | neck=dict( 16 | type='LSNetFPN', 17 | in_channels=[128, 256, 384, 512], 18 | out_channels=256, 19 | start_level=0, 20 | num_outs=5, 21 | num_extra_trans_convs=0, 22 | )) 23 | 24 | # optimizer 25 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 26 | paramwise_cfg=dict(custom_keys={'attention_biases': dict(decay_mult=0.), 27 | 'attention_bias_idxs': dict(decay_mult=0.), 28 | })) 29 | # optimizer_config = dict(grad_clip=None) 30 | # do not use mmdet version fp16 31 | # fp16 = None 32 | optimizer_config = dict(grad_clip=None) 33 | # learning policy 34 | lr_config = dict( 35 | policy='step', 36 | warmup='linear', 37 | warmup_iters=500, 38 | warmup_ratio=0.001, 39 | step=[8, 11]) 40 | total_epochs = 12 41 | -------------------------------------------------------------------------------- /detection/configs/retinanet_lsnet_s_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './_base_/models/retinanet_lsnet_fpn.py', 3 | './_base_/datasets/coco_detection.py', 4 | './_base_/schedules/schedule_1x.py', 5 | './_base_/default_runtime.py' 6 | ] 7 | 8 | model = dict( 9 | pretrained=None, 10 | backbone=dict( 11 | type='lsnet_s', 12 | pretrained="pretrain/lsnet_s.pth", 13 | frozen_stages=-1, 14 | ), 15 | neck=dict( 16 | type='LSNetFPN', 17 | in_channels=[96, 192, 320, 448], 18 | out_channels=256, 19 | start_level=0, 20 | num_outs=5, 21 | )) 22 | 23 | # optimizer 24 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 25 | paramwise_cfg=dict(custom_keys={'attention_biases': dict(decay_mult=0.), 26 | 'attention_bias_idxs': dict(decay_mult=0.), 27 | })) 28 | # optimizer_config = dict(grad_clip=None) 29 | # do not use mmdet version fp16 30 | # fp16 = None 31 | optimizer_config = dict(grad_clip=None) 32 | # learning policy 33 | lr_config = dict( 34 | policy='step', 35 | warmup='linear', 36 
| warmup_iters=500, 37 | warmup_ratio=0.001, 38 | step=[8, 11]) 39 | total_epochs = 12 40 | -------------------------------------------------------------------------------- /detection/configs/retinanet_lsnet_t_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './_base_/models/retinanet_lsnet_fpn.py', 3 | './_base_/datasets/coco_detection.py', 4 | './_base_/schedules/schedule_1x.py', 5 | './_base_/default_runtime.py' 6 | ] 7 | 8 | model = dict( 9 | pretrained=None, 10 | backbone=dict( 11 | type='lsnet_t', 12 | pretrained="pretrain/lsnet_t.pth", 13 | frozen_stages=-1, 14 | ), 15 | neck=dict( 16 | type='LSNetFPN', 17 | in_channels=[64, 128, 256, 384], 18 | out_channels=256, 19 | start_level=0, 20 | num_outs=5, 21 | )) 22 | 23 | # optimizer 24 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 25 | paramwise_cfg=dict(custom_keys={'attention_biases': dict(decay_mult=0.), 26 | 'attention_bias_idxs': dict(decay_mult=0.), 27 | })) 28 | # optimizer_config = dict(grad_clip=None) 29 | # do not use mmdet version fp16 30 | # fp16 = None 31 | optimizer_config = dict(grad_clip=None) 32 | # learning policy 33 | lr_config = dict( 34 | policy='step', 35 | warmup='linear', 36 | warmup_iters=500, 37 | warmup_ratio=0.001, 38 | step=[8, 11]) 39 | total_epochs = 12 40 | -------------------------------------------------------------------------------- /detection/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | NNODES=${NNODES:-1} 7 | NODE_RANK=${NODE_RANK:-0} 8 | PORT=${PORT:-29500} 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 10 | 11 | PYTHONPATH="$(dirname $0)/.":$PYTHONPATH \ 12 | NCCL_P2P_DISABLE=1 \ 13 | python -m torch.distributed.launch \ 14 | --nnodes=$NNODES \ 15 | --node_rank=$NODE_RANK \ 16 | --master_addr=$MASTER_ADDR \ 17 | --nproc_per_node=$GPUS \ 18 | --master_port=$PORT \ 19 | $(dirname "$0")/test.py \ 20 | $CONFIG \ 21 | $CHECKPOINT \ 22 | --launcher pytorch \ 23 | ${@:4} -------------------------------------------------------------------------------- /detection/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/.":$PYTHONPATH \ 11 | NCCL_P2P_DISABLE=1 \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/train.py \ 19 | $CONFIG \ 20 | --seed 0 \ 21 | --launcher pytorch ${@:3} -------------------------------------------------------------------------------- /detection/eval.sh: -------------------------------------------------------------------------------- 1 | # For RetinaNet 2 | bash ./dist_test.sh configs/retinanet_lsnet_t_fpn_1x_coco.py pretrain/lsnet_t_retinanet.pth 8 --eval bbox --out results.pkl 3 | 4 | # For Mask R-CNN 5 | bash ./dist_test.sh configs/mask_rcnn_lsnet_t_fpn_1x_coco.py pretrain/lsnet_t_maskrcnn.pth 8 --eval bbox segm --out results.pkl -------------------------------------------------------------------------------- /detection/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- 
coding: utf-8 -*- 2 | 3 | from .checkpoint import load_checkpoint, load_state_dict, _load_checkpoint 4 | 5 | __all__ = ['load_checkpoint', 'load_state_dict', '_load_checkpoint'] 6 | -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | from .checkpoint import save_checkpoint 3 | from .epoch_based_runner import EpochBasedRunnerAmp 4 | 5 | 6 | __all__ = [ 7 | 'EpochBasedRunnerAmp', 'save_checkpoint' 8 | ] 9 | -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | 14 | def save_checkpoint(model, filename, optimizer=None, meta=None): 15 | """Save checkpoint to file. 16 | 17 | The checkpoint will have 4 fields: ``meta``, ``state_dict`` and 18 | ``optimizer``, ``amp``. By default ``meta`` will contain version 19 | and time info. 20 | 21 | Args: 22 | model (Module): Module whose params are to be saved. 23 | filename (str): Checkpoint filename. 24 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 25 | meta (dict, optional): Metadata to be saved in checkpoint. 26 | """ 27 | if meta is None: 28 | meta = {} 29 | elif not isinstance(meta, dict): 30 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 31 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 32 | 33 | if is_module_wrapper(model): 34 | model = model.module 35 | 36 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 37 | # save class name to the meta 38 | meta.update(CLASSES=model.CLASSES) 39 | 40 | checkpoint = { 41 | 'meta': meta, 42 | 'state_dict': weights_to_cpu(get_state_dict(model)) 43 | } 44 | # save optimizer state dict in the checkpoint 45 | if isinstance(optimizer, Optimizer): 46 | checkpoint['optimizer'] = optimizer.state_dict() 47 | elif isinstance(optimizer, dict): 48 | checkpoint['optimizer'] = {} 49 | for name, optim in optimizer.items(): 50 | checkpoint['optimizer'][name] = optim.state_dict() 51 | 52 | # save amp state dict in the checkpoint 53 | checkpoint['amp'] = apex.amp.state_dict() 54 | 55 | if filename.startswith('pavi://'): 56 | try: 57 | from pavi import modelcloud 58 | from pavi.exception import NodeNotFoundError 59 | except ImportError: 60 | raise ImportError( 61 | 'Please install pavi to load checkpoint from modelcloud.') 62 | model_path = filename[7:] 63 | root = modelcloud.Folder() 64 | model_dir, model_name = osp.split(model_path) 65 | try: 66 | model = modelcloud.get(model_dir) 67 | except NodeNotFoundError: 68 | model = root.create_training_model(model_dir) 69 | with TemporaryDirectory() as tmp_dir: 70 | checkpoint_file = osp.join(tmp_dir, model_name) 71 | with open(checkpoint_file, 'wb') as f: 72 | torch.save(checkpoint, f) 73 | f.flush() 74 | model.create_file(checkpoint_file, name=model_name) 75 | else: 76 | mmcv.mkdir_or_exist(osp.dirname(filename)) 77 | # immediately flush buffer 78 | with open(filename, 'wb') as f: 79 | torch.save(checkpoint, f) 80 | 
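        # NOTE: `apex` is referenced in these mmcv_custom files (apex.amp.state_dict()
        # above, apex.amp.load_state_dict() in epoch_based_runner.py and
        # apex.amp.scale_loss() in optimizer.py) but never imported; a module-level
        # `import apex` is assumed, i.e. this AMP-aware runner requires NVIDIA apex.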
f.flush() 81 | -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import platform 4 | import shutil 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.runner import RUNNERS, EpochBasedRunner 11 | from .checkpoint import save_checkpoint 12 | 13 | 14 | @RUNNERS.register_module() 15 | class EpochBasedRunnerAmp(EpochBasedRunner): 16 | """Epoch-based Runner with AMP support. 17 | 18 | This runner trains models epoch by epoch. 19 | """ 20 | 21 | def save_checkpoint(self, 22 | out_dir, 23 | filename_tmpl='epoch_{}.pth', 24 | save_optimizer=True, 25 | meta=None, 26 | create_symlink=True): 27 | """Save the checkpoint. 28 | 29 | Args: 30 | out_dir (str): The directory that checkpoints are saved. 31 | filename_tmpl (str, optional): The checkpoint filename template, 32 | which contains a placeholder for the epoch number. 33 | Defaults to 'epoch_{}.pth'. 34 | save_optimizer (bool, optional): Whether to save the optimizer to 35 | the checkpoint. Defaults to True. 36 | meta (dict, optional): The meta information to be saved in the 37 | checkpoint. Defaults to None. 38 | create_symlink (bool, optional): Whether to create a symlink 39 | "latest.pth" to point to the latest checkpoint. 40 | Defaults to True. 41 | """ 42 | if meta is None: 43 | meta = dict(epoch=self.epoch + 1, iter=self.iter) 44 | elif isinstance(meta, dict): 45 | meta.update(epoch=self.epoch + 1, iter=self.iter) 46 | else: 47 | raise TypeError( 48 | f'meta should be a dict or None, but got {type(meta)}') 49 | if self.meta is not None: 50 | meta.update(self.meta) 51 | 52 | filename = filename_tmpl.format(self.epoch + 1) 53 | filepath = osp.join(out_dir, filename) 54 | optimizer = self.optimizer if save_optimizer else None 55 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) 56 | # in some environments, `os.symlink` is not supported; you may need to 57 | # set `create_symlink` to False 58 | if create_symlink: 59 | dst_file = osp.join(out_dir, 'latest.pth') 60 | if platform.system() != 'Windows': 61 | mmcv.symlink(filename, dst_file) 62 | else: 63 | shutil.copy(filepath, dst_file) 64 | 65 | def resume(self, 66 | checkpoint, 67 | resume_optimizer=True, 68 | map_location='default'): 69 | if map_location == 'default': 70 | if torch.cuda.is_available(): 71 | device_id = torch.cuda.current_device() 72 | checkpoint = self.load_checkpoint( 73 | checkpoint, 74 | map_location=lambda storage, loc: storage.cuda(device_id)) 75 | else: 76 | checkpoint = self.load_checkpoint(checkpoint) 77 | else: 78 | checkpoint = self.load_checkpoint( 79 | checkpoint, map_location=map_location) 80 | 81 | self._epoch = checkpoint['meta']['epoch'] 82 | self._iter = checkpoint['meta']['iter'] 83 | if 'optimizer' in checkpoint and resume_optimizer: 84 | if isinstance(self.optimizer, Optimizer): 85 | self.optimizer.load_state_dict(checkpoint['optimizer']) 86 | elif isinstance(self.optimizer, dict): 87 | for k in self.optimizer.keys(): 88 | self.optimizer[k].load_state_dict( 89 | checkpoint['optimizer'][k]) 90 | else: 91 | raise TypeError( 92 | 'Optimizer should be dict or torch.optim.Optimizer ' 93 | f'but got {type(self.optimizer)}') 94 | 95 | if 'amp' in checkpoint: 96 | apex.amp.load_state_dict(checkpoint['amp']) 97 | self.logger.info('load amp state dict') 98 | 99 |
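        # map_location='default' maps checkpoint storages onto the current CUDA device
        # when one is available, so training can resume on a different GPU id than the
        # one the checkpoint was saved from.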
self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) 100 | -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/optimizer.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import OptimizerHook, HOOKS 2 | 3 | @HOOKS.register_module() 4 | class DistOptimizerHook(OptimizerHook): 5 | """Optimizer hook for distributed training.""" 6 | 7 | def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False): 8 | self.grad_clip = grad_clip 9 | self.coalesce = coalesce 10 | self.bucket_size_mb = bucket_size_mb 11 | self.update_interval = update_interval 12 | self.use_fp16 = use_fp16 13 | 14 | def before_run(self, runner): 15 | runner.optimizer.zero_grad() 16 | 17 | def after_train_iter(self, runner): 18 | runner.outputs['loss'] /= self.update_interval 19 | if self.use_fp16: 20 | with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss: 21 | scaled_loss.backward() 22 | else: 23 | runner.outputs['loss'].backward() 24 | if self.every_n_iters(runner, self.update_interval): 25 | if self.grad_clip is not None: 26 | self.clip_grads(runner.model.parameters()) 27 | runner.optimizer.step() 28 | runner.optimizer.zero_grad() -------------------------------------------------------------------------------- /detection/model/ska.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | import triton 4 | import triton.language as tl 5 | from torch.amp import custom_fwd, custom_bwd 6 | import math 7 | 8 | def _grid(numel: int, bs: int) -> tuple: 9 | return (triton.cdiv(numel, bs),) 10 | 11 | @triton.jit 12 | def _idx(i, n: int, c: int, h: int, w: int): 13 | ni = i // (c * h * w) 14 | ci = (i // (h * w)) % c 15 | hi = (i // w) % h 16 | wi = i % w 17 | m = i < (n * c * h * w) 18 | return ni, ci, hi, wi, m 19 | 20 | @triton.jit 21 | def ska_fwd( 22 | x_ptr, w_ptr, o_ptr, 23 | n, ic, h, w, ks, pad, wc, 24 | BS: tl.constexpr, 25 | CT: tl.constexpr, AT: tl.constexpr 26 | ): 27 | pid = tl.program_id(0) 28 | start = pid * BS 29 | offs = start + tl.arange(0, BS) 30 | 31 | ni, ci, hi, wi, m = _idx(offs, n, ic, h, w) 32 | val = tl.zeros((BS,), dtype=AT) 33 | 34 | for kh in range(ks): 35 | hin = hi - pad + kh 36 | hb = (hin >= 0) & (hin < h) 37 | for kw in range(ks): 38 | win = wi - pad + kw 39 | b = hb & (win >= 0) & (win < w) 40 | 41 | x_off = ((ni * ic + ci) * h + hin) * w + win 42 | w_off = ((ni * wc + ci % wc) * ks * ks + (kh * ks + kw)) * h * w + hi * w + wi 43 | 44 | x_val = tl.load(x_ptr + x_off, mask=m & b, other=0.0).to(CT) 45 | w_val = tl.load(w_ptr + w_off, mask=m, other=0.0).to(CT) 46 | val += tl.where(b & m, x_val * w_val, 0.0).to(AT) 47 | 48 | tl.store(o_ptr + offs, val.to(CT), mask=m) 49 | 50 | @triton.jit 51 | def ska_bwd_x( 52 | go_ptr, w_ptr, gi_ptr, 53 | n, ic, h, w, ks, pad, wc, 54 | BS: tl.constexpr, 55 | CT: tl.constexpr, AT: tl.constexpr 56 | ): 57 | pid = tl.program_id(0) 58 | start = pid * BS 59 | offs = start + tl.arange(0, BS) 60 | 61 | ni, ci, hi, wi, m = _idx(offs, n, ic, h, w) 62 | val = tl.zeros((BS,), dtype=AT) 63 | 64 | for kh in range(ks): 65 | ho = hi + pad - kh 66 | hb = (ho >= 0) & (ho < h) 67 | for kw in range(ks): 68 | wo = wi + pad - kw 69 | b = hb & (wo >= 0) & (wo < w) 70 | 71 | go_off = ((ni * ic + ci) * h + ho) * w + wo 72 | w_off = ((ni * wc + ci % wc) * ks * ks + (kh * ks + kw)) * h * w + ho * w + wo 73 | 74 
| go_val = tl.load(go_ptr + go_off, mask=m & b, other=0.0).to(CT) 75 | w_val = tl.load(w_ptr + w_off, mask=m, other=0.0).to(CT) 76 | val += tl.where(b & m, go_val * w_val, 0.0).to(AT) 77 | 78 | tl.store(gi_ptr + offs, val.to(CT), mask=m) 79 | 80 | @triton.jit 81 | def ska_bwd_w( 82 | go_ptr, x_ptr, gw_ptr, 83 | n, wc, h, w, ic, ks, pad, 84 | BS: tl.constexpr, 85 | CT: tl.constexpr, AT: tl.constexpr 86 | ): 87 | pid = tl.program_id(0) 88 | start = pid * BS 89 | offs = start + tl.arange(0, BS) 90 | 91 | ni, ci, hi, wi, m = _idx(offs, n, wc, h, w) 92 | 93 | for kh in range(ks): 94 | hin = hi - pad + kh 95 | hb = (hin >= 0) & (hin < h) 96 | for kw in range(ks): 97 | win = wi - pad + kw 98 | b = hb & (win >= 0) & (win < w) 99 | w_off = ((ni * wc + ci) * ks * ks + (kh * ks + kw)) * h * w + hi * w + wi 100 | 101 | val = tl.zeros((BS,), dtype=AT) 102 | steps = (ic - ci + wc - 1) // wc 103 | for s in range(tl.max(steps, axis=0)): 104 | cc = ci + s * wc 105 | cm = (cc < ic) & m & b 106 | 107 | x_off = ((ni * ic + cc) * h + hin) * w + win 108 | go_off = ((ni * ic + cc) * h + hi) * w + wi 109 | 110 | x_val = tl.load(x_ptr + x_off, mask=cm, other=0.0).to(CT) 111 | go_val = tl.load(go_ptr + go_off, mask=cm, other=0.0).to(CT) 112 | val += tl.where(cm, x_val * go_val, 0.0).to(AT) 113 | 114 | tl.store(gw_ptr + w_off, val.to(CT), mask=m) 115 | 116 | class SkaFn(Function): 117 | @staticmethod 118 | @custom_fwd(device_type='cuda') 119 | def forward(ctx, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor: 120 | ks = int(math.sqrt(w.shape[2])) 121 | pad = (ks - 1) // 2 122 | ctx.ks, ctx.pad = ks, pad 123 | n, ic, h, width = x.shape 124 | wc = w.shape[1] 125 | o = torch.empty(n, ic, h, width, device=x.device, dtype=x.dtype) 126 | numel = o.numel() 127 | 128 | x = x.contiguous() 129 | w = w.contiguous() 130 | 131 | grid = lambda meta: _grid(numel, meta["BS"]) 132 | 133 | ct = tl.float16 if x.dtype == torch.float16 else (tl.float32 if x.dtype == torch.float32 else tl.float64) 134 | at = tl.float32 if x.dtype == torch.float16 else ct 135 | 136 | ska_fwd[grid](x, w, o, n, ic, h, width, ks, pad, wc, BS=1024, CT=ct, AT=at) 137 | 138 | ctx.save_for_backward(x, w) 139 | ctx.ct, ctx.at = ct, at 140 | return o 141 | 142 | @staticmethod 143 | @custom_bwd(device_type='cuda') 144 | def backward(ctx, go: torch.Tensor) -> tuple: 145 | ks, pad = ctx.ks, ctx.pad 146 | x, w = ctx.saved_tensors 147 | n, ic, h, width = x.shape 148 | wc = w.shape[1] 149 | 150 | go = go.contiguous() 151 | gx = gw = None 152 | ct, at = ctx.ct, ctx.at 153 | 154 | if ctx.needs_input_grad[0]: 155 | gx = torch.empty_like(x) 156 | numel = gx.numel() 157 | ska_bwd_x[lambda meta: _grid(numel, meta["BS"])](go, w, gx, n, ic, h, width, ks, pad, wc, BS=1024, CT=ct, AT=at) 158 | 159 | if ctx.needs_input_grad[1]: 160 | gw = torch.empty_like(w) 161 | numel = gw.numel() // w.shape[2] 162 | ska_bwd_w[lambda meta: _grid(numel, meta["BS"])](go, x, gw, n, wc, h, width, ic, ks, pad, BS=1024, CT=ct, AT=at) 163 | 164 | return gx, gw, None, None 165 | 166 | class SKA(torch.nn.Module): 167 | def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor: 168 | return SkaFn.apply(x, w) # type: ignore 169 | -------------------------------------------------------------------------------- /detection/train.sh: -------------------------------------------------------------------------------- 1 | # For RetinaNet 2 | bash ./dist_train.sh configs/retinanet_lsnet_t_fpn_1x_coco.py 8 3 | 4 | # For Mask R-CNN 5 | bash ./dist_train.sh configs/mask_rcnn_lsnet_t_fpn_1x_coco.py 8 6 | 
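# The first positional argument is the config and the second the GPU count; any
# extra arguments (e.g. --resume-from) are forwarded to train.py through ${@:3}
# in dist_train.sh above.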
-------------------------------------------------------------------------------- /engine.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable, Optional 4 | 5 | import torch 6 | 7 | from timm.data import Mixup 8 | from timm.utils import accuracy, ModelEma 9 | 10 | from losses import DistillationLoss 11 | import utils 12 | 13 | def set_bn_state(model): 14 | for m in model.modules(): 15 | if isinstance(m, torch.nn.modules.batchnorm._BatchNorm): 16 | m.eval() 17 | 18 | def train_one_epoch(model: torch.nn.Module, criterion: DistillationLoss, 19 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 20 | device: torch.device, epoch: int, loss_scaler, 21 | clip_grad: float = 0, 22 | clip_mode: str = 'norm', 23 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, 24 | set_training_mode=True, 25 | set_bn_eval=False,): 26 | model.train(set_training_mode) 27 | if set_bn_eval: 28 | set_bn_state(model) 29 | metric_logger = utils.MetricLogger(delimiter=" ") 30 | metric_logger.add_meter('lr', utils.SmoothedValue( 31 | window_size=1, fmt='{value:.6f}')) 32 | header = 'Epoch: [{}]'.format(epoch) 33 | print_freq = 100 34 | 35 | for samples, targets in metric_logger.log_every( 36 | data_loader, print_freq, header): 37 | samples = samples.to(device, non_blocking=True) 38 | targets = targets.to(device, non_blocking=True) 39 | 40 | if mixup_fn is not None: 41 | samples, targets = mixup_fn(samples, targets) 42 | 43 | with torch.amp.autocast(enabled=False, device_type="cuda"): 44 | outputs = model(samples) 45 | loss = criterion(samples, outputs, targets) 46 | 47 | loss_value = loss.item() 48 | 49 | if not math.isfinite(loss_value): 50 | print("Loss is {}, stopping training".format(loss_value)) 51 | sys.exit(1) 52 | 53 | optimizer.zero_grad() 54 | 55 | # this attribute is added by timm on one optimizer (adahessian) 56 | is_second_order = hasattr( 57 | optimizer, 'is_second_order') and optimizer.is_second_order 58 | loss_scaler(loss, optimizer, clip_grad=clip_grad, clip_mode=clip_mode, 59 | parameters=model.parameters(), create_graph=is_second_order) 60 | 61 | torch.cuda.synchronize() 62 | if model_ema is not None: 63 | model_ema.update(model) 64 | 65 | metric_logger.update(loss=loss_value) 66 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 67 | # gather the stats from all processes 68 | metric_logger.synchronize_between_processes() 69 | print("Averaged stats:", metric_logger) 70 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 71 | 72 | 73 | @torch.no_grad() 74 | def evaluate(data_loader, model, device): 75 | criterion = torch.nn.CrossEntropyLoss() 76 | 77 | metric_logger = utils.MetricLogger(delimiter=" ") 78 | header = 'Test:' 79 | 80 | # switch to evaluation mode 81 | model.eval() 82 | 83 | for images, target in metric_logger.log_every(data_loader, 10, header): 84 | images = images.to(device, non_blocking=True) 85 | target = target.to(device, non_blocking=True) 86 | 87 | # compute output 88 | with torch.amp.autocast(enabled=False, device_type="cuda"): 89 | output = model(images) 90 | loss = criterion(output, target) 91 | 92 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 93 | 94 | batch_size = images.shape[0] 95 | metric_logger.update(loss=loss.item()) 96 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 97 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 98 | # gather the stats from all processes 99 | 
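    # Under distributed evaluation each rank only sees its shard of the validation
    # set; synchronizing aggregates the meters across processes so global_avg
    # reflects the full dataset.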
metric_logger.synchronize_between_processes() 100 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 101 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 102 | 103 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 104 | -------------------------------------------------------------------------------- /eval.sh: -------------------------------------------------------------------------------- 1 | python main.py --eval --model lsnet_b --resume ./pretrain/lsnet_b_distill.pth --data-path ~/imagenet --distillation-type hard -------------------------------------------------------------------------------- /eval_robust.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | set -x 3 | 4 | MODEL=$1 5 | CKPT=$2 6 | INPUT=$3 7 | 8 | export HF_ENDPOINT=https://hf-mirror.com 9 | 10 | python main.py --eval --model ${MODEL} --resume ${CKPT} --data-path ~/imagenet \ 11 | --inc_path ~/datasets/OpenDataLab___ImageNet-C/raw \ 12 | --insk_path ~/datasets/OpenDataLab___ImageNet-Sketch/raw/sketch \ 13 | --ina_path ~/datasets/OpenDataLab___ImageNet-A/raw/imagenet-a \ 14 | --inr_path ~/datasets/OpenDataLab___ImageNet-R/raw/imagenet-r \ 15 | --batch-size 512 \ 16 | --input-size ${INPUT} -------------------------------------------------------------------------------- /flops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from timm import create_model 3 | import model.build 4 | import utils 5 | from fvcore.nn import FlopCountAnalysis 6 | from model.ska import SKA 7 | import torch.nn.functional as F 8 | from argparse import ArgumentParser 9 | 10 | def forward_flops(self, x: torch.Tensor, w: torch.Tensor): 11 | w = w.squeeze(0)[..., 0, 0].reshape(-1, 1, 3, 3) 12 | w = w.repeat(8, 1, 1, 1) 13 | return F.conv2d(x, w, None, 1, 1, 1, groups=w.shape[0]) 14 | 15 | SKA.forward = forward_flops 16 | 17 | torch.autograd.set_grad_enabled(False) 18 | 19 | if __name__ == "__main__": 20 | parser = ArgumentParser() 21 | parser.add_argument("--model", default="lsnet_t", type=str) 22 | parser.add_argument("--resolution", default=224, type=int) 23 | 24 | args = parser.parse_args() 25 | model = args.model 26 | resolution = args.resolution 27 | 28 | inputs = torch.randn(1, 3, resolution, 29 | resolution) 30 | model = create_model(model, num_classes=1000) 31 | utils.replace_batchnorm(model) 32 | model.cuda() 33 | n_parameters = sum(p.numel() 34 | for p in model.parameters() if p.requires_grad) 35 | print('Number of params:', n_parameters / 1e6) 36 | flops = FlopCountAnalysis(model, inputs.cuda()).unsupported_ops_warnings(False).uncalled_modules_warnings(False) 37 | print("Flops: ", flops.total() / 1e9) 38 | -------------------------------------------------------------------------------- /losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | 5 | class DistillationLoss(torch.nn.Module): 6 | """ 7 | This module wraps a standard criterion and adds an extra knowledge distillation loss by 8 | taking a teacher model prediction and using it as additional supervision. 
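    Two modes are implemented below: 'soft' distillation, a KL divergence between
    temperature-scaled student and teacher logits, and 'hard' distillation, a
    cross-entropy loss against the teacher's argmax predictions.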
9 | """ 10 | 11 | def __init__(self, base_criterion: torch.nn.Module, teacher_model: torch.nn.Module, 12 | distillation_type: str, alpha: float, tau: float): 13 | super().__init__() 14 | self.base_criterion = base_criterion 15 | self.teacher_model = teacher_model 16 | assert distillation_type in ['none', 'soft', 'hard'] 17 | self.distillation_type = distillation_type 18 | self.alpha = alpha 19 | self.tau = tau 20 | 21 | def forward(self, inputs, outputs, labels): 22 | """ 23 | Args: 24 | inputs: The original inputs that are feed to the teacher model 25 | outputs: the outputs of the model to be trained. It is expected to be 26 | either a Tensor, or a Tuple[Tensor, Tensor], with the original output 27 | in the first position and the distillation predictions as the second output 28 | labels: the labels for the base criterion 29 | """ 30 | outputs_kd = None 31 | if not isinstance(outputs, torch.Tensor): 32 | # assume that the model outputs a tuple of [outputs, outputs_kd] 33 | outputs, outputs_kd = outputs 34 | base_loss = self.base_criterion(outputs, labels) 35 | if self.distillation_type == 'none': 36 | return base_loss 37 | 38 | if outputs_kd is None: 39 | raise ValueError("When knowledge distillation is enabled, the model is " 40 | "expected to return a Tuple[Tensor, Tensor] with the output of the " 41 | "class_token and the dist_token") 42 | # don't backprop throught the teacher 43 | with torch.no_grad(): 44 | teacher_outputs = self.teacher_model(inputs) 45 | 46 | if self.distillation_type == 'soft': 47 | T = self.tau 48 | # taken from https://github.com/peterliht/knowledge-distillation-pytorch/blob/master/model/net.py#L100 49 | # with slight modifications 50 | distillation_loss = F.kl_div( 51 | F.log_softmax(outputs_kd / T, dim=1), 52 | F.log_softmax(teacher_outputs / T, dim=1), 53 | reduction='sum', 54 | log_target=True 55 | ) * (T * T) / outputs_kd.numel() 56 | elif self.distillation_type == 'hard': 57 | distillation_loss = F.cross_entropy( 58 | outputs_kd, teacher_outputs.argmax(dim=1)) 59 | 60 | loss = base_loss * (1 - self.alpha) + distillation_loss * self.alpha 61 | return loss 62 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THU-MIG/lsnet/cbe737c92b7c43ecf02d08545a07f03f1010177c/model/__init__.py -------------------------------------------------------------------------------- /model/build.py: -------------------------------------------------------------------------------- 1 | import model.lsnet -------------------------------------------------------------------------------- /model/ska.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | import triton 4 | import triton.language as tl 5 | from torch.amp import custom_fwd, custom_bwd 6 | import math 7 | 8 | def _grid(numel: int, bs: int) -> tuple: 9 | return (triton.cdiv(numel, bs),) 10 | 11 | @triton.jit 12 | def _idx(i, n: int, c: int, h: int, w: int): 13 | ni = i // (c * h * w) 14 | ci = (i // (h * w)) % c 15 | hi = (i // w) % h 16 | wi = i % w 17 | m = i < (n * c * h * w) 18 | return ni, ci, hi, wi, m 19 | 20 | @triton.jit 21 | def ska_fwd( 22 | x_ptr, w_ptr, o_ptr, 23 | n, ic, h, w, ks, pad, wc, 24 | BS: tl.constexpr, 25 | CT: tl.constexpr, AT: tl.constexpr 26 | ): 27 | pid = tl.program_id(0) 28 | start = pid * BS 29 | offs = start + tl.arange(0, BS) 30 | 31 | ni, ci, 
hi, wi, m = _idx(offs, n, ic, h, w) 32 | val = tl.zeros((BS,), dtype=AT) 33 | 34 | for kh in range(ks): 35 | hin = hi - pad + kh 36 | hb = (hin >= 0) & (hin < h) 37 | for kw in range(ks): 38 | win = wi - pad + kw 39 | b = hb & (win >= 0) & (win < w) 40 | 41 | x_off = ((ni * ic + ci) * h + hin) * w + win 42 | w_off = ((ni * wc + ci % wc) * ks * ks + (kh * ks + kw)) * h * w + hi * w + wi 43 | 44 | x_val = tl.load(x_ptr + x_off, mask=m & b, other=0.0).to(CT) 45 | w_val = tl.load(w_ptr + w_off, mask=m, other=0.0).to(CT) 46 | val += tl.where(b & m, x_val * w_val, 0.0).to(AT) 47 | 48 | tl.store(o_ptr + offs, val.to(CT), mask=m) 49 | 50 | @triton.jit 51 | def ska_bwd_x( 52 | go_ptr, w_ptr, gi_ptr, 53 | n, ic, h, w, ks, pad, wc, 54 | BS: tl.constexpr, 55 | CT: tl.constexpr, AT: tl.constexpr 56 | ): 57 | pid = tl.program_id(0) 58 | start = pid * BS 59 | offs = start + tl.arange(0, BS) 60 | 61 | ni, ci, hi, wi, m = _idx(offs, n, ic, h, w) 62 | val = tl.zeros((BS,), dtype=AT) 63 | 64 | for kh in range(ks): 65 | ho = hi + pad - kh 66 | hb = (ho >= 0) & (ho < h) 67 | for kw in range(ks): 68 | wo = wi + pad - kw 69 | b = hb & (wo >= 0) & (wo < w) 70 | 71 | go_off = ((ni * ic + ci) * h + ho) * w + wo 72 | w_off = ((ni * wc + ci % wc) * ks * ks + (kh * ks + kw)) * h * w + ho * w + wo 73 | 74 | go_val = tl.load(go_ptr + go_off, mask=m & b, other=0.0).to(CT) 75 | w_val = tl.load(w_ptr + w_off, mask=m, other=0.0).to(CT) 76 | val += tl.where(b & m, go_val * w_val, 0.0).to(AT) 77 | 78 | tl.store(gi_ptr + offs, val.to(CT), mask=m) 79 | 80 | @triton.jit 81 | def ska_bwd_w( 82 | go_ptr, x_ptr, gw_ptr, 83 | n, wc, h, w, ic, ks, pad, 84 | BS: tl.constexpr, 85 | CT: tl.constexpr, AT: tl.constexpr 86 | ): 87 | pid = tl.program_id(0) 88 | start = pid * BS 89 | offs = start + tl.arange(0, BS) 90 | 91 | ni, ci, hi, wi, m = _idx(offs, n, wc, h, w) 92 | 93 | for kh in range(ks): 94 | hin = hi - pad + kh 95 | hb = (hin >= 0) & (hin < h) 96 | for kw in range(ks): 97 | win = wi - pad + kw 98 | b = hb & (win >= 0) & (win < w) 99 | w_off = ((ni * wc + ci) * ks * ks + (kh * ks + kw)) * h * w + hi * w + wi 100 | 101 | val = tl.zeros((BS,), dtype=AT) 102 | steps = (ic - ci + wc - 1) // wc 103 | for s in range(tl.max(steps, axis=0)): 104 | cc = ci + s * wc 105 | cm = (cc < ic) & m & b 106 | 107 | x_off = ((ni * ic + cc) * h + hin) * w + win 108 | go_off = ((ni * ic + cc) * h + hi) * w + wi 109 | 110 | x_val = tl.load(x_ptr + x_off, mask=cm, other=0.0).to(CT) 111 | go_val = tl.load(go_ptr + go_off, mask=cm, other=0.0).to(CT) 112 | val += tl.where(cm, x_val * go_val, 0.0).to(AT) 113 | 114 | tl.store(gw_ptr + w_off, val.to(CT), mask=m) 115 | 116 | class SkaFn(Function): 117 | @staticmethod 118 | @custom_fwd(device_type='cuda') 119 | def forward(ctx, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor: 120 | ks = int(math.sqrt(w.shape[2])) 121 | pad = (ks - 1) // 2 122 | ctx.ks, ctx.pad = ks, pad 123 | n, ic, h, width = x.shape 124 | wc = w.shape[1] 125 | o = torch.empty(n, ic, h, width, device=x.device, dtype=x.dtype) 126 | numel = o.numel() 127 | 128 | x = x.contiguous() 129 | w = w.contiguous() 130 | 131 | grid = lambda meta: _grid(numel, meta["BS"]) 132 | 133 | ct = tl.float16 if x.dtype == torch.float16 else (tl.float32 if x.dtype == torch.float32 else tl.float64) 134 | at = tl.float32 if x.dtype == torch.float16 else ct 135 | 136 | ska_fwd[grid](x, w, o, n, ic, h, width, ks, pad, wc, BS=1024, CT=ct, AT=at) 137 | 138 | ctx.save_for_backward(x, w) 139 | ctx.ct, ctx.at = ct, at 140 | return o 141 | 142 | @staticmethod 143 | 
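# Gradient notes (a descriptive sketch of the two Triton kernels above):
# - ska_bwd_x forms dL/dx by gathering grad_output * w over the mirrored
#   window (output position ho = hi + pad - kh), i.e. the transpose of the
#   forward gather, with w read at the output location.
# - ska_bwd_w forms dL/dw by accumulating x * grad_output over every input
#   channel that shares a weight channel (channels ci, ci + wc, ci + 2*wc,
#   ... all map onto weight channel ci via the ci % wc indexing in ska_fwd).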
@custom_bwd(device_type='cuda') 144 | def backward(ctx, go: torch.Tensor) -> tuple: 145 | ks, pad = ctx.ks, ctx.pad 146 | x, w = ctx.saved_tensors 147 | n, ic, h, width = x.shape 148 | wc = w.shape[1] 149 | 150 | go = go.contiguous() 151 | gx = gw = None 152 | ct, at = ctx.ct, ctx.at 153 | 154 | if ctx.needs_input_grad[0]: 155 | gx = torch.empty_like(x) 156 | numel = gx.numel() 157 | ska_bwd_x[lambda meta: _grid(numel, meta["BS"])](go, w, gx, n, ic, h, width, ks, pad, wc, BS=1024, CT=ct, AT=at) 158 | 159 | if ctx.needs_input_grad[1]: 160 | gw = torch.empty_like(w) 161 | numel = gw.numel() // w.shape[2] 162 | ska_bwd_w[lambda meta: _grid(numel, meta["BS"])](go, x, gw, n, wc, h, width, ic, ks, pad, BS=1024, CT=ct, AT=at) 163 | 164 | return gx, gw, None, None 165 | 166 | class SKA(torch.nn.Module): 167 | def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor: 168 | return SkaFn.apply(x, w) # type: ignore 169 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | timm==0.5.4 2 | einops==0.4.1 3 | fvcore 4 | easydict 5 | matplotlib 6 | numpy==1.21.0 7 | yacs 8 | scikit-image==0.19.3 9 | pillow 10 | wandb 11 | torch==2.4.1 -------------------------------------------------------------------------------- /robust_utils.py: -------------------------------------------------------------------------------- 1 | data_loaders_names = { 2 | 'Brightness': 'brightness', 3 | 'Contrast': 'contrast', 4 | 'Defocus Blur': 'defocus_blur', 5 | 'Elastic Transform': 'elastic_transform', 6 | 'Fog': 'fog', 7 | 'Frost': 'frost', 8 | 'Gaussian Noise': 'gaussian_noise', 9 | 'Glass Blur': 'glass_blur', 10 | 'Impulse Noise': 'impulse_noise', 11 | 'JPEG Compression': 'jpeg_compression', 12 | 'Motion Blur': 'motion_blur', 13 | 'Pixelate': 'pixelate', 14 | 'Shot Noise': 'shot_noise', 15 | 'Snow': 'snow', 16 | 'Zoom Blur': 'zoom_blur' 17 | } 18 | 19 | def get_ce_alexnet(): 20 | """Returns Corruption Error values for AlexNet""" 21 | 22 | ce_alexnet = dict() 23 | ce_alexnet['Gaussian Noise'] = 0.886428 24 | ce_alexnet['Shot Noise'] = 0.894468 25 | ce_alexnet['Impulse Noise'] = 0.922640 26 | ce_alexnet['Defocus Blur'] = 0.819880 27 | ce_alexnet['Glass Blur'] = 0.826268 28 | ce_alexnet['Motion Blur'] = 0.785948 29 | ce_alexnet['Zoom Blur'] = 0.798360 30 | ce_alexnet['Snow'] = 0.866816 31 | ce_alexnet['Frost'] = 0.826572 32 | ce_alexnet['Fog'] = 0.819324 33 | ce_alexnet['Brightness'] = 0.564592 34 | ce_alexnet['Contrast'] = 0.853204 35 | ce_alexnet['Elastic Transform'] = 0.646056 36 | ce_alexnet['Pixelate'] = 0.717840 37 | ce_alexnet['JPEG Compression'] = 0.606500 38 | 39 | return ce_alexnet 40 | 41 | def get_mce_from_accuracy(accuracy, error_alexnet): 42 | """Computes mean Corruption Error from accuracy""" 43 | error = 100. - accuracy 44 | ce = error / (error_alexnet * 100.) 45 | 46 | return ce 47 | -------------------------------------------------------------------------------- /segmentation/.gitignore: -------------------------------------------------------------------------------- 1 | pretrain 2 | work_dirs 3 | data 4 | results -------------------------------------------------------------------------------- /segmentation/README.md: -------------------------------------------------------------------------------- 1 | # Semantic Segmentation 2 | 3 | Segmentation on ADE20K is implemented based on [MMSegmentation](https://github.com/open-mmlab/mmsegmentation). 
4 | 5 | ## Models 6 | | Model | mIoU | Log | 7 | |:-:|:-:|:-:| 8 | | [LSNet-T](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_t_semfpn.pth) | 40.1 | [lsnet_t_semfpn.json](./logs/lsnet_t_semfpn.json) | 9 | | [LSNet-S](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_s_semfpn.pth) | 41.6 | [lsnet_s_semfpn.json](./logs/lsnet_s_semfpn.json) | 10 | | [LSNet-B](https://huggingface.co/jameslahm/lsnet/blob/main/lsnet_b_semfpn.pth) | 43.1 | [lsnet_b_semfpn.json](./logs/lsnet_b_semfpn.json) | 11 | 12 | ## Requirements 13 | ```bash 14 | pip install mmsegmentation==0.30.0 15 | ``` 16 | 17 | ## Data preparation 18 | 19 | Please prepare the ADE20K dataset following the [instructions in MMSeg](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#prepare-datasets). 20 | The data should be organized as: 21 | ``` 22 | ├── segmentation 23 | │   ├── data 24 | │   │   ├── ade 25 | │   │   │   ├── ADEChallengeData2016 26 | │   │   │   │   ├── annotations 27 | │   │   │   │   │   ├── training 28 | │   │   │   │   │   ├── validation 29 | │   │   │   │   ├── images 30 | │   │   │   │   │   ├── training 31 | │   │   │   │   │   ├── validation 32 | 33 | ``` 34 | 35 | ## Testing 36 | ```bash 37 | ./tools/dist_test.sh configs/sem_fpn/fpn_lsnet_b_ade20k_40k.py pretrain/lsnet_b_semfpn.pth 8 --eval mIoU 38 | ``` 39 | 40 | ## Training 41 | Download the ImageNet-1K pretrained weights into `./pretrain` first. 42 | ```bash 43 | ./tools/dist_train.sh configs/sem_fpn/fpn_lsnet_b_ade20k_40k.py 8 --seed 0 --deterministic 44 | ``` 45 | 46 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = 'data/ade/ADEChallengeData2016' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='AlignResize', keep_ratio=True, size_divisor=32), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type='RepeatDataset', 39 | times=50, 40 | dataset=dict( 41 | type=dataset_type, 42 | data_root=data_root, 43 | img_dir='images/training', 44 | ann_dir='annotations/training', 45 | pipeline=train_pipeline)), 46 | val=dict( 47 | type=dataset_type, 48 | data_root=data_root, 49 | img_dir='images/validation', 50 | ann_dir='annotations/validation', 51 | pipeline=test_pipeline), 52 | test=dict( 53 | type=dataset_type, 54 | data_root=data_root, 55 | img_dir='images/validation', 56 | ann_dir='annotations/validation', 57 |
pipeline=test_pipeline)) 58 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # yapf:disable 2 | log_config = dict( 3 | interval=50, 4 | hooks=[ 5 | dict(type='TextLoggerHook', by_epoch=False), 6 | # dict(type='TensorboardLoggerHook') 7 | ]) 8 | # yapf:enable 9 | dist_params = dict(backend='nccl') 10 | log_level = 'INFO' 11 | load_from = None 12 | resume_from = None 13 | workflow = [('train', 1)] 14 | cudnn_benchmark = True 15 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fpn_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 1, 1), 12 | strides=(1, 2, 2, 2), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[256, 512, 1024, 2048], 20 | out_channels=256, 21 | num_outs=4), 22 | decode_head=dict( 23 | type='FPNHead', 24 | in_channels=[256, 256, 256, 256], 25 | in_index=[0, 1, 2, 3], 26 | feature_strides=[4, 8, 16, 32], 27 | channels=128, 28 | dropout_ratio=0.1, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict( 33 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 34 | # model training and testing settings 35 | train_cfg=dict(), 36 | test_cfg=dict(mode='whole')) 37 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=160000) 8 | checkpoint_config = dict(by_epoch=False, interval=16000) 9 | evaluation = dict(interval=16000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=20000) 8 | checkpoint_config = dict(by_epoch=False, interval=2000) 9 | evaluation = dict(interval=2000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | 
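# (with max_iters=40000 and interval=4000 below, this schedule checkpoints and evaluates mIoU at 10 evenly spaced points)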
runner = dict(type='IterBasedRunner', max_iters=40000) 8 | checkpoint_config = dict(by_epoch=False, interval=4000) 9 | evaluation = dict(interval=4000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=80000) 8 | checkpoint_config = dict(by_epoch=False, interval=8000) 9 | evaluation = dict(interval=8000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/sem_fpn/fpn_lsnet_b_ade20k_40k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/fpn_r50.py', 3 | '../_base_/datasets/ade20k.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | # model settings 7 | model = dict( 8 | pretrained=None, 9 | type='EncoderDecoder', 10 | backbone=dict( 11 | type='lsnet_b', 12 | style='pytorch', 13 | pretrained= 'pretrain/lsnet_b.pth', 14 | frozen_stages=-1, 15 | ), 16 | neck=dict( 17 | type='LSNetFPN', 18 | in_channels=[128, 256, 384, 512], 19 | out_channels=256, 20 | num_outs=4, 21 | # num_extra_trans_convs=1, 22 | ), 23 | decode_head=dict(num_classes=150)) 24 | 25 | gpu_multiples = 2 # we use 8 gpu instead of 4 in mmsegmentation, so lr*2 and max_iters/2 26 | # optimizer 27 | optimizer = dict(type='AdamW', lr=0.0001 * gpu_multiples, weight_decay=0.0001) 28 | optimizer_config = dict() 29 | # learning policy 30 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-6, by_epoch=False) 31 | # runtime settings 32 | runner = dict(type='IterBasedRunner', max_iters=80000 // gpu_multiples) 33 | checkpoint_config = dict(by_epoch=False, interval=8000 // gpu_multiples) 34 | evaluation = dict(interval=8000 // gpu_multiples, metric='mIoU') -------------------------------------------------------------------------------- /segmentation/configs/sem_fpn/fpn_lsnet_s_ade20k_40k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/fpn_r50.py', 3 | '../_base_/datasets/ade20k.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | # model settings 7 | model = dict( 8 | pretrained=None, 9 | type='EncoderDecoder', 10 | backbone=dict( 11 | type='lsnet_s', 12 | style='pytorch', 13 | pretrained= 'pretrain/lsnet_s.pth', 14 | frozen_stages=-1, 15 | ), 16 | neck=dict( 17 | type='LSNetFPN', 18 | in_channels=[96, 192, 320, 448], 19 | out_channels=256, 20 | num_outs=4 21 | ), 22 | decode_head=dict(num_classes=150)) 23 | 24 | gpu_multiples = 2 # we use 8 gpu instead of 4 in mmsegmentation, so lr*2 and max_iters/2 25 | # optimizer 26 | optimizer = dict(type='AdamW', lr=0.0001 * gpu_multiples, weight_decay=0.0001) 27 | optimizer_config = dict() 28 | # learning policy 29 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-6, by_epoch=False) 30 | # runtime settings 31 | runner = dict(type='IterBasedRunner', max_iters=80000 // gpu_multiples) 32 | checkpoint_config = dict(by_epoch=False, interval=8000 // gpu_multiples) 33 | evaluation = dict(interval=8000 // gpu_multiples, metric='mIoU') 34 | -------------------------------------------------------------------------------- 
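All three `sem_fpn` configs (fpn_lsnet_b/s above, fpn_lsnet_t below) follow the same `gpu_multiples` convention noted in their comments; a minimal sketch of the arithmetic, assuming (as those comments state) a 4-GPU, 80k-iteration baseline:

```python
# Linear scaling used by the fpn_lsnet_{t,s,b} configs (sketch; the
# 4-GPU / 80k-iter baseline is the one the config comments assume).
gpu_multiples = 2                        # 8 GPUs instead of 4

lr = 0.0001 * gpu_multiples              # AdamW lr: 1e-4 -> 2e-4
max_iters = 80000 // gpu_multiples       # 80k -> 40k iterations
interval = 8000 // gpu_multiples         # checkpoint/eval every 4k iters

# The total number of training samples seen is unchanged
# (samples_per_gpu=4 comes from the ade20k.py dataset config):
assert 4 * 4 * 80000 == 4 * 8 * max_iters  # 1,280,000 images either way
```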
/segmentation/configs/sem_fpn/fpn_lsnet_t_ade20k_40k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/fpn_r50.py', 3 | '../_base_/datasets/ade20k.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | # model settings 7 | model = dict( 8 | pretrained=None, 9 | type='EncoderDecoder', 10 | backbone=dict( 11 | type='lsnet_t', 12 | style='pytorch', 13 | pretrained= 'pretrain/lsnet_t.pth', 14 | frozen_stages=-1, 15 | ), 16 | neck=dict( 17 | type='LSNetFPN', 18 | in_channels=[64, 128, 256, 384], 19 | out_channels=256, 20 | num_outs=4 21 | ), 22 | decode_head=dict(num_classes=150)) 23 | 24 | gpu_multiples = 2 # we use 8 gpu instead of 4 in mmsegmentation, so lr*2 and max_iters/2 25 | # optimizer 26 | optimizer = dict(type='AdamW', lr=0.0001 * gpu_multiples, weight_decay=0.0001) 27 | optimizer_config = dict() 28 | # learning policy 29 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-6, by_epoch=False) 30 | # runtime settings 31 | runner = dict(type='IterBasedRunner', max_iters=80000 // gpu_multiples) 32 | checkpoint_config = dict(by_epoch=False, interval=8000 // gpu_multiples) 33 | evaluation = dict(interval=8000 // gpu_multiples, metric='mIoU') 34 | -------------------------------------------------------------------------------- /segmentation/eval.sh: -------------------------------------------------------------------------------- 1 | PORT=12345 ./tools/dist_test.sh configs/sem_fpn/fpn_lsnet_t_ade20k_40k.py pretrain/lsnet_t_semfpn.pth 8 --eval mIoU -------------------------------------------------------------------------------- /segmentation/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .checkpoint import load_checkpoint, load_state_dict, _load_checkpoint 4 | 5 | __all__ = ['load_checkpoint', 'load_state_dict', '_load_checkpoint'] 6 | -------------------------------------------------------------------------------- /segmentation/mmcv_custom/runner/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | from .checkpoint import save_checkpoint 3 | from .epoch_based_runner import EpochBasedRunnerAmp 4 | 5 | 6 | __all__ = [ 7 | 'EpochBasedRunnerAmp', 'save_checkpoint' 8 | ] 9 | -------------------------------------------------------------------------------- /segmentation/mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | try: import apex  # apex is optional; it is only needed to save AMP state 13 | except ImportError: apex = None 14 | def save_checkpoint(model, filename, optimizer=None, meta=None): 15 | """Save checkpoint to file. 16 | 17 | The checkpoint will have four fields: ``meta``, ``state_dict``, 18 | ``optimizer`` and ``amp``. By default ``meta`` will contain version 19 | and time info. 20 | 21 | Args: 22 | model (Module): Module whose params are to be saved. 23 | filename (str): Checkpoint filename. 24 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 25 | meta (dict, optional): Metadata to be saved in checkpoint.
26 | """ 27 | if meta is None: 28 | meta = {} 29 | elif not isinstance(meta, dict): 30 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 31 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 32 | 33 | if is_module_wrapper(model): 34 | model = model.module 35 | 36 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 37 | # save class name to the meta 38 | meta.update(CLASSES=model.CLASSES) 39 | 40 | checkpoint = { 41 | 'meta': meta, 42 | 'state_dict': weights_to_cpu(get_state_dict(model)) 43 | } 44 | # save optimizer state dict in the checkpoint 45 | if isinstance(optimizer, Optimizer): 46 | checkpoint['optimizer'] = optimizer.state_dict() 47 | elif isinstance(optimizer, dict): 48 | checkpoint['optimizer'] = {} 49 | for name, optim in optimizer.items(): 50 | checkpoint['optimizer'][name] = optim.state_dict() 51 | 52 | # save amp state dict in the checkpoint 53 | checkpoint['amp'] = apex.amp.state_dict() 54 | 55 | if filename.startswith('pavi://'): 56 | try: 57 | from pavi import modelcloud 58 | from pavi.exception import NodeNotFoundError 59 | except ImportError: 60 | raise ImportError( 61 | 'Please install pavi to load checkpoint from modelcloud.') 62 | model_path = filename[7:] 63 | root = modelcloud.Folder() 64 | model_dir, model_name = osp.split(model_path) 65 | try: 66 | model = modelcloud.get(model_dir) 67 | except NodeNotFoundError: 68 | model = root.create_training_model(model_dir) 69 | with TemporaryDirectory() as tmp_dir: 70 | checkpoint_file = osp.join(tmp_dir, model_name) 71 | with open(checkpoint_file, 'wb') as f: 72 | torch.save(checkpoint, f) 73 | f.flush() 74 | model.create_file(checkpoint_file, name=model_name) 75 | else: 76 | mmcv.mkdir_or_exist(osp.dirname(filename)) 77 | # immediately flush buffer 78 | with open(filename, 'wb') as f: 79 | torch.save(checkpoint, f) 80 | f.flush() 81 | -------------------------------------------------------------------------------- /segmentation/mmcv_custom/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import platform 4 | import shutil 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.runner import RUNNERS, EpochBasedRunner 11 | from .checkpoint import save_checkpoint 12 | 13 | 14 | @RUNNERS.register_module() 15 | class EpochBasedRunnerAmp(EpochBasedRunner): 16 | """Epoch-based Runner with AMP support. 17 | 18 | This runner train models epoch by epoch. 19 | """ 20 | 21 | def save_checkpoint(self, 22 | out_dir, 23 | filename_tmpl='epoch_{}.pth', 24 | save_optimizer=True, 25 | meta=None, 26 | create_symlink=True): 27 | """Save the checkpoint. 28 | 29 | Args: 30 | out_dir (str): The directory that checkpoints are saved. 31 | filename_tmpl (str, optional): The checkpoint filename template, 32 | which contains a placeholder for the epoch number. 33 | Defaults to 'epoch_{}.pth'. 34 | save_optimizer (bool, optional): Whether to save the optimizer to 35 | the checkpoint. Defaults to True. 36 | meta (dict, optional): The meta information to be saved in the 37 | checkpoint. Defaults to None. 38 | create_symlink (bool, optional): Whether to create a symlink 39 | "latest.pth" to point to the latest checkpoint. 40 | Defaults to True. 
41 | """ 42 | if meta is None: 43 | meta = dict(epoch=self.epoch + 1, iter=self.iter) 44 | elif isinstance(meta, dict): 45 | meta.update(epoch=self.epoch + 1, iter=self.iter) 46 | else: 47 | raise TypeError( 48 | f'meta should be a dict or None, but got {type(meta)}') 49 | if self.meta is not None: 50 | meta.update(self.meta) 51 | 52 | filename = filename_tmpl.format(self.epoch + 1) 53 | filepath = osp.join(out_dir, filename) 54 | optimizer = self.optimizer if save_optimizer else None 55 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) 56 | # in some environments, `os.symlink` is not supported, you may need to 57 | # set `create_symlink` to False 58 | if create_symlink: 59 | dst_file = osp.join(out_dir, 'latest.pth') 60 | if platform.system() != 'Windows': 61 | mmcv.symlink(filename, dst_file) 62 | else: 63 | shutil.copy(filepath, dst_file) 64 | 65 | def resume(self, 66 | checkpoint, 67 | resume_optimizer=True, 68 | map_location='default'): 69 | if map_location == 'default': 70 | if torch.cuda.is_available(): 71 | device_id = torch.cuda.current_device() 72 | checkpoint = self.load_checkpoint( 73 | checkpoint, 74 | map_location=lambda storage, loc: storage.cuda(device_id)) 75 | else: 76 | checkpoint = self.load_checkpoint(checkpoint) 77 | else: 78 | checkpoint = self.load_checkpoint( 79 | checkpoint, map_location=map_location) 80 | 81 | self._epoch = checkpoint['meta']['epoch'] 82 | self._iter = checkpoint['meta']['iter'] 83 | if 'optimizer' in checkpoint and resume_optimizer: 84 | if isinstance(self.optimizer, Optimizer): 85 | self.optimizer.load_state_dict(checkpoint['optimizer']) 86 | elif isinstance(self.optimizer, dict): 87 | for k in self.optimizer.keys(): 88 | self.optimizer[k].load_state_dict( 89 | checkpoint['optimizer'][k]) 90 | else: 91 | raise TypeError( 92 | 'Optimizer should be dict or torch.optim.Optimizer ' 93 | f'but got {type(self.optimizer)}') 94 | 95 | if 'amp' in checkpoint: 96 | apex.amp.load_state_dict(checkpoint['amp']) 97 | self.logger.info('load amp state dict') 98 | 99 | self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) 100 | -------------------------------------------------------------------------------- /segmentation/mmcv_custom/runner/optimizer.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import OptimizerHook, HOOKS 2 | 3 | @HOOKS.register_module() 4 | class DistOptimizerHook(OptimizerHook): 5 | """Optimizer hook for distributed training.""" 6 | 7 | def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False): 8 | self.grad_clip = grad_clip 9 | self.coalesce = coalesce 10 | self.bucket_size_mb = bucket_size_mb 11 | self.update_interval = update_interval 12 | self.use_fp16 = use_fp16 13 | 14 | def before_run(self, runner): 15 | runner.optimizer.zero_grad() 16 | 17 | def after_train_iter(self, runner): 18 | runner.outputs['loss'] /= self.update_interval 19 | if self.use_fp16: 20 | with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss: 21 | scaled_loss.backward() 22 | else: 23 | runner.outputs['loss'].backward() 24 | if self.every_n_iters(runner, self.update_interval): 25 | if self.grad_clip is not None: 26 | self.clip_grads(runner.model.parameters()) 27 | runner.optimizer.step() 28 | runner.optimizer.zero_grad() -------------------------------------------------------------------------------- /segmentation/model/ska.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | import triton 4 | import triton.language as tl 5 | from torch.amp import custom_fwd, custom_bwd 6 | import math 7 | 8 | def _grid(numel: int, bs: int) -> tuple: 9 | return (triton.cdiv(numel, bs),) 10 | 11 | @triton.jit 12 | def _idx(i, n: int, c: int, h: int, w: int): 13 | ni = i // (c * h * w) 14 | ci = (i // (h * w)) % c 15 | hi = (i // w) % h 16 | wi = i % w 17 | m = i < (n * c * h * w) 18 | return ni, ci, hi, wi, m 19 | 20 | @triton.jit 21 | def ska_fwd( 22 | x_ptr, w_ptr, o_ptr, 23 | n, ic, h, w, ks, pad, wc, 24 | BS: tl.constexpr, 25 | CT: tl.constexpr, AT: tl.constexpr 26 | ): 27 | pid = tl.program_id(0) 28 | start = pid * BS 29 | offs = start + tl.arange(0, BS) 30 | 31 | ni, ci, hi, wi, m = _idx(offs, n, ic, h, w) 32 | val = tl.zeros((BS,), dtype=AT) 33 | 34 | for kh in range(ks): 35 | hin = hi - pad + kh 36 | hb = (hin >= 0) & (hin < h) 37 | for kw in range(ks): 38 | win = wi - pad + kw 39 | b = hb & (win >= 0) & (win < w) 40 | 41 | x_off = ((ni * ic + ci) * h + hin) * w + win 42 | w_off = ((ni * wc + ci % wc) * ks * ks + (kh * ks + kw)) * h * w + hi * w + wi 43 | 44 | x_val = tl.load(x_ptr + x_off, mask=m & b, other=0.0).to(CT) 45 | w_val = tl.load(w_ptr + w_off, mask=m, other=0.0).to(CT) 46 | val += tl.where(b & m, x_val * w_val, 0.0).to(AT) 47 | 48 | tl.store(o_ptr + offs, val.to(CT), mask=m) 49 | 50 | @triton.jit 51 | def ska_bwd_x( 52 | go_ptr, w_ptr, gi_ptr, 53 | n, ic, h, w, ks, pad, wc, 54 | BS: tl.constexpr, 55 | CT: tl.constexpr, AT: tl.constexpr 56 | ): 57 | pid = tl.program_id(0) 58 | start = pid * BS 59 | offs = start + tl.arange(0, BS) 60 | 61 | ni, ci, hi, wi, m = _idx(offs, n, ic, h, w) 62 | val = tl.zeros((BS,), dtype=AT) 63 | 64 | for kh in range(ks): 65 | ho = hi + pad - kh 66 | hb = (ho >= 0) & (ho < h) 67 | for kw in range(ks): 68 | wo = wi + pad - kw 69 | b = hb & (wo >= 0) & (wo < w) 70 | 71 | go_off = ((ni * ic + ci) * h + ho) * w + wo 72 | w_off = ((ni * wc + ci % wc) * ks * ks + (kh * ks + kw)) * h * w + ho * w + wo 73 | 74 | go_val = tl.load(go_ptr + go_off, mask=m & b, other=0.0).to(CT) 75 | w_val = tl.load(w_ptr + w_off, mask=m, other=0.0).to(CT) 76 | val += tl.where(b & m, go_val * w_val, 0.0).to(AT) 77 | 78 | tl.store(gi_ptr + offs, val.to(CT), mask=m) 79 | 80 | @triton.jit 81 | def ska_bwd_w( 82 | go_ptr, x_ptr, gw_ptr, 83 | n, wc, h, w, ic, ks, pad, 84 | BS: tl.constexpr, 85 | CT: tl.constexpr, AT: tl.constexpr 86 | ): 87 | pid = tl.program_id(0) 88 | start = pid * BS 89 | offs = start + tl.arange(0, BS) 90 | 91 | ni, ci, hi, wi, m = _idx(offs, n, wc, h, w) 92 | 93 | for kh in range(ks): 94 | hin = hi - pad + kh 95 | hb = (hin >= 0) & (hin < h) 96 | for kw in range(ks): 97 | win = wi - pad + kw 98 | b = hb & (win >= 0) & (win < w) 99 | w_off = ((ni * wc + ci) * ks * ks + (kh * ks + kw)) * h * w + hi * w + wi 100 | 101 | val = tl.zeros((BS,), dtype=AT) 102 | steps = (ic - ci + wc - 1) // wc 103 | for s in range(tl.max(steps, axis=0)): 104 | cc = ci + s * wc 105 | cm = (cc < ic) & m & b 106 | 107 | x_off = ((ni * ic + cc) * h + hin) * w + win 108 | go_off = ((ni * ic + cc) * h + hi) * w + wi 109 | 110 | x_val = tl.load(x_ptr + x_off, mask=cm, other=0.0).to(CT) 111 | go_val = tl.load(go_ptr + go_off, mask=cm, other=0.0).to(CT) 112 | val += tl.where(cm, x_val * go_val, 0.0).to(AT) 113 | 114 | tl.store(gw_ptr + w_off, val.to(CT), mask=m) 115 | 116 | class SkaFn(Function): 117 | @staticmethod 118 | 
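# Tensor layouts, as inferred from the pointer arithmetic in the kernels:
#   x: (N, C, H, W) contiguous feature map
#   w: (N, wc, ks*ks, H, W) contiguous per-pixel kernels, with wc <= C;
#      input channel ci uses weight channel ci % wc, and the spatial
#      kernel size ks is recovered as sqrt(w.shape[2]) below.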
@custom_fwd(device_type='cuda') 119 | def forward(ctx, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor: 120 | ks = int(math.sqrt(w.shape[2])) 121 | pad = (ks - 1) // 2 122 | ctx.ks, ctx.pad = ks, pad 123 | n, ic, h, width = x.shape 124 | wc = w.shape[1] 125 | o = torch.empty(n, ic, h, width, device=x.device, dtype=x.dtype) 126 | numel = o.numel() 127 | 128 | x = x.contiguous() 129 | w = w.contiguous() 130 | 131 | grid = lambda meta: _grid(numel, meta["BS"]) 132 | 133 | ct = tl.float16 if x.dtype == torch.float16 else (tl.float32 if x.dtype == torch.float32 else tl.float64) 134 | at = tl.float32 if x.dtype == torch.float16 else ct 135 | 136 | ska_fwd[grid](x, w, o, n, ic, h, width, ks, pad, wc, BS=1024, CT=ct, AT=at) 137 | 138 | ctx.save_for_backward(x, w) 139 | ctx.ct, ctx.at = ct, at 140 | return o 141 | 142 | @staticmethod 143 | @custom_bwd(device_type='cuda') 144 | def backward(ctx, go: torch.Tensor) -> tuple: 145 | ks, pad = ctx.ks, ctx.pad 146 | x, w = ctx.saved_tensors 147 | n, ic, h, width = x.shape 148 | wc = w.shape[1] 149 | 150 | go = go.contiguous() 151 | gx = gw = None 152 | ct, at = ctx.ct, ctx.at 153 | 154 | if ctx.needs_input_grad[0]: 155 | gx = torch.empty_like(x) 156 | numel = gx.numel() 157 | ska_bwd_x[lambda meta: _grid(numel, meta["BS"])](go, w, gx, n, ic, h, width, ks, pad, wc, BS=1024, CT=ct, AT=at) 158 | 159 | if ctx.needs_input_grad[1]: 160 | gw = torch.empty_like(w) 161 | numel = gw.numel() // w.shape[2] 162 | ska_bwd_w[lambda meta: _grid(numel, meta["BS"])](go, x, gw, n, wc, h, width, ic, ks, pad, BS=1024, CT=ct, AT=at) 163 | 164 | return gx, gw, None, None 165 | 166 | class SKA(torch.nn.Module): 167 | def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor: 168 | return SkaFn.apply(x, w) # type: ignore 169 | -------------------------------------------------------------------------------- /segmentation/tools/analyze_logs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | """Modified from https://github.com/open- 3 | mmlab/mmdetection/blob/master/tools/analysis_tools/analyze_logs.py.""" 4 | import argparse 5 | import json 6 | from collections import defaultdict 7 | 8 | import matplotlib.pyplot as plt 9 | import seaborn as sns 10 | 11 | 12 | def plot_curve(log_dicts, args): 13 | if args.backend is not None: 14 | plt.switch_backend(args.backend) 15 | sns.set_style(args.style) 16 | # if legend is None, use {filename}_{key} as legend 17 | legend = args.legend 18 | if legend is None: 19 | legend = [] 20 | for json_log in args.json_logs: 21 | for metric in args.keys: 22 | legend.append(f'{json_log}_{metric}') 23 | assert len(legend) == (len(args.json_logs) * len(args.keys)) 24 | metrics = args.keys 25 | 26 | num_metrics = len(metrics) 27 | for i, log_dict in enumerate(log_dicts): 28 | epochs = list(log_dict.keys()) 29 | for j, metric in enumerate(metrics): 30 | print(f'plot curve of {args.json_logs[i]}, metric is {metric}') 31 | plot_epochs = [] 32 | plot_iters = [] 33 | plot_values = [] 34 | # In some log files, the iter number is not correct; `pre_iter` is 35 | # used to prevent generating wrong lines.
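# Entries whose recorded iter decreases (e.g. when a run restarts) are
# skipped by the `pre_iter` check below.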
36 | pre_iter = -1 37 | for epoch in epochs: 38 | epoch_logs = log_dict[epoch] 39 | if metric not in epoch_logs.keys(): 40 | continue 41 | if metric in ['mIoU', 'mAcc', 'aAcc']: 42 | plot_epochs.append(epoch) 43 | plot_values.append(epoch_logs[metric][0]) 44 | else: 45 | for idx in range(len(epoch_logs[metric])): 46 | if pre_iter > epoch_logs['iter'][idx]: 47 | continue 48 | pre_iter = epoch_logs['iter'][idx] 49 | plot_iters.append(epoch_logs['iter'][idx]) 50 | plot_values.append(epoch_logs[metric][idx]) 51 | ax = plt.gca() 52 | label = legend[i * num_metrics + j] 53 | if metric in ['mIoU', 'mAcc', 'aAcc']: 54 | ax.set_xticks(plot_epochs) 55 | plt.xlabel('epoch') 56 | plt.plot(plot_epochs, plot_values, label=label, marker='o') 57 | else: 58 | plt.xlabel('iter') 59 | plt.plot(plot_iters, plot_values, label=label, linewidth=0.5) 60 | plt.legend() 61 | if args.title is not None: 62 | plt.title(args.title) 63 | if args.out is None: 64 | plt.show() 65 | else: 66 | print(f'save curve to: {args.out}') 67 | plt.savefig(args.out) 68 | plt.cla() 69 | 70 | 71 | def parse_args(): 72 | parser = argparse.ArgumentParser(description='Analyze Json Log') 73 | parser.add_argument( 74 | 'json_logs', 75 | type=str, 76 | nargs='+', 77 | help='path of train log in json format') 78 | parser.add_argument( 79 | '--keys', 80 | type=str, 81 | nargs='+', 82 | default=['mIoU'], 83 | help='the metric that you want to plot') 84 | parser.add_argument('--title', type=str, help='title of figure') 85 | parser.add_argument( 86 | '--legend', 87 | type=str, 88 | nargs='+', 89 | default=None, 90 | help='legend of each plot') 91 | parser.add_argument( 92 | '--backend', type=str, default=None, help='backend of plt') 93 | parser.add_argument( 94 | '--style', type=str, default='dark', help='style of plt') 95 | parser.add_argument('--out', type=str, default=None) 96 | args = parser.parse_args() 97 | return args 98 | 99 | 100 | def load_json_logs(json_logs): 101 | # load and convert json_logs to log_dict, key is epoch, value is a sub dict 102 | # keys of sub dict are different metrics 103 | # value of sub dict is a list of corresponding values of all iterations 104 | log_dicts = [dict() for _ in json_logs] 105 | for json_log, log_dict in zip(json_logs, log_dicts): 106 | with open(json_log, 'r') as log_file: 107 | for line in log_file: 108 | log = json.loads(line.strip()) 109 | # skip lines without `epoch` field 110 | if 'epoch' not in log: 111 | continue 112 | epoch = log.pop('epoch') 113 | if epoch not in log_dict: 114 | log_dict[epoch] = defaultdict(list) 115 | for k, v in log.items(): 116 | log_dict[epoch][k].append(v) 117 | return log_dicts 118 | 119 | 120 | def main(): 121 | args = parse_args() 122 | json_logs = args.json_logs 123 | for json_log in json_logs: 124 | assert json_log.endswith('.json') 125 | log_dicts = load_json_logs(json_logs) 126 | plot_curve(log_dicts, args) 127 | 128 | 129 | if __name__ == '__main__': 130 | main() 131 | -------------------------------------------------------------------------------- /segmentation/tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
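# Measures pure inference speed: the test dataloader is built with one
# image per GPU, the first 5 iterations are discarded as warm-up, and the
# remaining forward passes are timed between torch.cuda.synchronize()
# calls until 200 images have been processed, reporting the average FPS.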
2 | import argparse 3 | import time 4 | 5 | import torch 6 | from mmcv import Config 7 | from mmcv.parallel import MMDataParallel 8 | from mmcv.runner import load_checkpoint, wrap_fp16_model 9 | 10 | from mmseg.datasets import build_dataloader, build_dataset 11 | from mmseg.models import build_segmentor 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='MMSeg benchmark a model') 16 | parser.add_argument('config', help='test config file path') 17 | parser.add_argument('checkpoint', help='checkpoint file') 18 | parser.add_argument( 19 | '--log-interval', type=int, default=50, help='interval of logging') 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def main(): 25 | args = parse_args() 26 | 27 | cfg = Config.fromfile(args.config) 28 | # set cudnn_benchmark 29 | torch.backends.cudnn.benchmark = False 30 | cfg.model.pretrained = None 31 | cfg.data.test.test_mode = True 32 | 33 | # build the dataloader 34 | # TODO: support multiple images per gpu (only minor changes are needed) 35 | dataset = build_dataset(cfg.data.test) 36 | data_loader = build_dataloader( 37 | dataset, 38 | samples_per_gpu=1, 39 | workers_per_gpu=cfg.data.workers_per_gpu, 40 | dist=False, 41 | shuffle=False) 42 | 43 | # build the model and load checkpoint 44 | cfg.model.train_cfg = None 45 | model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg')) 46 | fp16_cfg = cfg.get('fp16', None) 47 | if fp16_cfg is not None: 48 | wrap_fp16_model(model) 49 | load_checkpoint(model, args.checkpoint, map_location='cpu') 50 | 51 | model = MMDataParallel(model, device_ids=[0]) 52 | 53 | model.eval() 54 | 55 | # the first several iterations may be very slow, so skip them 56 | num_warmup = 5 57 | pure_inf_time = 0 58 | total_iters = 200 59 | 60 | # benchmark with 200 images and take the average 61 | for i, data in enumerate(data_loader): 62 | 63 | torch.cuda.synchronize() 64 | start_time = time.perf_counter() 65 | 66 | with torch.no_grad(): 67 | model(return_loss=False, rescale=True, **data) 68 | 69 | torch.cuda.synchronize() 70 | elapsed = time.perf_counter() - start_time 71 | 72 | if i >= num_warmup: 73 | pure_inf_time += elapsed 74 | if (i + 1) % args.log_interval == 0: 75 | fps = (i + 1 - num_warmup) / pure_inf_time 76 | print(f'Done image [{i + 1:<3}/ {total_iters}], ' 77 | f'fps: {fps:.2f} img / s') 78 | 79 | if (i + 1) == total_iters: 80 | fps = (i + 1 - num_warmup) / pure_inf_time 81 | print(f'Overall fps: {fps:.2f} img / s') 82 | break 83 | 84 | 85 | if __name__ == '__main__': 86 | main() 87 | -------------------------------------------------------------------------------- /segmentation/tools/browse_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import warnings 4 | from pathlib import Path 5 | 6 | import mmcv 7 | import numpy as np 8 | from mmcv import Config 9 | 10 | from mmseg.datasets.builder import build_dataset 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Browse a dataset') 15 | parser.add_argument('config', help='train config file path') 16 | parser.add_argument( 17 | '--show-origin', 18 | default=False, 19 | action='store_true', 20 | help='if True, omit all augmentation in pipeline,' 21 | ' show origin image and seg map') 22 | parser.add_argument( 23 | '--skip-type', 24 | type=str, 25 | nargs='+', 26 | default=['DefaultFormatBundle', 'Normalize', 'Collect'], 27 | help='skip some useless pipeline steps; if `show-origin` is true, ' 28 | 'all pipeline steps except
`Load` will be skipped') 29 | parser.add_argument( 30 | '--output-dir', 31 | default='./output', 32 | type=str, 33 | help='If there is no display interface, you can save the results here') 34 | parser.add_argument('--show', default=False, action='store_true') 35 | parser.add_argument( 36 | '--show-interval', 37 | type=int, 38 | default=999, 39 | help='the interval of show (ms)') 40 | parser.add_argument( 41 | '--opacity', 42 | type=float, 43 | default=0.5, 44 | help='the opacity of semantic map') 45 | args = parser.parse_args() 46 | return args 47 | 48 | 49 | def imshow_semantic(img, 50 | seg, 51 | class_names, 52 | palette=None, 53 | win_name='', 54 | show=False, 55 | wait_time=0, 56 | out_file=None, 57 | opacity=0.5): 58 | """Draw `result` over `img`. 59 | 60 | Args: 61 | img (str or Tensor): The image to be displayed. 62 | seg (Tensor): The semantic segmentation results to draw over 63 | `img`. 64 | class_names (list[str]): Names of each class. 65 | palette (list[list[int]] | np.ndarray | None): The palette of 66 | segmentation map. If None is given, random palette will be 67 | generated. Default: None 68 | win_name (str): The window name. 69 | wait_time (int): Value of waitKey param. 70 | Default: 0. 71 | show (bool): Whether to show the image. 72 | Default: False. 73 | out_file (str or None): The filename to write the image. 74 | Default: None. 75 | opacity (float): Opacity of painted segmentation map. 76 | Default: 0.5. 77 | Must be in (0, 1] range. 78 | Returns: 79 | img (Tensor): The drawn image; returned only when neither `show` nor `out_file` is set. 80 | """ 81 | img = mmcv.imread(img) 82 | img = img.copy() 83 | if palette is None: 84 | palette = np.random.randint(0, 255, size=(len(class_names), 3)) 85 | palette = np.array(palette) 86 | assert palette.shape[0] == len(class_names) 87 | assert palette.shape[1] == 3 88 | assert len(palette.shape) == 2 89 | assert 0 < opacity <= 1.0 90 | color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) 91 | for label, color in enumerate(palette): 92 | color_seg[seg == label, :] = color 93 | # convert to BGR 94 | color_seg = color_seg[..., ::-1] 95 | 96 | img = img * (1 - opacity) + color_seg * opacity 97 | img = img.astype(np.uint8) 98 | # if out_file specified, do not show image in window 99 | if out_file is not None: 100 | show = False 101 | 102 | if show: 103 | mmcv.imshow(img, win_name, wait_time) 104 | if out_file is not None: 105 | mmcv.imwrite(img, out_file) 106 | 107 | if not (show or out_file): 108 | warnings.warn('show==False and out_file is not specified, only ' 109 | 'the result image will be returned') 110 | return img 111 | 112 | 113 | def _retrieve_data_cfg(_data_cfg, skip_type, show_origin): 114 | if show_origin is True: 115 | # only keep pipeline of Loading data and ann 116 | _data_cfg['pipeline'] = [ 117 | x for x in _data_cfg.pipeline if 'Load' in x['type'] 118 | ] 119 | else: 120 | _data_cfg['pipeline'] = [ 121 | x for x in _data_cfg.pipeline if x['type'] not in skip_type 122 | ] 123 | 124 | 125 | def retrieve_data_cfg(config_path, skip_type, show_origin=False): 126 | cfg = Config.fromfile(config_path) 127 | train_data_cfg = cfg.data.train 128 | if isinstance(train_data_cfg, list): 129 | for _data_cfg in train_data_cfg: 130 | if 'pipeline' in _data_cfg: 131 | _retrieve_data_cfg(_data_cfg, skip_type, show_origin) 132 | elif 'dataset' in _data_cfg: 133 | _retrieve_data_cfg(_data_cfg['dataset'], skip_type, 134 | show_origin) 135 | else: 136 | raise ValueError 137 | elif 'dataset' in train_data_cfg: 138 | _retrieve_data_cfg(train_data_cfg['dataset'], skip_type,
show_origin) 139 | else: 140 | _retrieve_data_cfg(train_data_cfg, skip_type, show_origin) 141 | return cfg 142 | 143 | 144 | def main(): 145 | args = parse_args() 146 | cfg = retrieve_data_cfg(args.config, args.skip_type, args.show_origin) 147 | dataset = build_dataset(cfg.data.train) 148 | progress_bar = mmcv.ProgressBar(len(dataset)) 149 | for item in dataset: 150 | filename = os.path.join(args.output_dir, 151 | Path(item['filename']).name 152 | ) if args.output_dir is not None else None 153 | imshow_semantic( 154 | item['img'], 155 | item['gt_semantic_seg'], 156 | dataset.CLASSES, 157 | dataset.PALETTE, 158 | show=args.show, 159 | wait_time=args.show_interval, 160 | out_file=filename, 161 | opacity=args.opacity, 162 | ) 163 | progress_bar.update() 164 | 165 | 166 | if __name__ == '__main__': 167 | main() 168 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/chase_db1.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | import os.path as osp 5 | import tempfile 6 | import zipfile 7 | 8 | import mmcv 9 | 10 | CHASE_DB1_LEN = 28 * 3 11 | TRAINING_LEN = 60 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser( 16 | description='Convert CHASE_DB1 dataset to mmsegmentation format') 17 | parser.add_argument('dataset_path', help='path of CHASEDB1.zip') 18 | parser.add_argument('--tmp_dir', help='path of the temporary directory') 19 | parser.add_argument('-o', '--out_dir', help='output path') 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def main(): 25 | args = parse_args() 26 | dataset_path = args.dataset_path 27 | if args.out_dir is None: 28 | out_dir = osp.join('data', 'CHASE_DB1') 29 | else: 30 | out_dir = args.out_dir 31 | 32 | print('Making directories...') 33 | mmcv.mkdir_or_exist(out_dir) 34 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) 35 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) 36 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) 37 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) 38 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) 39 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) 40 | 41 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 42 | print('Extracting CHASEDB1.zip...') 43 | zip_file = zipfile.ZipFile(dataset_path) 44 | zip_file.extractall(tmp_dir) 45 | 46 | print('Generating training dataset...') 47 | 48 | assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \ 49 | 'len(os.listdir(tmp_dir)) != {}'.format(CHASE_DB1_LEN) 50 | 51 | for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: 52 | img = mmcv.imread(osp.join(tmp_dir, img_name)) 53 | if osp.splitext(img_name)[1] == '.jpg': 54 | mmcv.imwrite( 55 | img, 56 | osp.join(out_dir, 'images', 'training', 57 | osp.splitext(img_name)[0] + '.png')) 58 | else: 59 | # The annotation img should be divided by 128, because some of 60 | # the annotation imgs are not standard. We should set a 61 | # threshold to convert the nonstandard annotation imgs. 
The 62 | # value divided by 128 is equivalent to '1 if value >= 128 63 | # else 0' 64 | mmcv.imwrite( 65 | img[:, :, 0] // 128, 66 | osp.join(out_dir, 'annotations', 'training', 67 | osp.splitext(img_name)[0] + '.png')) 68 | 69 | for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: 70 | img = mmcv.imread(osp.join(tmp_dir, img_name)) 71 | if osp.splitext(img_name)[1] == '.jpg': 72 | mmcv.imwrite( 73 | img, 74 | osp.join(out_dir, 'images', 'validation', 75 | osp.splitext(img_name)[0] + '.png')) 76 | else: 77 | mmcv.imwrite( 78 | img[:, :, 0] // 128, 79 | osp.join(out_dir, 'annotations', 'validation', 80 | osp.splitext(img_name)[0] + '.png')) 81 | 82 | print('Removing the temporary files...') 83 | 84 | print('Done!') 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | 5 | import mmcv 6 | from cityscapesscripts.preparation.json2labelImg import json2labelImg 7 | 8 | 9 | def convert_json_to_label(json_file): 10 | label_file = json_file.replace('_polygons.json', '_labelTrainIds.png') 11 | json2labelImg(json_file, label_file, 'trainIds') 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser( 16 | description='Convert Cityscapes annotations to TrainIds') 17 | parser.add_argument('cityscapes_path', help='cityscapes data path') 18 | parser.add_argument('--gt-dir', default='gtFine', type=str) 19 | parser.add_argument('-o', '--out-dir', help='output path') 20 | parser.add_argument( 21 | '--nproc', default=1, type=int, help='number of processes') 22 | args = parser.parse_args() 23 | return args 24 | 25 | 26 | def main(): 27 | args = parse_args() 28 | cityscapes_path = args.cityscapes_path 29 | out_dir = args.out_dir if args.out_dir else cityscapes_path 30 | mmcv.mkdir_or_exist(out_dir) 31 | 32 | gt_dir = osp.join(cityscapes_path, args.gt_dir) 33 | 34 | poly_files = [] 35 | for poly in mmcv.scandir(gt_dir, '_polygons.json', recursive=True): 36 | poly_file = osp.join(gt_dir, poly) 37 | poly_files.append(poly_file) 38 | if args.nproc > 1: 39 | mmcv.track_parallel_progress(convert_json_to_label, poly_files, 40 | args.nproc) 41 | else: 42 | mmcv.track_progress(convert_json_to_label, poly_files) 43 | 44 | split_names = ['train', 'val', 'test'] 45 | 46 | for split in split_names: 47 | filenames = [] 48 | for poly in mmcv.scandir( 49 | osp.join(gt_dir, split), '_polygons.json', recursive=True): 50 | filenames.append(poly.replace('_gtFine_polygons.json', '')) 51 | with open(osp.join(out_dir, f'{split}.txt'), 'w') as f: 52 | f.writelines(name + '\n' for name in filenames) 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/drive.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
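# Converts the DRIVE retinal-vessel dataset into the images/annotations
# layout expected by mmseg. Note that the manual annotations are decoded
# via cv2.VideoCapture rather than mmcv.imread, presumably because they
# ship as GIF files.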
2 | import argparse 3 | import os 4 | import os.path as osp 5 | import tempfile 6 | import zipfile 7 | 8 | import cv2 9 | import mmcv 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser( 14 | description='Convert DRIVE dataset to mmsegmentation format') 15 | parser.add_argument( 16 | 'training_path', help='the training part of DRIVE dataset') 17 | parser.add_argument( 18 | 'testing_path', help='the testing part of DRIVE dataset') 19 | parser.add_argument('--tmp_dir', help='path of the temporary directory') 20 | parser.add_argument('-o', '--out_dir', help='output path') 21 | args = parser.parse_args() 22 | return args 23 | 24 | 25 | def main(): 26 | args = parse_args() 27 | training_path = args.training_path 28 | testing_path = args.testing_path 29 | if args.out_dir is None: 30 | out_dir = osp.join('data', 'DRIVE') 31 | else: 32 | out_dir = args.out_dir 33 | 34 | print('Making directories...') 35 | mmcv.mkdir_or_exist(out_dir) 36 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) 37 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) 38 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) 39 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) 40 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) 41 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) 42 | 43 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 44 | print('Extracting training.zip...') 45 | zip_file = zipfile.ZipFile(training_path) 46 | zip_file.extractall(tmp_dir) 47 | 48 | print('Generating training dataset...') 49 | now_dir = osp.join(tmp_dir, 'training', 'images') 50 | for img_name in os.listdir(now_dir): 51 | img = mmcv.imread(osp.join(now_dir, img_name)) 52 | mmcv.imwrite( 53 | img, 54 | osp.join( 55 | out_dir, 'images', 'training', 56 | osp.splitext(img_name)[0].replace('_training', '') + 57 | '.png')) 58 | 59 | now_dir = osp.join(tmp_dir, 'training', '1st_manual') 60 | for img_name in os.listdir(now_dir): 61 | cap = cv2.VideoCapture(osp.join(now_dir, img_name)) 62 | ret, img = cap.read() 63 | mmcv.imwrite( 64 | img[:, :, 0] // 128, 65 | osp.join(out_dir, 'annotations', 'training', 66 | osp.splitext(img_name)[0] + '.png')) 67 | 68 | print('Extracting test.zip...') 69 | zip_file = zipfile.ZipFile(testing_path) 70 | zip_file.extractall(tmp_dir) 71 | 72 | print('Generating validation dataset...') 73 | now_dir = osp.join(tmp_dir, 'test', 'images') 74 | for img_name in os.listdir(now_dir): 75 | img = mmcv.imread(osp.join(now_dir, img_name)) 76 | mmcv.imwrite( 77 | img, 78 | osp.join( 79 | out_dir, 'images', 'validation', 80 | osp.splitext(img_name)[0].replace('_test', '') + '.png')) 81 | 82 | now_dir = osp.join(tmp_dir, 'test', '1st_manual') 83 | if osp.exists(now_dir): 84 | for img_name in os.listdir(now_dir): 85 | cap = cv2.VideoCapture(osp.join(now_dir, img_name)) 86 | ret, img = cap.read() 87 | # The annotation img should be divided by 128, because some of 88 | # the annotation imgs are not standard. We should set a 89 | # threshold to convert the nonstandard annotation imgs. 
The 90 | # value divided by 128 is equivalent to '1 if value >= 128 91 | # else 0' 92 | mmcv.imwrite( 93 | img[:, :, 0] // 128, 94 | osp.join(out_dir, 'annotations', 'validation', 95 | osp.splitext(img_name)[0] + '.png')) 96 | 97 | now_dir = osp.join(tmp_dir, 'test', '2nd_manual') 98 | if osp.exists(now_dir): 99 | for img_name in os.listdir(now_dir): 100 | cap = cv2.VideoCapture(osp.join(now_dir, img_name)) 101 | ret, img = cap.read() 102 | mmcv.imwrite( 103 | img[:, :, 0] // 128, 104 | osp.join(out_dir, 'annotations', 'validation', 105 | osp.splitext(img_name)[0] + '.png')) 106 | 107 | print('Removing the temporary files...') 108 | 109 | print('Done!') 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/hrf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | import os.path as osp 5 | import tempfile 6 | import zipfile 7 | 8 | import mmcv 9 | 10 | HRF_LEN = 15 11 | TRAINING_LEN = 5 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser( 16 | description='Convert HRF dataset to mmsegmentation format') 17 | parser.add_argument('healthy_path', help='the path of healthy.zip') 18 | parser.add_argument( 19 | 'healthy_manualsegm_path', help='the path of healthy_manualsegm.zip') 20 | parser.add_argument('glaucoma_path', help='the path of glaucoma.zip') 21 | parser.add_argument( 22 | 'glaucoma_manualsegm_path', help='the path of glaucoma_manualsegm.zip') 23 | parser.add_argument( 24 | 'diabetic_retinopathy_path', 25 | help='the path of diabetic_retinopathy.zip') 26 | parser.add_argument( 27 | 'diabetic_retinopathy_manualsegm_path', 28 | help='the path of diabetic_retinopathy_manualsegm.zip') 29 | parser.add_argument('--tmp_dir', help='path of the temporary directory') 30 | parser.add_argument('-o', '--out_dir', help='output path') 31 | args = parser.parse_args() 32 | return args 33 | 34 | 35 | def main(): 36 | args = parse_args() 37 | images_path = [ 38 | args.healthy_path, args.glaucoma_path, args.diabetic_retinopathy_path 39 | ] 40 | annotations_path = [ 41 | args.healthy_manualsegm_path, args.glaucoma_manualsegm_path, 42 | args.diabetic_retinopathy_manualsegm_path 43 | ] 44 | if args.out_dir is None: 45 | out_dir = osp.join('data', 'HRF') 46 | else: 47 | out_dir = args.out_dir 48 | 49 | print('Making directories...') 50 | mmcv.mkdir_or_exist(out_dir) 51 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) 52 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) 53 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) 54 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) 55 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) 56 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) 57 | 58 | print('Generating images...') 59 | for now_path in images_path: 60 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 61 | zip_file = zipfile.ZipFile(now_path) 62 | zip_file.extractall(tmp_dir) 63 | 64 | assert len(os.listdir(tmp_dir)) == HRF_LEN, \ 65 | 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN) 66 | 67 | for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: 68 | img = mmcv.imread(osp.join(tmp_dir, filename)) 69 | mmcv.imwrite( 70 | img, 71 | osp.join(out_dir, 'images', 'training', 72 | osp.splitext(filename)[0] + '.png')) 73 | for filename in 
sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: 74 | img = mmcv.imread(osp.join(tmp_dir, filename)) 75 | mmcv.imwrite( 76 | img, 77 | osp.join(out_dir, 'images', 'validation', 78 | osp.splitext(filename)[0] + '.png')) 79 | 80 | print('Generating annotations...') 81 | for now_path in annotations_path: 82 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 83 | zip_file = zipfile.ZipFile(now_path) 84 | zip_file.extractall(tmp_dir) 85 | 86 | assert len(os.listdir(tmp_dir)) == HRF_LEN, \ 87 | 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN) 88 | 89 | for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: 90 | img = mmcv.imread(osp.join(tmp_dir, filename)) 91 | # The annotation img should be divided by 128, because some of 92 | # the annotation imgs are not standard. We should set a 93 | # threshold to convert the nonstandard annotation imgs. The 94 | # value divided by 128 is equivalent to '1 if value >= 128 95 | # else 0' 96 | mmcv.imwrite( 97 | img[:, :, 0] // 128, 98 | osp.join(out_dir, 'annotations', 'training', 99 | osp.splitext(filename)[0] + '.png')) 100 | for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: 101 | img = mmcv.imread(osp.join(tmp_dir, filename)) 102 | mmcv.imwrite( 103 | img[:, :, 0] // 128, 104 | osp.join(out_dir, 'annotations', 'validation', 105 | osp.splitext(filename)[0] + '.png')) 106 | 107 | print('Done!') 108 | 109 | 110 | if __name__ == '__main__': 111 | main() 112 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/pascal_context.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from functools import partial 5 | 6 | import mmcv 7 | import numpy as np 8 | from detail import Detail 9 | from PIL import Image 10 | 11 | _mapping = np.sort( 12 | np.array([ 13 | 0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284, 14 | 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59, 15 | 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355, 16 | 85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115 17 | ])) 18 | _key = np.array(range(len(_mapping))).astype('uint8') 19 | 20 | 21 | def generate_labels(img_id, detail, out_dir): 22 | 23 | def _class_to_index(mask, _mapping, _key): 24 | # assert the values 25 | values = np.unique(mask) 26 | for i in range(len(values)): 27 | assert (values[i] in _mapping) 28 | index = np.digitize(mask.ravel(), _mapping, right=True) 29 | return _key[index].reshape(mask.shape) 30 | 31 | mask = Image.fromarray( 32 | _class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key)) 33 | filename = img_id['file_name'] 34 | mask.save(osp.join(out_dir, filename.replace('jpg', 'png'))) 35 | return osp.splitext(osp.basename(filename))[0] 36 | 37 | 38 | def parse_args(): 39 | parser = argparse.ArgumentParser( 40 | description='Convert PASCAL VOC annotations to mmsegmentation format') 41 | parser.add_argument('devkit_path', help='pascal voc devkit path') 42 | parser.add_argument('json_path', help='annotation json filepath') 43 | parser.add_argument('-o', '--out_dir', help='output path') 44 | args = parser.parse_args() 45 | return args 46 | 47 | 48 | def main(): 49 | args = parse_args() 50 | devkit_path = args.devkit_path 51 | if args.out_dir is None: 52 | out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext') 53 | else: 54 | out_dir = args.out_dir 55 | json_path =
args.json_path 56 | mmcv.mkdir_or_exist(out_dir) 57 | img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages') 58 | 59 | train_detail = Detail(json_path, img_dir, 'train') 60 | train_ids = train_detail.getImgs() 61 | 62 | val_detail = Detail(json_path, img_dir, 'val') 63 | val_ids = val_detail.getImgs() 64 | 65 | mmcv.mkdir_or_exist( 66 | osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext')) 67 | 68 | train_list = mmcv.track_progress( 69 | partial(generate_labels, detail=train_detail, out_dir=out_dir), 70 | train_ids) 71 | with open( 72 | osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', 73 | 'train.txt'), 'w') as f: 74 | f.writelines(line + '\n' for line in sorted(train_list)) 75 | 76 | val_list = mmcv.track_progress( 77 | partial(generate_labels, detail=val_detail, out_dir=out_dir), val_ids) 78 | with open( 79 | osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', 80 | 'val.txt'), 'w') as f: 81 | f.writelines(line + '\n' for line in sorted(val_list)) 82 | 83 | print('Done!') 84 | 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/voc_aug.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from functools import partial 5 | 6 | import mmcv 7 | import numpy as np 8 | from PIL import Image 9 | from scipy.io import loadmat 10 | 11 | AUG_LEN = 10582 12 | 13 | 14 | def convert_mat(mat_file, in_dir, out_dir): 15 | data = loadmat(osp.join(in_dir, mat_file)) 16 | mask = data['GTcls'][0]['Segmentation'][0].astype(np.uint8) 17 | seg_filename = osp.join(out_dir, mat_file.replace('.mat', '.png')) 18 | Image.fromarray(mask).save(seg_filename, 'PNG') 19 | 20 | 21 | def generate_aug_list(merged_list, excluded_list): 22 | return list(set(merged_list) - set(excluded_list)) 23 | 24 | 25 | def parse_args(): 26 | parser = argparse.ArgumentParser( 27 | description='Convert PASCAL VOC annotations to mmsegmentation format') 28 | parser.add_argument('devkit_path', help='pascal voc devkit path') 29 | parser.add_argument('aug_path', help='pascal voc aug path') 30 | parser.add_argument('-o', '--out_dir', help='output path') 31 | parser.add_argument( 32 | '--nproc', default=1, type=int, help='number of processes') 33 | args = parser.parse_args() 34 | return args 35 | 36 | 37 | def main(): 38 | args = parse_args() 39 | devkit_path = args.devkit_path 40 | aug_path = args.aug_path 41 | nproc = args.nproc 42 | if args.out_dir is None: 43 | out_dir = osp.join(devkit_path, 'VOC2012', 'SegmentationClassAug') 44 | else: 45 | out_dir = args.out_dir 46 | mmcv.mkdir_or_exist(out_dir) 47 | in_dir = osp.join(aug_path, 'dataset', 'cls') 48 | 49 | mmcv.track_parallel_progress( 50 | partial(convert_mat, in_dir=in_dir, out_dir=out_dir), 51 | list(mmcv.scandir(in_dir, suffix='.mat')), 52 | nproc=nproc) 53 | 54 | full_aug_list = [] 55 | with open(osp.join(aug_path, 'dataset', 'train.txt')) as f: 56 | full_aug_list += [line.strip() for line in f] 57 | with open(osp.join(aug_path, 'dataset', 'val.txt')) as f: 58 | full_aug_list += [line.strip() for line in f] 59 | 60 | with open( 61 | osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 62 | 'train.txt')) as f: 63 | ori_train_list = [line.strip() for line in f] 64 | with open( 65 | osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 66 | 'val.txt')) as f: 67 | val_list = [line.strip() for line in f]
68 | 69 | aug_train_list = generate_aug_list(ori_train_list + full_aug_list, 70 | val_list) 71 | assert len(aug_train_list) == AUG_LEN, 'len(aug_train_list) != {}'.format( 72 | AUG_LEN) 73 | 74 | with open( 75 | osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 76 | 'trainaug.txt'), 'w') as f: 77 | f.writelines(line + '\n' for line in aug_train_list) 78 | 79 | aug_list = generate_aug_list(full_aug_list, ori_train_list + val_list) 80 | assert len(aug_list) == AUG_LEN - len( 81 | ori_train_list), 'len(aug_list) != {}'.format(AUG_LEN - 82 | len(ori_train_list)) 83 | with open( 84 | osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 'aug.txt'), 85 | 'w') as f: 86 | f.writelines(line + '\n' for line in aug_list) 87 | 88 | print('Done!') 89 | 90 | 91 | if __name__ == '__main__': 92 | main() 93 | -------------------------------------------------------------------------------- /segmentation/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | NCCL_P2P_DISABLE=1 \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /segmentation/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29500} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | NCCL_P2P_DISABLE=1 \ 12 | python -m torch.distributed.launch \ 13 | --nnodes=$NNODES \ 14 | --node_rank=$NODE_RANK \ 15 | --master_addr=$MASTER_ADDR \ 16 | --nproc_per_node=$GPUS \ 17 | --master_port=$PORT \ 18 | $(dirname "$0")/train.py \ 19 | $CONFIG \ 20 | --launcher pytorch ${@:3} 21 | -------------------------------------------------------------------------------- /segmentation/tools/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | from mmcv import Config 5 | from mmcv.cnn import get_model_complexity_info 6 | 7 | from mmseg.models import build_segmentor 8 | import sys 9 | sys.path.append("..") 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Get the FLOPs of a segmentor') 13 | parser.add_argument('config', help='train config file path') 14 | parser.add_argument( 15 | '--shape', 16 | type=int, 17 | nargs='+', 18 | default=[512, 512], 19 | help='input image size') 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def main(): 25 | 26 | args = parse_args() 27 | 28 | if len(args.shape) == 1: 29 | input_shape = (3, args.shape[0], args.shape[0]) 30 | elif len(args.shape) == 2: 31 | input_shape = (3, ) + tuple(args.shape) 32 | else: 33 | raise ValueError('invalid input shape') 34 | 35 | cfg = Config.fromfile(args.config) 36 | cfg.model.pretrained = None 37 | model = build_segmentor( 38 | cfg.model, 39 | train_cfg=cfg.get('train_cfg'), 40 | test_cfg=cfg.get('test_cfg')).cuda() 41 | model.eval() 42 | 43 | if hasattr(model, 'forward_dummy'): 44 | model.forward = model.forward_dummy 45 | else: 46 | raise NotImplementedError( 47 | 'FLOPs counter is currently not supported with {}'.
48 | format(model.__class__.__name__)) 49 | 50 | flops, params = get_model_complexity_info(model, input_shape) 51 | split_line = '=' * 30 52 | print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format( 53 | split_line, input_shape, flops, params)) 54 | print('!!!Please be cautious if you use the results in papers. ' 55 | 'You may need to check if all ops are supported and verify that the ' 56 | 'flops computation is correct.') 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /segmentation/tools/model_converters/mit2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmcv 7 | import torch 8 | from mmcv.runner import CheckpointLoader 9 | 10 | 11 | def convert_mit(ckpt): 12 | new_ckpt = OrderedDict() 13 | # Process the concat between q linear weights and kv linear weights 14 | for k, v in ckpt.items(): 15 | if k.startswith('head'): 16 | continue 17 | # patch embedding conversion 18 | elif k.startswith('patch_embed'): 19 | stage_i = int(k.split('.')[0].replace('patch_embed', '')) 20 | new_k = k.replace(f'patch_embed{stage_i}', f'layers.{stage_i-1}.0') 21 | new_v = v 22 | if 'proj.' in new_k: 23 | new_k = new_k.replace('proj.', 'projection.') 24 | # transformer encoder layer conversion 25 | elif k.startswith('block'): 26 | stage_i = int(k.split('.')[0].replace('block', '')) 27 | new_k = k.replace(f'block{stage_i}', f'layers.{stage_i-1}.1') 28 | new_v = v 29 | if 'attn.q.' in new_k: 30 | sub_item_k = k.replace('q.', 'kv.') 31 | new_k = new_k.replace('q.', 'attn.in_proj_') 32 | new_v = torch.cat([v, ckpt[sub_item_k]], dim=0) 33 | elif 'attn.kv.' in new_k: 34 | continue 35 | elif 'attn.proj.' in new_k: 36 | new_k = new_k.replace('proj.', 'attn.out_proj.') 37 | elif 'mlp.' in new_k: 38 | new_k = new_k.replace('mlp.', 'ffn.layers.') 39 | if 'fc1.weight' in new_k or 'fc2.weight' in new_k: 40 | new_v = v.reshape((*v.shape, 1, 1)) 41 | new_k = new_k.replace('fc1.', '0.') 42 | new_k = new_k.replace('dwconv.dwconv.', '1.') 43 | new_k = new_k.replace('fc2.', '4.') 44 | # norm layer conversion 45 | elif k.startswith('norm'): 46 | stage_i = int(k.split('.')[0].replace('norm', '')) 47 | new_k = k.replace(f'norm{stage_i}', f'layers.{stage_i-1}.2') 48 | new_v = v 49 | else: 50 | new_k = k 51 | new_v = v 52 | new_ckpt[new_k] = new_v 53 | return new_ckpt 54 | 55 | 56 | def main(): 57 | parser = argparse.ArgumentParser( 58 | description='Convert keys in official pretrained segformer to ' 59 | 'MMSegmentation style.') 60 | parser.add_argument('src', help='src model path or url') 61 | # The dst path must be a full path of the new checkpoint.
62 | parser.add_argument('dst', help='save path') 63 | args = parser.parse_args() 64 | 65 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 66 | if 'state_dict' in checkpoint: 67 | state_dict = checkpoint['state_dict'] 68 | elif 'model' in checkpoint: 69 | state_dict = checkpoint['model'] 70 | else: 71 | state_dict = checkpoint 72 | weight = convert_mit(state_dict) 73 | mmcv.mkdir_or_exist(osp.dirname(args.dst)) 74 | torch.save(weight, args.dst) 75 | 76 | 77 | if __name__ == '__main__': 78 | main() 79 | -------------------------------------------------------------------------------- /segmentation/tools/model_converters/swin2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmcv 7 | import torch 8 | from mmcv.runner import CheckpointLoader 9 | 10 | 11 | def convert_swin(ckpt): 12 | new_ckpt = OrderedDict() 13 | 14 | def correct_unfold_reduction_order(x): 15 | out_channel, in_channel = x.shape 16 | x = x.reshape(out_channel, 4, in_channel // 4) 17 | x = x[:, [0, 2, 1, 3], :].transpose(1, 18 | 2).reshape(out_channel, in_channel) 19 | return x 20 | 21 | def correct_unfold_norm_order(x): 22 | in_channel = x.shape[0] 23 | x = x.reshape(4, in_channel // 4) 24 | x = x[[0, 2, 1, 3], :].transpose(0, 1).reshape(in_channel) 25 | return x 26 | 27 | for k, v in ckpt.items(): 28 | if k.startswith('head'): 29 | continue 30 | elif k.startswith('layers'): 31 | new_v = v 32 | if 'attn.' in k: 33 | new_k = k.replace('attn.', 'attn.w_msa.') 34 | elif 'mlp.' in k: 35 | if 'mlp.fc1.' in k: 36 | new_k = k.replace('mlp.fc1.', 'ffn.layers.0.0.') 37 | elif 'mlp.fc2.' in k: 38 | new_k = k.replace('mlp.fc2.', 'ffn.layers.1.') 39 | else: 40 | new_k = k.replace('mlp.', 'ffn.') 41 | elif 'downsample' in k: 42 | new_k = k 43 | if 'reduction.' in k: 44 | new_v = correct_unfold_reduction_order(v) 45 | elif 'norm.' in k: 46 | new_v = correct_unfold_norm_order(v) 47 | else: 48 | new_k = k 49 | new_k = new_k.replace('layers', 'stages', 1) 50 | elif k.startswith('patch_embed'): 51 | new_v = v 52 | if 'proj' in k: 53 | new_k = k.replace('proj', 'projection') 54 | else: 55 | new_k = k 56 | else: 57 | new_v = v 58 | new_k = k 59 | 60 | new_ckpt[new_k] = new_v 61 | 62 | return new_ckpt 63 | 64 | 65 | def main(): 66 | parser = argparse.ArgumentParser( 67 | description='Convert keys in official pretrained swin models to ' 68 | 'MMSegmentation style.') 69 | parser.add_argument('src', help='src model path or url') 70 | # The dst path must be a full path of the new checkpoint. 71 | parser.add_argument('dst', help='save path') 72 | args = parser.parse_args() 73 | 74 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 75 | if 'state_dict' in checkpoint: 76 | state_dict = checkpoint['state_dict'] 77 | elif 'model' in checkpoint: 78 | state_dict = checkpoint['model'] 79 | else: 80 | state_dict = checkpoint 81 | weight = convert_swin(state_dict) 82 | mmcv.mkdir_or_exist(osp.dirname(args.dst)) 83 | torch.save(weight, args.dst) 84 | 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /segmentation/tools/model_converters/vit2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmcv 7 | import torch 8 | from mmcv.runner import CheckpointLoader 9 | 10 | 11 | def convert_vit(ckpt): 12 | 13 | new_ckpt = OrderedDict() 14 | 15 | for k, v in ckpt.items(): 16 | if k.startswith('head'): 17 | continue 18 | if k.startswith('norm'): 19 | new_k = k.replace('norm.', 'ln1.') 20 | elif k.startswith('patch_embed'): 21 | if 'proj' in k: 22 | new_k = k.replace('proj', 'projection') 23 | else: 24 | new_k = k 25 | elif k.startswith('blocks'): 26 | if 'norm' in k: 27 | new_k = k.replace('norm', 'ln') 28 | elif 'mlp.fc1' in k: 29 | new_k = k.replace('mlp.fc1', 'ffn.layers.0.0') 30 | elif 'mlp.fc2' in k: 31 | new_k = k.replace('mlp.fc2', 'ffn.layers.1') 32 | elif 'attn.qkv' in k: 33 | new_k = k.replace('attn.qkv.', 'attn.attn.in_proj_') 34 | elif 'attn.proj' in k: 35 | new_k = k.replace('attn.proj', 'attn.attn.out_proj') 36 | else: 37 | new_k = k 38 | new_k = new_k.replace('blocks.', 'layers.') 39 | else: 40 | new_k = k 41 | new_ckpt[new_k] = v 42 | 43 | return new_ckpt 44 | 45 | 46 | def main(): 47 | parser = argparse.ArgumentParser( 48 | description='Convert keys in timm pretrained vit models to ' 49 | 'MMSegmentation style.') 50 | parser.add_argument('src', help='src model path or url') 51 | # The dst path must be a full path of the new checkpoint. 52 | parser.add_argument('dst', help='save path') 53 | args = parser.parse_args() 54 | 55 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 56 | if 'state_dict' in checkpoint: 57 | # timm checkpoint 58 | state_dict = checkpoint['state_dict'] 59 | elif 'model' in checkpoint: 60 | # deit checkpoint 61 | state_dict = checkpoint['model'] 62 | else: 63 | state_dict = checkpoint 64 | weight = convert_vit(state_dict) 65 | mmcv.mkdir_or_exist(osp.dirname(args.dst)) 66 | torch.save(weight, args.dst) 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /segmentation/tools/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | from mmcv import Config, DictAction 5 | 6 | from mmseg.apis import init_segmentor 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='Print the whole config') 11 | parser.add_argument('config', help='config file path') 12 | parser.add_argument( 13 | '--graph', action='store_true', help='print the model graph') 14 | parser.add_argument( 15 | '--options', nargs='+', action=DictAction, help='arguments in dict') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | cfg = Config.fromfile(args.config) 25 | if args.options is not None: 26 | cfg.merge_from_dict(args.options) 27 | print(f'Config:\n{cfg.pretty_text}') 28 | # dump config 29 | cfg.dump('example.py') 30 | # dump model graph 31 | if args.graph: 32 | model = init_segmentor(args.config, device='cpu') 33 | print(f'Model graph:\n{str(model)}') 34 | with open('example-graph.txt', 'w') as f: 35 | f.writelines(str(model)) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /segmentation/tools/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse 3 | import subprocess 4 | 5 | import torch 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser( 10 | description='Process a checkpoint to be published') 11 | parser.add_argument('in_file', help='input checkpoint filename') 12 | parser.add_argument('out_file', help='output checkpoint filename') 13 | args = parser.parse_args() 14 | return args 15 | 16 | 17 | def process_checkpoint(in_file, out_file): 18 | checkpoint = torch.load(in_file, map_location='cpu') 19 | # remove optimizer for smaller file size 20 | if 'optimizer' in checkpoint: 21 | del checkpoint['optimizer'] 22 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 23 | # add the code here. 24 | torch.save(checkpoint, out_file) 25 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 26 | # str.rstrip('.pth') strips any trailing '.', 'p', 't' or 'h' characters 27 | # rather than the suffix, so slice the extension off explicitly. 28 | out_file_name = out_file[:-4] if out_file.endswith('.pth') else out_file 29 | final_file = out_file_name + '-{}.pth'.format(sha[:8]) 30 | subprocess.Popen(['mv', out_file, final_file]) 31 | 32 | 33 | def main(): 34 | args = parse_args() 35 | process_checkpoint(args.in_file, args.out_file) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /segmentation/tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-4} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /segmentation/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | GPUS=${GPUS:-8} 9 | GPUS_PER_NODE=${GPUS_PER_NODE:-4} 10 | CPUS_PER_TASK=${CPUS_PER_TASK:-12} 11 | SRUN_ARGS=${SRUN_ARGS:-""} 12 | PY_ARGS=${@:4} 13 | 14 | export NCCL_P2P_DISABLE=1 15 | export MASTER_PORT=13579 16 | 17 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 18 | srun -p ${PARTITION} \ 19 | --job-name=${JOB_NAME} \ 20 | --gres=gpu:${GPUS_PER_NODE} \ 21 | --ntasks=${GPUS} \ 22 | --ntasks-per-node=${GPUS_PER_NODE} \ 23 | --cpus-per-task=${CPUS_PER_TASK} \ 24 | --kill-on-bad-exit=1 \ 25 | --mem 250G \ 26 | ${SRUN_ARGS} \ 27 | python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} 28 | -------------------------------------------------------------------------------- /segmentation/tools/torchserve/mmseg2torchserve.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from argparse import ArgumentParser, Namespace 3 | from pathlib import Path 4 | from tempfile import TemporaryDirectory 5 | 6 | import mmcv 7 | 8 | try: 9 | from model_archiver.model_packaging import package_model 10 | from model_archiver.model_packaging_utils import ModelExportUtils 11 | except ImportError: 12 | package_model = None 13 | 14 | 15 | def mmseg2torchserve( 16 | config_file: str, 17 | checkpoint_file: str, 18 | output_folder: str, 19 | model_name: str, 20 | model_version: str = '1.0', 21 | force: bool = False, 22 | ): 23 | """Converts mmsegmentation model (config + checkpoint) to TorchServe 24 | `.mar`. 25 | 26 | Args: 27 | config_file: 28 | In MMSegmentation config format. 29 | The contents vary for each task repository. 30 | checkpoint_file: 31 | In MMSegmentation checkpoint format. 32 | The contents vary for each task repository. 33 | output_folder: 34 | Folder where `{model_name}.mar` will be created. 35 | The file created will be in TorchServe archive format. 36 | model_name: 37 | If not None, used for naming the `{model_name}.mar` file 38 | that will be created under `output_folder`. 39 | If None, `{Path(checkpoint_file).stem}` will be used. 40 | model_version: 41 | Model's version. 42 | force: 43 | If True, if there is an existing `{model_name}.mar` 44 | file under `output_folder` it will be overwritten. 45 | """ 46 | mmcv.mkdir_or_exist(output_folder) 47 | 48 | config = mmcv.Config.fromfile(config_file) 49 | 50 | with TemporaryDirectory() as tmpdir: 51 | config.dump(f'{tmpdir}/config.py') 52 | 53 | args = Namespace( 54 | **{ 55 | 'model_file': f'{tmpdir}/config.py', 56 | 'serialized_file': checkpoint_file, 57 | 'handler': f'{Path(__file__).parent}/mmseg_handler.py', 58 | 'model_name': model_name or Path(checkpoint_file).stem, 59 | 'version': model_version, 60 | 'export_path': output_folder, 61 | 'force': force, 62 | 'requirements_file': None, 63 | 'extra_files': None, 64 | 'runtime': 'python', 65 | 'archive_format': 'default' 66 | }) 67 | manifest = ModelExportUtils.generate_manifest_json(args) 68 | package_model(args, manifest) 69 | 70 | 71 | def parse_args(): 72 | parser = ArgumentParser( 73 | description='Convert mmseg models to TorchServe `.mar` format.') 74 | parser.add_argument('config', type=str, help='config file path') 75 | parser.add_argument('checkpoint', type=str, help='checkpoint file path') 76 | parser.add_argument( 77 | '--output-folder', 78 | type=str, 79 | required=True, 80 | help='Folder where `{model_name}.mar` will be created.') 81 | parser.add_argument( 82 | '--model-name', 83 | type=str, 84 | default=None, 85 | help='If not None, used for naming the `{model_name}.mar` ' 86 | 'file that will be created under `output_folder`. ' 87 | 'If None, `{Path(checkpoint_file).stem}` will be used.') 88 | parser.add_argument( 89 | '--model-version', 90 | type=str, 91 | default='1.0', 92 | help='Number used for versioning.') 93 | parser.add_argument( 94 | '-f', 95 | '--force', 96 | action='store_true', 97 | help='overwrite the existing `{model_name}.mar`') 98 | args = parser.parse_args() 99 | 100 | return args 101 | 102 | 103 | if __name__ == '__main__': 104 | args = parse_args() 105 | 106 | if package_model is None: 107 | raise ImportError('`torch-model-archiver` is required. '
108 | 'Try: pip install torch-model-archiver') 109 | 110 | mmseg2torchserve(args.config, args.checkpoint, args.output_folder, 111 | args.model_name, args.model_version, args.force) 112 | -------------------------------------------------------------------------------- /segmentation/tools/torchserve/mmseg_handler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import base64 3 | import os 4 | 5 | import cv2 6 | import mmcv 7 | import torch 8 | from mmcv.cnn.utils.sync_bn import revert_sync_batchnorm 9 | from ts.torch_handler.base_handler import BaseHandler 10 | 11 | from mmseg.apis import inference_segmentor, init_segmentor 12 | 13 | 14 | class MMsegHandler(BaseHandler): 15 | 16 | def initialize(self, context): 17 | properties = context.system_properties 18 | self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu' 19 | self.device = torch.device(self.map_location + ':' + 20 | str(properties.get('gpu_id')) if torch.cuda. 21 | is_available() else self.map_location) 22 | self.manifest = context.manifest 23 | 24 | model_dir = properties.get('model_dir') 25 | serialized_file = self.manifest['model']['serializedFile'] 26 | checkpoint = os.path.join(model_dir, serialized_file) 27 | self.config_file = os.path.join(model_dir, 'config.py') 28 | 29 | self.model = init_segmentor(self.config_file, checkpoint, self.device) 30 | self.model = revert_sync_batchnorm(self.model) 31 | self.initialized = True 32 | 33 | def preprocess(self, data): 34 | images = [] 35 | 36 | for row in data: 37 | image = row.get('data') or row.get('body') 38 | if isinstance(image, str): 39 | image = base64.b64decode(image) 40 | image = mmcv.imfrombytes(image) 41 | images.append(image) 42 | 43 | return images 44 | 45 | def inference(self, data, *args, **kwargs): 46 | results = [inference_segmentor(self.model, img) for img in data] 47 | return results 48 | 49 | def postprocess(self, data): 50 | output = [] 51 | 52 | for image_result in data: 53 | _, buffer = cv2.imencode('.png', image_result[0].astype('uint8')) 54 | content = buffer.tobytes() 55 | output.append(content) 56 | return output 57 | -------------------------------------------------------------------------------- /segmentation/tools/torchserve/test_torchserve.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | from io import BytesIO 3 | 4 | import matplotlib.pyplot as plt 5 | import mmcv 6 | import requests 7 | 8 | from mmseg.apis import inference_segmentor, init_segmentor 9 | 10 | 11 | def parse_args(): 12 | parser = ArgumentParser( 13 | description='Compare results of torchserve and pytorch, ' 14 | 'and visualize them.') 15 | parser.add_argument('img', help='Image file') 16 | parser.add_argument('config', help='Config file') 17 | parser.add_argument('checkpoint', help='Checkpoint file') 18 | parser.add_argument('model_name', help='The model name in the server') 19 | parser.add_argument( 20 | '--inference-addr', 21 | default='127.0.0.1:8080', 22 | help='Address and port of the inference server') 23 | parser.add_argument( 24 | '--result-image', 25 | type=str, 26 | default=None, 27 | help='save server output in result-image') 28 | parser.add_argument( 29 | '--device', default='cuda:0', help='Device used for inference') 30 | 31 | args = parser.parse_args() 32 | return args 33 | 34 | 35 | def main(args): 36 | url = 'http://' + args.inference_addr + '/predictions/' + args.model_name 37 | with
open(args.img, 'rb') as image: 38 | tmp_res = requests.post(url, image) 39 | content = tmp_res.content 40 | if args.result_image: 41 | with open(args.result_image, 'wb') as out_image: 42 | out_image.write(content) 43 | plt.imshow(mmcv.imread(args.result_image, 'grayscale')) 44 | plt.show() 45 | else: 46 | plt.imshow(plt.imread(BytesIO(content))) 47 | plt.show() 48 | model = init_segmentor(args.config, args.checkpoint, args.device) 49 | image = mmcv.imread(args.img) 50 | result = inference_segmentor(model, image) 51 | plt.imshow(result[0]) 52 | plt.show() 53 | 54 | 55 | if __name__ == '__main__': 56 | args = parse_args() 57 | main(args) 58 | -------------------------------------------------------------------------------- /segmentation/train.sh: -------------------------------------------------------------------------------- 1 | ./tools/dist_train.sh configs/sem_fpn/fpn_lsnet_t_ade20k_40k.py 8 --seed 0 --deterministic 2 | -------------------------------------------------------------------------------- /speed.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | from timm import create_model 4 | import model.build 5 | import utils 6 | from argparse import ArgumentParser 7 | 8 | torch.autograd.set_grad_enabled(False) 9 | 10 | T0 = 5 11 | T1 = 10 12 | 13 | def compute_throughput(model, device, batch_size, resolution=224): 14 | inputs = torch.randn(batch_size, 3, resolution, resolution, device=device) 15 | torch.cuda.empty_cache() 16 | torch.cuda.synchronize() 17 | start = time.time() 18 | while time.time() - start < T0: 19 | model(inputs) 20 | timing = [] 21 | torch.cuda.synchronize() 22 | while sum(timing) < T1: 23 | start = time.time() 24 | model(inputs) 25 | torch.cuda.synchronize() 26 | timing.append(time.time() - start) 27 | timing = torch.as_tensor(timing, dtype=torch.float32) 28 | print(batch_size / timing.mean().item(), 29 | 'images/s @ batch size', batch_size) 30 | 31 | if __name__ == "__main__": 32 | parser = ArgumentParser() 33 | parser.add_argument("--model", default="lsnet_t", type=str) 34 | parser.add_argument("--batch-size", default=2048, type=int) 35 | parser.add_argument("--resolution", default=224, type=int) 36 | parser.add_argument("--device", default=0, type=int) 37 | 38 | args = parser.parse_args() 39 | model = args.model 40 | batch_size = args.batch_size 41 | resolution = args.resolution 42 | device = args.device 43 | torch.cuda.set_device(device) 44 | 45 | torch.cuda.empty_cache() 46 | model = create_model(model, num_classes=1000) 47 | utils.replace_batchnorm(model) 48 | model.to(device) 49 | model.eval() 50 | compute_throughput(model, device, 51 | batch_size, resolution=resolution) 52 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | NCCL_P2P_DISABLE=1 python -m torch.distributed.launch --nproc_per_node=8 --master_port 12345 --use_env main.py --model lsnet_t --data-path ~/imagenet --dist-eval --------------------------------------------------------------------------------
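A quick sanity check on the '// 128' binarization used by the DRIVE and HRF converters above: for uint8 pixel values in [0, 255], integer division by 128 can only yield 0 or 1, and it yields 1 exactly when the value is at least 128, so a single division implements the threshold described in the comments. A minimal standalone sketch (not part of the repository; assumes only that numpy is installed):

import numpy as np

# A toy 'annotation' row: standard masks contain only {0, 255}, but the
# non-standard ones may contain arbitrary intermediate values.
row = np.array([0, 5, 127, 128, 200, 255], dtype=np.uint8)

binary = row // 128                        # integer division thresholds at 128
reference = (row >= 128).astype(np.uint8)  # explicit '1 if value >= 128 else 0'

assert (binary == reference).all()
print(binary)  # [0 0 0 1 1 1]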