├── models
│   ├── __init__.py
│   └── losses.py
├── datasets
│   ├── __init__.py
│   ├── vtab.py
│   ├── functional.py
│   ├── food101.py
│   ├── dtd.py
│   └── volume_transforms.py
├── logo.png
├── video_datasets
│   ├── load_binary.py
│   └── video_datasets.py
├── dense_tasks
│   └── Segmentation
│       ├── mmseg
│       ├── core
│       │   ├── utils
│       │   │   ├── __init__.py
│       │   │   └── misc.py
│       │   ├── __init__.py
│       │   ├── seg
│       │   │   ├── sampler
│       │   │   │   ├── __init__.py
│       │   │   │   ├── base_pixel_sampler.py
│       │   │   │   └── ohem_pixel_sampler.py
│       │   │   ├── __init__.py
│       │   │   └── builder.py
│       │   └── evaluation
│       │       └── __init__.py
│       ├── ops
│       │   ├── __init__.py
│       │   ├── wrappers.py
│       │   └── encoding.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── collect_env.py
│       │   └── logger.py
│       ├── models
│       │   ├── segmentors
│       │   │   ├── __init__.py
│       │   │   └── cascade_encoder_decoder.py
│       │   ├── necks
│       │   │   ├── __init__.py
│       │   │   ├── multilevel_neck.py
│       │   │   └── mla_neck.py
│       │   ├── __init__.py
│       │   ├── utils
│       │   │   ├── __init__.py
│       │   │   ├── shape_convert.py
│       │   │   ├── make_divisible.py
│       │   │   ├── se_layer.py
│       │   │   ├── res_layer.py
│       │   │   └── up_conv_block.py
│       │   ├── losses
│       │   │   ├── __init__.py
│       │   │   ├── accuracy.py
│       │   │   └── utils.py
│       │   ├── backbones
│       │   │   ├── __init__.py
│       │   │   └── timm_backbone.py
│       │   ├── decode_heads
│       │   │   ├── __init__.py
│       │   │   ├── cc_head.py
│       │   │   ├── nl_head.py
│       │   │   ├── gc_head.py
│       │   │   ├── segformer_head.py
│       │   │   ├── setr_mla_head.py
│       │   │   ├── cascade_decode_head.py
│       │   │   ├── sep_fcn_head.py
│       │   │   ├── fpn_head.py
│       │   │   ├── fcn_head.py
│       │   │   ├── setr_up_head.py
│       │   │   ├── lraspp_head.py
│       │   │   ├── psp_head.py
│       │   │   ├── aspp_head.py
│       │   │   ├── sep_aspp_head.py
│       │   │   ├── stdc_head.py
│       │   │   ├── uper_head.py
│       │   │   └── ocr_head.py
│       │   └── builder.py
│       ├── datasets
│       │   ├── pipelines
│       │   │   ├── formating.py
│       │   │   ├── __init__.py
│       │   │   └── compose.py
│       │   ├── dark_zurich.py
│       │   ├── night_driving.py
│       │   ├── hrf.py
│       │   ├── stare.py
│       │   ├── drive.py
│       │   ├── chase_db1.py
│       │   ├── __init__.py
│       │   ├── voc.py
│       │   └── loveda.py
│       ├── apis
│       │   └── __init__.py
│       ├── version.py
│       └── __init__.py
│       ├── mmcv_custom
│       ├── apex_runner
│       │   ├── __init__.py
│       │   ├── optimizer.py
│       │   ├── checkpoint.py
│       │   └── apex_iter_based_runner.py
│       ├── __init__.py
│       └── layer_decay_optimizer_constructor.py
│       ├── tools
│       ├── dist_test.sh
│       ├── seg_train.sh
│       └── seg_train_coco-stuff164k.sh
│       ├── configs
│       ├── _base_
│       │   ├── default_runtime.py
│       │   ├── schedules
│       │   │   ├── schedule_80k.py
│       │   │   └── schedule_160k.py
│       │   ├── datasets
│       │   │   └── ade20k.py
│       │   └── models
│       │       └── upernet_beit.py
│       └── beit
│           └── upernet
│           ├── our_vit_coco-stuff164k.py
│           └── our_vit.py
│       └── README.md
├── measure_speed.sh
├── train_vtab.sh
├── util
│   ├── lr_sched.py
│   ├── metrics.py
│   ├── logger.py
│   ├── crop.py
│   ├── lars.py
│   ├── datasets.py
│   └── lr_decay.py
├── train_video.sh
├── train_IN21K.sh
├── configs.py
├── .gitignore
├── requirements.txt
└── README.md

/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NUS-HPC-AI-Lab/Dynamic-Tuning/HEAD/logo.png
--------------------------------------------------------------------------------
/video_datasets/load_binary.py:
--------------------------------------------------------------------------------
1 | def load_binary(path):
2 |     with open(path, 'rb') as f:
3 |         return f.read()
4 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .misc import add_prefix
3 |
4 | __all__ = ['add_prefix']
5 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/ops/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .encoding import Encoding
3 | from .wrappers import Upsample, resize
4 |
5 | __all__ = ['Upsample', 'resize', 'Encoding']
6 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/core/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .evaluation import *  # noqa: F401, F403
3 | from .seg import *  # noqa: F401, F403
4 | from .utils import *  # noqa: F401, F403
5 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .collect_env import collect_env
3 | from .logger import get_root_logger
4 |
5 | __all__ = ['get_root_logger', 'collect_env']
6 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/core/seg/sampler/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base_pixel_sampler import BasePixelSampler
3 | from .ohem_pixel_sampler import OHEMPixelSampler
4 |
5 | __all__ = ['BasePixelSampler', 'OHEMPixelSampler']
6 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/core/seg/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .builder import build_pixel_sampler
3 | from .sampler import BasePixelSampler, OHEMPixelSampler
4 |
5 | __all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler']
6 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmcv_custom/apex_runner/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Open-MMLab. All rights reserved.
2 | from .checkpoint import save_checkpoint
3 | from .apex_iter_based_runner import IterBasedRunnerAmp
4 |
5 |
6 | __all__ = [
7 |     'save_checkpoint', 'IterBasedRunnerAmp',
8 | ]
9 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/models/segmentors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base import BaseSegmentor 3 | from .cascade_encoder_decoder import CascadeEncoderDecoder 4 | from .encoder_decoder import EncoderDecoder 5 | 6 | __all__ = ['BaseSegmentor', 'EncoderDecoder', 'CascadeEncoderDecoder'] 7 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .fpn import FPN 3 | from .ic_neck import ICNeck 4 | from .jpu import JPU 5 | from .mla_neck import MLANeck 6 | from .multilevel_neck import MultiLevelNeck 7 | 8 | __all__ = ['FPN', 'MultiLevelNeck', 'MLANeck', 'ICNeck', 'JPU'] 9 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 10 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/core/seg/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry, build_from_cfg 3 | 4 | PIXEL_SAMPLERS = Registry('pixel sampler') 5 | 6 | 7 | def build_pixel_sampler(cfg, **default_args): 8 | """Build pixel sampler for segmentation map.""" 9 | return build_from_cfg(cfg, PIXEL_SAMPLERS, default_args) 10 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | # flake8: noqa 3 | import warnings 4 | 5 | from .formatting import * 6 | 7 | warnings.warn('DeprecationWarning: mmseg.datasets.pipelines.formating will be ' 8 | 'deprecated in 2021, please replace it with ' 9 | 'mmseg.datasets.pipelines.formatting.') 10 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/tools/seg_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG="configs/beit/upernet/our_vit.py" 4 | GPUS=${GPUS:-8} 5 | PORT=$((12000 + $RANDOM % 20000)) 6 | 7 | CLUSTER=True \ 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/seg_train.py $CONFIG --launcher pytorch --finetune "VIT_BASE_IN21K" 11 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # yapf:disable 2 | log_config = dict( 3 | interval=50, 4 | hooks=[ 5 | dict(type='TextLoggerHook', by_epoch=False), 6 | # dict(type='TensorboardLoggerHook') 7 | ]) 8 | # yapf:enable 9 | dist_params = dict(backend='nccl') 10 | log_level = 'INFO' 11 | load_from = None 12 | resume_from = None 13 | workflow = [('train', 1)] 14 | cudnn_benchmark = True 15 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/core/seg/sampler/base_pixel_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from abc import ABCMeta, abstractmethod 3 | 4 | 5 | class BasePixelSampler(metaclass=ABCMeta): 6 | """Base class of pixel sampler.""" 7 | 8 | def __init__(self, **kwargs): 9 | pass 10 | 11 | @abstractmethod 12 | def sample(self, seg_logit, seg_label): 13 | """Placeholder for sample function.""" 14 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=80000) 8 | checkpoint_config = dict(by_epoch=False, interval=8000) 9 | evaluation = dict(interval=8000, metric='mIoU') -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/dark_zurich.py: -------------------------------------------------------------------------------- 1 | from .builder import DATASETS 2 | from .cityscapes import CityscapesDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class DarkZurichDataset(CityscapesDataset): 7 | """DarkZurichDataset dataset.""" 8 | 9 | def __init__(self, **kwargs): 10 | super().__init__( 11 | img_suffix='_rgb_anon.png', 12 | seg_map_suffix='_gt_labelTrainIds.png', 13 | **kwargs) 14 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .checkpoint import load_checkpoint 4 | from 
.layer_decay_optimizer_constructor import LayerDecayOptimizerConstructor 5 | from .resize_transform import SETR_Resize 6 | from .apex_runner.optimizer import DistOptimizerHook 7 | from .train_api import train_segmentor 8 | 9 | __all__ = ['load_checkpoint', 'LayerDecayOptimizerConstructor', 'SETR_Resize', 'DistOptimizerHook', 'train_segmentor'] 10 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/night_driving.py: -------------------------------------------------------------------------------- 1 | from .builder import DATASETS 2 | from .cityscapes import CityscapesDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class NightDrivingDataset(CityscapesDataset): 7 | """NightDrivingDataset dataset.""" 8 | 9 | def __init__(self, **kwargs): 10 | super().__init__( 11 | img_suffix='_leftImg8bit.png', 12 | seg_map_suffix='_gtCoarse_labelTrainIds.png', 13 | **kwargs) 14 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=160000) 8 | checkpoint_config = dict(by_epoch=False, interval=16000) 9 | evaluation = dict(interval=16000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/tools/seg_train_coco-stuff164k.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG="configs/beit/upernet/our_vit_coco-stuff164k.py" 4 | GPUS=${GPUS:-8} 5 | PORT=$((12000 + $RANDOM % 20000)) 6 | 7 | CLUSTER=True \ 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/seg_train.py $CONFIG --launcher pytorch --finetune "VIT_BASE_IN21K" --dataset_name "coco-stuff164k" 11 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def add_prefix(inputs, prefix): 3 | """Add prefix for dict. 4 | 5 | Args: 6 | inputs (dict): The input dict with str keys. 7 | prefix (str): The prefix to add. 8 | 9 | Returns: 10 | 11 | dict: The dict with keys updated with ``prefix``. 
12 | """ 13 | 14 | outputs = dict() 15 | for name, value in inputs.items(): 16 | outputs[f'{prefix}.{name}'] = value 17 | 18 | return outputs 19 | -------------------------------------------------------------------------------- /measure_speed.sh: -------------------------------------------------------------------------------- 1 | DATASET=cifar100_full 2 | CLUSTER=True \ 3 | 4 | python speed.py \ 5 | --batch_size 128 \ 6 | --cls_token \ 7 | --finetune VIT_BASE_IN21K \ 8 | --dist_eval \ 9 | --output_dir "./output/IN21K/0.5/${DATASET}" \ 10 | --drop_path 0.0 \ 11 | --blr 1e-3 \ 12 | --weight_decay 0.01 \ 13 | --dataset "${DATASET}" \ 14 | --ffn_adapt \ 15 | --auto_remove \ 16 | --token_target_ratio 0.5 \ 17 | --eval \ 18 | --eval_ckpt "your_ckpt" 19 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .class_names import get_classes, get_palette 3 | from .eval_hooks import DistEvalHook, EvalHook 4 | from .metrics import (eval_metrics, intersect_and_union, mean_dice, 5 | mean_fscore, mean_iou, pre_eval_to_metrics) 6 | 7 | __all__ = [ 8 | 'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore', 9 | 'eval_metrics', 'get_classes', 'get_palette', 'pre_eval_to_metrics', 10 | 'intersect_and_union' 11 | ] 12 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/apis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .inference import inference_segmentor, init_segmentor, show_result_pyplot 3 | from .test import multi_gpu_test, single_gpu_test 4 | from .train import (get_root_logger, init_random_seed, set_random_seed, 5 | train_segmentor) 6 | 7 | __all__ = [ 8 | 'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor', 9 | 'inference_segmentor', 'multi_gpu_test', 'single_gpu_test', 10 | 'show_result_pyplot', 'init_random_seed' 11 | ] 12 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import collect_env as collect_base_env 3 | from mmcv.utils import get_git_hash 4 | 5 | import mmseg 6 | 7 | 8 | def collect_env(): 9 | """Collect the information of the running environments.""" 10 | env_info = collect_base_env() 11 | env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}' 12 | 13 | return env_info 14 | 15 | 16 | if __name__ == '__main__': 17 | for name, val in collect_env().items(): 18 | print('{}: {}'.format(name, val)) 19 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 
2 | 3 | __version__ = '0.20.2' 4 | 5 | 6 | def parse_version_info(version_str): 7 | version_info = [] 8 | for x in version_str.split('.'): 9 | if x.isdigit(): 10 | version_info.append(int(x)) 11 | elif x.find('rc') != -1: 12 | patch_version = x.split('rc') 13 | version_info.append(int(patch_version[0])) 14 | version_info.append(f'rc{patch_version[1]}') 15 | return tuple(version_info) 16 | 17 | 18 | version_info = parse_version_info(__version__) 19 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .backbones import * # noqa: F401,F403 3 | from .builder import (BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, 4 | build_head, build_loss, build_segmentor) 5 | from .decode_heads import * # noqa: F401,F403 6 | from .losses import * # noqa: F401,F403 7 | from .necks import * # noqa: F401,F403 8 | from .segmentors import * # noqa: F401,F403 9 | 10 | __all__ = [ 11 | 'BACKBONES', 'HEADS', 'LOSSES', 'SEGMENTORS', 'build_backbone', 12 | 'build_head', 'build_loss', 'build_segmentor' 13 | ] 14 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .embed import PatchEmbed 2 | from .inverted_residual import InvertedResidual, InvertedResidualV3 3 | from .make_divisible import make_divisible 4 | from .res_layer import ResLayer 5 | from .se_layer import SELayer 6 | from .self_attention_block import SelfAttentionBlock 7 | from .shape_convert import nchw_to_nlc, nlc_to_nchw 8 | from .up_conv_block import UpConvBlock 9 | 10 | __all__ = [ 11 | 'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual', 12 | 'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'PatchEmbed', 13 | 'nchw_to_nlc', 'nlc_to_nchw' 14 | ] 15 | -------------------------------------------------------------------------------- /train_vtab.sh: -------------------------------------------------------------------------------- 1 | GPU_COUNT=8 2 | DATASETS=(cifar caltech101 dtd oxford_flowers102 oxford_iiit_pet svhn sun397 patch_camelyon eurosat resisc45 diabetic_retinopathy clevr_count clevr_dist dmlab kitti dsprites_loc dsprites_ori smallnorb_azi smallnorb_ele) 3 | i=0 4 | 5 | for DATASET in "${DATASETS[@]}" 6 | do 7 | GPU_ID=$((i % GPU_COUNT)) 8 | CLUSTER=True CUDA_VISIBLE_DEVICES=$GPU_ID python main_vtab.py --batch_size 64 --cls_token --finetune VIT_BASE_IN21K --dist_eval --output_dir "./output_vtab/${DATASET}" --drop_path 0.0 --dataset $DATASET --ffn_num 16 --ffn_adapt --auto_remove --eval_freq 1 --token_target_ratio 0.5 & 9 | i=$((i + 1)) 10 | done 11 | wait 12 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .accuracy import Accuracy, accuracy 3 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 4 | cross_entropy, mask_cross_entropy) 5 | from .dice_loss import DiceLoss 6 | from .focal_loss import FocalLoss 7 | from .lovasz_loss import LovaszLoss 8 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 9 | 10 | __all__ = [ 11 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 12 | 'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss', 13 | 'weight_reduce_loss', 'weighted_loss', 'LovaszLoss', 'DiceLoss', 14 | 'FocalLoss' 15 | ] 16 | -------------------------------------------------------------------------------- /util/lr_sched.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | 9 | def adjust_learning_rate(optimizer, epoch, args): 10 | """Decay the learning rate with half-cycle cosine after warmup""" 11 | if epoch < args.warmup_epochs: 12 | lr = args.lr * epoch / args.warmup_epochs 13 | else: 14 | lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * \ 15 | (1. + math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs))) 16 | for param_group in optimizer.param_groups: 17 | if "lr_scale" in param_group: 18 | param_group["lr"] = lr * param_group["lr_scale"] 19 | else: 20 | param_group["lr"] = lr 21 | return lr 22 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/hrf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class HRFDataset(CustomDataset): 10 | """HRF dataset. 11 | 12 | In segmentation map annotation for HRF, 0 stands for background, which is 13 | included in 2 categories. ``reduce_zero_label`` is fixed to False. The 14 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 15 | '.png'. 16 | """ 17 | 18 | CLASSES = ('background', 'vessel') 19 | 20 | PALETTE = [[120, 120, 120], [6, 230, 230]] 21 | 22 | def __init__(self, **kwargs): 23 | super(HRFDataset, self).__init__( 24 | img_suffix='.png', 25 | seg_map_suffix='.png', 26 | reduce_zero_label=False, 27 | **kwargs) 28 | assert osp.exists(self.img_dir) 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/stare.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class STAREDataset(CustomDataset): 10 | """STARE dataset. 11 | 12 | In segmentation map annotation for STARE, 0 stands for background, which is 13 | included in 2 categories. ``reduce_zero_label`` is fixed to False. The 14 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 15 | '.ah.png'. 
16 | """ 17 | 18 | CLASSES = ('background', 'vessel') 19 | 20 | PALETTE = [[120, 120, 120], [6, 230, 230]] 21 | 22 | def __init__(self, **kwargs): 23 | super(STAREDataset, self).__init__( 24 | img_suffix='.png', 25 | seg_map_suffix='.ah.png', 26 | reduce_zero_label=False, 27 | **kwargs) 28 | assert osp.exists(self.img_dir) 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/drive.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class DRIVEDataset(CustomDataset): 10 | """DRIVE dataset. 11 | 12 | In segmentation map annotation for DRIVE, 0 stands for background, which is 13 | included in 2 categories. ``reduce_zero_label`` is fixed to False. The 14 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 15 | '_manual1.png'. 16 | """ 17 | 18 | CLASSES = ('background', 'vessel') 19 | 20 | PALETTE = [[120, 120, 120], [6, 230, 230]] 21 | 22 | def __init__(self, **kwargs): 23 | super(DRIVEDataset, self).__init__( 24 | img_suffix='.png', 25 | seg_map_suffix='_manual1.png', 26 | reduce_zero_label=False, 27 | **kwargs) 28 | assert osp.exists(self.img_dir) 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/chase_db1.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class ChaseDB1Dataset(CustomDataset): 10 | """Chase_db1 dataset. 11 | 12 | In segmentation map annotation for Chase_db1, 0 stands for background, 13 | which is included in 2 categories. ``reduce_zero_label`` is fixed to False. 14 | The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 15 | '_1stHO.png'. 16 | """ 17 | 18 | CLASSES = ('background', 'vessel') 19 | 20 | PALETTE = [[120, 120, 120], [6, 230, 230]] 21 | 22 | def __init__(self, **kwargs): 23 | super(ChaseDB1Dataset, self).__init__( 24 | img_suffix='.png', 25 | seg_map_suffix='_1stHO.png', 26 | reduce_zero_label=False, 27 | **kwargs) 28 | assert osp.exists(self.img_dir) 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .compose import Compose 3 | from .formatting import (Collect, ImageToTensor, ToDataContainer, ToTensor, 4 | Transpose, to_tensor) 5 | from .loading import LoadAnnotations, LoadImageFromFile 6 | from .test_time_aug import MultiScaleFlipAug 7 | from .transforms import (CLAHE, AdjustGamma, Normalize, Pad, 8 | PhotoMetricDistortion, RandomCrop, RandomCutOut, 9 | RandomFlip, RandomRotate, Rerange, Resize, RGB2Gray, 10 | SegRescale) 11 | 12 | __all__ = [ 13 | 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', 14 | 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', 15 | 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 16 | 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', 17 | 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray', 'RandomCutOut' 18 | ] 19 | -------------------------------------------------------------------------------- /util/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | def accuracy(output, target, topk=(1,)): 5 | """Computes the accuracy over the k top predictions for the specified values of k""" 6 | maxk = min(max(topk), output.size()[1]) 7 | batch_size = target.size(0) 8 | _, pred = output.topk(maxk, 1, True, True) 9 | pred = pred.t() 10 | correct = pred.eq(target.reshape(1, -1).expand_as(pred)) 11 | return [correct[:min(k, maxk)].reshape(-1).float().sum(0) * 100. / batch_size for k in topk] 12 | 13 | 14 | def mean_per_class_accuracy(pred, target, num_classes): 15 | pred_score, pred_label = torch.topk(pred, k=1) 16 | pred_label = pred_label.flatten() 17 | 18 | pred_label = F.one_hot(pred_label, num_classes) 19 | target_label = F.one_hot(target, num_classes) 20 | class_correct = (pred_label & target_label) 21 | 22 | tp_sum = class_correct.sum(0) 23 | gt_sum = target_label.sum(0) 24 | recall = tp_sum / torch.clamp(gt_sum, min=1).float() * 100 25 | recall = recall.mean(0) 26 | return recall -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import logging 3 | 4 | from mmcv.utils import get_logger 5 | 6 | 7 | def get_root_logger(log_file=None, log_level=logging.INFO): 8 | """Get the root logger. 9 | 10 | The logger will be initialized if it has not been initialized. By default a 11 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 12 | also be added. The name of the root logger is the top-level package name, 13 | e.g., "mmseg". 14 | 15 | Args: 16 | log_file (str | None): The log filename. If specified, a FileHandler 17 | will be added to the root logger. 18 | log_level (int): The root logger level. Note that only the process of 19 | rank 0 is affected, while other processes will set the level to 20 | "Error" and be silent most of the time. 21 | 22 | Returns: 23 | logging.Logger: The root logger. 24 | """ 25 | 26 | logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level) 27 | 28 | return logger 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/utils/shape_convert.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | def nlc_to_nchw(x, hw_shape): 3 | """Convert [N, L, C] shape tensor to [N, C, H, W] shape tensor. 4 | 5 | Args: 6 | x (Tensor): The input tensor of shape [N, L, C] before conversion. 7 | hw_shape (Sequence[int]): The height and width of output feature map. 8 | 9 | Returns: 10 | Tensor: The output tensor of shape [N, C, H, W] after conversion. 11 | """ 12 | H, W = hw_shape 13 | assert len(x.shape) == 3 14 | B, L, C = x.shape 15 | assert L == H * W, 'The seq_len doesn\'t match H, W' 16 | return x.transpose(1, 2).reshape(B, C, H, W) 17 | 18 | 19 | def nchw_to_nlc(x): 20 | """Flatten [N, C, H, W] shape tensor to [N, L, C] shape tensor. 21 | 22 | Args: 23 | x (Tensor): The input tensor of shape [N, C, H, W] before conversion. 24 | 25 | Returns: 26 | Tensor: The output tensor of shape [N, L, C] after conversion. 27 | """ 28 | assert len(x.shape) == 4 29 | return x.flatten(2).transpose(1, 2).contiguous() 30 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .bisenetv1 import BiSeNetV1 3 | from .bisenetv2 import BiSeNetV2 4 | from .cgnet import CGNet 5 | from .erfnet import ERFNet 6 | from .fast_scnn import FastSCNN 7 | from .hrnet import HRNet 8 | from .icnet import ICNet 9 | from .mit import MixVisionTransformer 10 | from .mobilenet_v2 import MobileNetV2 11 | from .mobilenet_v3 import MobileNetV3 12 | from .resnest import ResNeSt 13 | from .resnet import ResNet, ResNetV1c, ResNetV1d 14 | from .resnext import ResNeXt 15 | from .stdc import STDCContextPathNet, STDCNet 16 | from .swin import SwinTransformer 17 | from .timm_backbone import TIMMBackbone 18 | from .twins import PCPVT, SVT 19 | from .unet import UNet 20 | from .vit import VisionTransformer 21 | 22 | __all__ = [ 23 | 'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN', 24 | 'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3', 25 | 'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer', 26 | 'BiSeNetV1', 'BiSeNetV2', 'ICNet', 'TIMMBackbone', 'ERFNet', 'PCPVT', 27 | 'SVT', 'STDCNet', 'STDCContextPathNet' 28 | ] 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .ade import ADE20KDataset 3 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset 4 | from .chase_db1 import ChaseDB1Dataset 5 | from .cityscapes import CityscapesDataset 6 | from .coco_stuff import COCOStuffDataset 7 | from .custom import CustomDataset 8 | from .dark_zurich import DarkZurichDataset 9 | from .dataset_wrappers import ConcatDataset, RepeatDataset 10 | from .drive import DRIVEDataset 11 | from .hrf import HRFDataset 12 | from .loveda import LoveDADataset 13 | from .night_driving import NightDrivingDataset 14 | from .pascal_context import PascalContextDataset, PascalContextDataset59 15 | from .stare import STAREDataset 16 | from .voc import PascalVOCDataset 17 | 18 | __all__ = [ 19 | 'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 20 | 'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset', 21 | 'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset', 22 | 'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset', 23 | 'STAREDataset', 'DarkZurichDataset', 'NightDrivingDataset', 24 | 'COCOStuffDataset', 'LoveDADataset' 25 | ] 26 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/voc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class PascalVOCDataset(CustomDataset): 10 | """Pascal VOC dataset. 11 | 12 | Args: 13 | split (str): Split txt file for Pascal VOC. 14 | """ 15 | 16 | CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 17 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 18 | 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 19 | 'train', 'tvmonitor') 20 | 21 | PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], 22 | [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], 23 | [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], 24 | [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], 25 | [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] 26 | 27 | def __init__(self, split, **kwargs): 28 | super(PascalVOCDataset, self).__init__( 29 | img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) 30 | assert osp.exists(self.img_dir) and self.split is not None 31 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmcv_custom/apex_runner/optimizer.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import OptimizerHook, HOOKS 2 | try: 3 | import apex 4 | except: 5 | print('apex is not installed') 6 | 7 | 8 | @HOOKS.register_module() 9 | class DistOptimizerHook(OptimizerHook): 10 | """Optimizer hook for distributed training.""" 11 | 12 | def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False): 13 | self.grad_clip = grad_clip 14 | self.coalesce = coalesce 15 | self.bucket_size_mb = bucket_size_mb 16 | self.update_interval = update_interval 17 | self.use_fp16 = use_fp16 18 | 19 | def before_run(self, runner): 20 | runner.optimizer.zero_grad() 21 | 22 | def after_train_iter(self, runner): 23 | runner.outputs['loss'] /= self.update_interval 24 | if self.use_fp16: 25 | with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as 
scaled_loss: 26 | scaled_loss.backward() 27 | else: 28 | runner.outputs['loss'].backward() 29 | if self.every_n_iters(runner, self.update_interval): 30 | if self.grad_clip is not None: 31 | self.clip_grads(runner.model.parameters()) 32 | runner.optimizer.step() 33 | runner.optimizer.zero_grad() 34 | -------------------------------------------------------------------------------- /util/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | import functools 5 | from termcolor import colored 6 | 7 | 8 | @functools.lru_cache() 9 | def create_logger(output_dir, dist_rank=0, name=''): 10 | # create logger 11 | logger = logging.getLogger(name) 12 | logger.setLevel(logging.DEBUG) 13 | logger.propagate = False 14 | 15 | # create formatter 16 | fmt = '[%(asctime)s %(name)s] (%(filename)s %(lineno)d): %(levelname)s %(message)s' 17 | color_fmt = colored('[%(asctime)s %(name)s]', 'green') + \ 18 | colored('(%(filename)s %(lineno)d)', 'yellow') + ': %(levelname)s %(message)s' 19 | 20 | # create console handlers for master process 21 | if dist_rank == 0: 22 | console_handler = logging.StreamHandler(sys.stdout) 23 | console_handler.setLevel(logging.DEBUG) 24 | console_handler.setFormatter( 25 | logging.Formatter(fmt=color_fmt, datefmt='%Y-%m-%d %H:%M:%S')) 26 | logger.addHandler(console_handler) 27 | 28 | # create file handlers 29 | file_handler = logging.FileHandler(os.path.join(output_dir, f'log_rank{dist_rank}.txt'), mode='a') 30 | file_handler.setLevel(logging.DEBUG) 31 | file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S')) 32 | logger.addHandler(file_handler) 33 | 34 | return logger 35 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/utils/make_divisible.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def make_divisible(value, divisor, min_value=None, min_ratio=0.9): 3 | """Make divisible function. 4 | 5 | This function rounds the channel number to the nearest value that can be 6 | divisible by the divisor. It is taken from the original tf repo. It ensures 7 | that all layers have a channel number that is divisible by divisor. It can 8 | be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa 9 | 10 | Args: 11 | value (int): The original channel number. 12 | divisor (int): The divisor to fully divide the channel number. 13 | min_value (int): The minimum value of the output channel. 14 | Default: None, means that the minimum value equal to the divisor. 15 | min_ratio (float): The minimum ratio of the rounded channel number to 16 | the original channel number. Default: 0.9. 17 | 18 | Returns: 19 | int: The modified output channel number. 20 | """ 21 | 22 | if min_value is None: 23 | min_value = divisor 24 | new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) 25 | # Make sure that round down does not go down by more than (1-min_ratio). 26 | if new_value < min_ratio * value: 27 | new_value += divisor 28 | return new_value 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .ann_head import ANNHead 3 | from .apc_head import APCHead 4 | from .aspp_head import ASPPHead 5 | from .cc_head import CCHead 6 | from .da_head import DAHead 7 | from .dm_head import DMHead 8 | from .dnl_head import DNLHead 9 | from .dpt_head import DPTHead 10 | from .ema_head import EMAHead 11 | from .enc_head import EncHead 12 | from .fcn_head import FCNHead 13 | from .fpn_head import FPNHead 14 | from .gc_head import GCHead 15 | from .isa_head import ISAHead 16 | from .lraspp_head import LRASPPHead 17 | from .nl_head import NLHead 18 | from .ocr_head import OCRHead 19 | from .point_head import PointHead 20 | from .psa_head import PSAHead 21 | from .psp_head import PSPHead 22 | from .segformer_head import SegformerHead 23 | from .sep_aspp_head import DepthwiseSeparableASPPHead 24 | from .sep_fcn_head import DepthwiseSeparableFCNHead 25 | from .setr_mla_head import SETRMLAHead 26 | from .setr_up_head import SETRUPHead 27 | from .stdc_head import STDCHead 28 | from .uper_head import UPerHead 29 | 30 | __all__ = [ 31 | 'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead', 32 | 'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead', 33 | 'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead', 34 | 'PointHead', 'APCHead', 'DMHead', 'LRASPPHead', 'SETRUPHead', 35 | 'SETRMLAHead', 'DPTHead', 'SETRMLAHead', 'SegformerHead', 'ISAHead', 36 | 'STDCHead' 37 | ] 38 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/cc_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | from ..builder import HEADS 5 | from .fcn_head import FCNHead 6 | 7 | try: 8 | from mmcv.ops import CrissCrossAttention 9 | except ModuleNotFoundError: 10 | CrissCrossAttention = None 11 | 12 | 13 | @HEADS.register_module() 14 | class CCHead(FCNHead): 15 | """CCNet: Criss-Cross Attention for Semantic Segmentation. 16 | 17 | This head is the implementation of `CCNet 18 | `_. 19 | 20 | Args: 21 | recurrence (int): Number of recurrence of Criss Cross Attention 22 | module. Default: 2. 23 | """ 24 | 25 | def __init__(self, recurrence=2, **kwargs): 26 | if CrissCrossAttention is None: 27 | raise RuntimeError('Please install mmcv-full for ' 28 | 'CrissCrossAttention ops') 29 | super(CCHead, self).__init__(num_convs=2, **kwargs) 30 | self.recurrence = recurrence 31 | self.cca = CrissCrossAttention(self.channels) 32 | 33 | def forward(self, inputs): 34 | """Forward function.""" 35 | x = self._transform_inputs(inputs) 36 | output = self.convs[0](x) 37 | for _ in range(self.recurrence): 38 | output = self.cca(output) 39 | output = self.convs[1](output) 40 | if self.concat_input: 41 | output = self.conv_cat(torch.cat([x, output], dim=1)) 42 | output = self.cls_seg(output) 43 | return output 44 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import warnings 3 | 4 | from mmcv.cnn import MODELS as MMCV_MODELS 5 | from mmcv.cnn.bricks.registry import ATTENTION as MMCV_ATTENTION 6 | from mmcv.utils import Registry 7 | 8 | MODELS = Registry('models', parent=MMCV_MODELS) 9 | ATTENTION = Registry('attention', parent=MMCV_ATTENTION) 10 | 11 | BACKBONES = MODELS 12 | NECKS = MODELS 13 | HEADS = MODELS 14 | LOSSES = MODELS 15 | SEGMENTORS = MODELS 16 | 17 | 18 | def build_backbone(cfg): 19 | """Build backbone.""" 20 | return BACKBONES.build(cfg) 21 | 22 | 23 | def build_neck(cfg): 24 | """Build neck.""" 25 | return NECKS.build(cfg) 26 | 27 | 28 | def build_head(cfg): 29 | """Build head.""" 30 | return HEADS.build(cfg) 31 | 32 | 33 | def build_loss(cfg): 34 | """Build loss.""" 35 | return LOSSES.build(cfg) 36 | 37 | 38 | def build_segmentor(cfg, train_cfg=None, test_cfg=None): 39 | """Build segmentor.""" 40 | if train_cfg is not None or test_cfg is not None: 41 | warnings.warn( 42 | 'train_cfg and test_cfg is deprecated, ' 43 | 'please specify them in model', UserWarning) 44 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 45 | 'train_cfg specified in both outer field and model field ' 46 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 47 | 'test_cfg specified in both outer field and model field ' 48 | return SEGMENTORS.build( 49 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 50 | -------------------------------------------------------------------------------- /util/crop.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | 9 | import PIL.Image 10 | import torch 11 | 12 | from torchvision import transforms 13 | from torchvision.transforms import functional as F 14 | 15 | 16 | class RandomResizedCrop(transforms.RandomResizedCrop): 17 | """ 18 | RandomResizedCrop for matching TF/TPU implementation: no for-loop is used. 19 | This may lead to results different with torchvision's version. 20 | Following BYOL's TF code: 21 | https://github.com/deepmind/deepmind-research/blob/master/byol/utils/dataset.py#L206 22 | """ 23 | @staticmethod 24 | def get_params(img, scale, ratio): 25 | assert isinstance(img, PIL.Image.Image) 26 | # width, height = F._get_image_size(img) 27 | width, height = img.width, img.height 28 | area = height * width 29 | 30 | target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item() 31 | log_ratio = torch.log(torch.tensor(ratio)) 32 | aspect_ratio = torch.exp( 33 | torch.empty(1).uniform_(log_ratio[0], log_ratio[1]) 34 | ).item() 35 | 36 | w = int(round(math.sqrt(target_area * aspect_ratio))) 37 | h = int(round(math.sqrt(target_area / aspect_ratio))) 38 | 39 | w = min(w, width) 40 | h = min(h, height) 41 | 42 | i = torch.randint(0, height - h + 1, size=(1,)).item() 43 | j = torch.randint(0, width - w + 1, size=(1,)).item() 44 | 45 | return i, j, h, w -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import collections 3 | 4 | from mmcv.utils import build_from_cfg 5 | 6 | from ..builder import PIPELINES 7 | 8 | 9 | @PIPELINES.register_module() 10 | class Compose(object): 11 | """Compose multiple transforms sequentially. 12 | 13 | Args: 14 | transforms (Sequence[dict | callable]): Sequence of transform object or 15 | config dict to be composed. 16 | """ 17 | 18 | def __init__(self, transforms): 19 | assert isinstance(transforms, collections.abc.Sequence) 20 | self.transforms = [] 21 | for transform in transforms: 22 | if isinstance(transform, dict): 23 | transform = build_from_cfg(transform, PIPELINES) 24 | self.transforms.append(transform) 25 | elif callable(transform): 26 | self.transforms.append(transform) 27 | else: 28 | raise TypeError('transform must be callable or a dict') 29 | 30 | def __call__(self, data): 31 | """Call function to apply transforms sequentially. 32 | 33 | Args: 34 | data (dict): A result dict contains the data to transform. 35 | 36 | Returns: 37 | dict: Transformed data. 38 | """ 39 | 40 | for t in self.transforms: 41 | data = t(data) 42 | if data is None: 43 | return None 44 | return data 45 | 46 | def __repr__(self): 47 | format_string = self.__class__.__name__ + '(' 48 | for t in self.transforms: 49 | format_string += '\n' 50 | format_string += f' {t}' 51 | format_string += '\n)' 52 | return format_string 53 | -------------------------------------------------------------------------------- /train_video.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ADAPTER_CHANNEL=$1 4 | GPUS=${GPUS:-8} 5 | PORT=$((12000 + $RANDOM % 20000)) 6 | MASTER_ADDR=${MASTER_ADDR:-127.0.0.1} 7 | 8 | 9 | 10 | DATASET=K400 11 | CLUSTER=True \ 12 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ 13 | python -m torch.distributed.launch \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | --use_env \ 18 | main_video.py \ 19 | --batch_size 16 \ 20 | --cls_token \ 21 | --finetune VIT_BASE_IN21K \ 22 | --dist_eval \ 23 | --output_dir "./video_output/IN21K/0.5/${DATASET}" \ 24 | --drop_path 0.0 \ 25 | --blr 1e-3 \ 26 | --epochs 12 \ 27 | --warmup_epochs 2 \ 28 | --weight_decay 0.01 \ 29 | --dataset "${DATASET}" \ 30 | --ffn_adapt \ 31 | --auto_remove \ 32 | --token_target_ratio 0.5 33 | 34 | 35 | DATASET=SSV2 36 | CLUSTER=True \ 37 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ 38 | python -m torch.distributed.launch \ 39 | --master_addr=$MASTER_ADDR \ 40 | --nproc_per_node=$GPUS \ 41 | --master_port=$PORT \ 42 | --use_env \ 43 | main_video.py \ 44 | --batch_size 16 \ 45 | --cls_token \ 46 | --finetune VIT_BASE_IN21K \ 47 | --dist_eval \ 48 | --output_dir "./video_output/IN21K/0.5/${DATASET}" \ 49 | --drop_path 0.0 \ 50 | --blr 1e-3 \ 51 | --epochs 50 \ 52 | --warmup_epochs 2 \ 53 | --eval_freq 5 \ 54 | --weight_decay 0.01 \ 55 | --dataset "${DATASET}" \ 56 | --ffn_adapt \ 57 | --auto_remove \ 58 | --token_target_ratio 0.5 -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/nl_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.cnn import NonLocal2d 4 | 5 | from ..builder import HEADS 6 | from .fcn_head import FCNHead 7 | 8 | 9 | @HEADS.register_module() 10 | class NLHead(FCNHead): 11 | """Non-local Neural Networks. 12 | 13 | This head is the implementation of `NLNet 14 | `_. 
15 | 16 | Args: 17 | reduction (int): Reduction factor of projection transform. Default: 2. 18 | use_scale (bool): Whether to scale pairwise_weight by 19 | sqrt(1/inter_channels). Default: True. 20 | mode (str): The nonlocal mode. Options are 'embedded_gaussian', 21 | 'dot_product'. Default: 'embedded_gaussian.'. 22 | """ 23 | 24 | def __init__(self, 25 | reduction=2, 26 | use_scale=True, 27 | mode='embedded_gaussian', 28 | **kwargs): 29 | super(NLHead, self).__init__(num_convs=2, **kwargs) 30 | self.reduction = reduction 31 | self.use_scale = use_scale 32 | self.mode = mode 33 | self.nl_block = NonLocal2d( 34 | in_channels=self.channels, 35 | reduction=self.reduction, 36 | use_scale=self.use_scale, 37 | conv_cfg=self.conv_cfg, 38 | norm_cfg=self.norm_cfg, 39 | mode=self.mode) 40 | 41 | def forward(self, inputs): 42 | """Forward function.""" 43 | x = self._transform_inputs(inputs) 44 | output = self.convs[0](x) 45 | output = self.nl_block(output) 46 | output = self.convs[1](output) 47 | if self.concat_input: 48 | output = self.conv_cat(torch.cat([x, output], dim=1)) 49 | output = self.cls_seg(output) 50 | return output 51 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/gc_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.cnn import ContextBlock 4 | 5 | from ..builder import HEADS 6 | from .fcn_head import FCNHead 7 | 8 | 9 | @HEADS.register_module() 10 | class GCHead(FCNHead): 11 | """GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. 12 | 13 | This head is the implementation of `GCNet 14 | `_. 15 | 16 | Args: 17 | ratio (float): Multiplier of channels ratio. Default: 1/4. 18 | pooling_type (str): The pooling type of context aggregation. 19 | Options are 'att', 'avg'. Default: 'avg'. 20 | fusion_types (tuple[str]): The fusion type for feature fusion. 21 | Options are 'channel_add', 'channel_mul'. Default: ('channel_add',) 22 | """ 23 | 24 | def __init__(self, 25 | ratio=1 / 4., 26 | pooling_type='att', 27 | fusion_types=('channel_add', ), 28 | **kwargs): 29 | super(GCHead, self).__init__(num_convs=2, **kwargs) 30 | self.ratio = ratio 31 | self.pooling_type = pooling_type 32 | self.fusion_types = fusion_types 33 | self.gc_block = ContextBlock( 34 | in_channels=self.channels, 35 | ratio=self.ratio, 36 | pooling_type=self.pooling_type, 37 | fusion_types=self.fusion_types) 38 | 39 | def forward(self, inputs): 40 | """Forward function.""" 41 | x = self._transform_inputs(inputs) 42 | output = self.convs[0](x) 43 | output = self.gc_block(output) 44 | output = self.convs[1](output) 45 | if self.concat_input: 46 | output = self.conv_cat(torch.cat([x, output], dim=1)) 47 | output = self.cls_seg(output) 48 | return output 49 | -------------------------------------------------------------------------------- /util/lars.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # -------------------------------------------------------- 7 | # LARS optimizer, implementation from MoCo v3: 8 | # https://github.com/facebookresearch/moco-v3 9 | # -------------------------------------------------------- 10 | 11 | import torch 12 | 13 | 14 | class LARS(torch.optim.Optimizer): 15 | """ 16 | LARS optimizer, no rate scaling or weight decay for parameters <= 1D. 17 | """ 18 | def __init__(self, params, lr=0, weight_decay=0, momentum=0.9, trust_coefficient=0.001): 19 | defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum, trust_coefficient=trust_coefficient) 20 | super().__init__(params, defaults) 21 | 22 | @torch.no_grad() 23 | def step(self): 24 | for g in self.param_groups: 25 | for p in g['params']: 26 | dp = p.grad 27 | 28 | if dp is None: 29 | continue 30 | 31 | if p.ndim > 1: # if not normalization gamma/beta or bias 32 | dp = dp.add(p, alpha=g['weight_decay']) 33 | param_norm = torch.norm(p) 34 | update_norm = torch.norm(dp) 35 | one = torch.ones_like(param_norm) 36 | q = torch.where(param_norm > 0., 37 | torch.where(update_norm > 0, 38 | (g['trust_coefficient'] * param_norm / update_norm), one), 39 | one) 40 | dp = dp.mul(q) 41 | 42 | param_state = self.state[p] 43 | if 'mu' not in param_state: 44 | param_state['mu'] = torch.zeros_like(p) 45 | mu = param_state['mu'] 46 | mu.mul_(g['momentum']).add_(dp) 47 | p.add_(mu, alpha=-g['lr']) -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/ops/wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings 3 | 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | def resize(input, 9 | size=None, 10 | scale_factor=None, 11 | mode='nearest', 12 | align_corners=None, 13 | warning=True): 14 | if warning: 15 | if size is not None and align_corners: 16 | input_h, input_w = tuple(int(x) for x in input.shape[2:]) 17 | output_h, output_w = tuple(int(x) for x in size) 18 | if output_h > input_h or output_w > output_h: 19 | if ((output_h > 1 and output_w > 1 and input_h > 1 20 | and input_w > 1) and (output_h - 1) % (input_h - 1) 21 | and (output_w - 1) % (input_w - 1)): 22 | warnings.warn( 23 | f'When align_corners={align_corners}, ' 24 | 'the output would more aligned if ' 25 | f'input size {(input_h, input_w)} is `x+1` and ' 26 | f'out size {(output_h, output_w)} is `nx+1`') 27 | return F.interpolate(input, size, scale_factor, mode, align_corners) 28 | 29 | 30 | class Upsample(nn.Module): 31 | 32 | def __init__(self, 33 | size=None, 34 | scale_factor=None, 35 | mode='nearest', 36 | align_corners=None): 37 | super(Upsample, self).__init__() 38 | self.size = size 39 | if isinstance(scale_factor, tuple): 40 | self.scale_factor = tuple(float(factor) for factor in scale_factor) 41 | else: 42 | self.scale_factor = float(scale_factor) if scale_factor else None 43 | self.mode = mode 44 | self.align_corners = align_corners 45 | 46 | def forward(self, x): 47 | if not self.size: 48 | size = [int(t * self.scale_factor) for t in x.shape[-2:]] 49 | else: 50 | size = self.size 51 | return resize(x, size, None, self.mode, self.align_corners) 52 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/configs/_base_/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = 
'/home/zhaowangbo.zwb/dataset/ADEChallengeData2016/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='images/training', 41 | ann_dir='annotations/training', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='images/validation', 47 | ann_dir='annotations/validation', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='images/validation', 53 | ann_dir='annotations/validation', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/configs/_base_/models/upernet_beit.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # BEIT: BERT Pre-Training of Image Transformers (https://arxiv.org/abs/2106.08254) 3 | # Github source: https://github.com/microsoft/unilm/tree/master/beit 4 | # Copyright (c) 2021 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # By Hangbo Bao 7 | # Based on timm, mmseg, setr, xcit and swin code bases 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm 9 | # https://github.com/fudan-zvg/SETR 10 | # https://github.com/facebookresearch/xcit/ 11 | # https://github.com/microsoft/Swin-Transformer 12 | # --------------------------------------------------------' 13 | # norm_cfg = dict(type='SyncBN', requires_grad=True) 14 | norm_cfg = dict(type='BN', requires_grad=True) 15 | model = dict( 16 | type='EncoderDecoder', 17 | pretrained=None, 18 | backbone=dict( 19 | type='XCiT', 20 | patch_size=16, 21 | embed_dim=384, 22 | depth=12, 23 | num_heads=8, 24 | mlp_ratio=4, 25 | qkv_bias=True, 26 | use_abs_pos_emb=True, 27 | use_rel_pos_bias=False, 28 | ), 29 | decode_head=dict( 30 | type='UPerHead', 31 | in_channels=[384, 384, 384, 384], 32 | in_index=[0, 1, 2, 3], 33 | pool_scales=(1, 2, 3, 6), 34 | channels=512, 35 | dropout_ratio=0.1, 36 | num_classes=19, 37 | norm_cfg=norm_cfg, 38 | align_corners=False, 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 41 | auxiliary_head=dict( 42 | type='FCNHead', 43 | in_channels=384, 44 | in_index=2, 45 | channels=256, 46 | num_convs=1, 47 | concat_input=False, 48 
| dropout_ratio=0.1, 49 | num_classes=19, 50 | norm_cfg=norm_cfg, 51 | align_corners=False, 52 | loss_decode=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 54 | # model training and testing settings 55 | train_cfg=dict(), 56 | test_cfg=dict(mode='whole')) 57 | -------------------------------------------------------------------------------- /datasets/vtab.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | 3 | from PIL import Image 4 | import os 5 | import os.path 6 | from torchvision import transforms 7 | 8 | _DATASET_NAME = ( 9 | 'cifar', 10 | 'caltech101', 11 | 'dtd', 12 | 'oxford_flowers102', 13 | 'oxford_iiit_pet', 14 | 'svhn', 15 | 'sun397', 16 | 'patch_camelyon', 17 | 'eurosat', 18 | 'resisc45', 19 | 'diabetic_retinopathy', 20 | 'clevr_count', 21 | 'clevr_dist', 22 | 'dmlab', 23 | 'kitti', 24 | 'dsprites_loc', 25 | 'dsprites_ori', 26 | 'smallnorb_azi', 27 | 'smallnorb_ele', 28 | ) 29 | 30 | _CLASSES_NUM = (100, 102, 47, 102, 37, 10, 397, 2, 10, 45, 5, 8, 6, 6, 4, 16, 16, 18, 9) 31 | 32 | def get_classes_num(dataset_name): 33 | dict_ = {name: num for name, num in zip(_DATASET_NAME, _CLASSES_NUM)} 34 | return dict_[dataset_name] 35 | 36 | 37 | def get_classes_name(idx): 38 | return _DATASET_NAME[idx] 39 | 40 | 41 | def default_loader(path): 42 | return Image.open(path).convert('RGB') 43 | 44 | 45 | def default_flist_reader(flist): 46 | imlist = [] 47 | with open(flist, 'r') as rf: 48 | for line in rf.readlines(): 49 | impath, imlabel = line.strip().split() 50 | imlist.append((impath, int(imlabel))) 51 | 52 | return imlist 53 | 54 | class ImageFilelist(data.Dataset): 55 | def __init__(self, root, flist, transform=None, target_transform=None, 56 | flist_reader=default_flist_reader, loader=default_loader): 57 | self.root = root 58 | self.imlist = flist_reader(flist) 59 | self.transform = transform 60 | self.target_transform = target_transform 61 | self.loader = loader 62 | 63 | def __getitem__(self, index): 64 | impath, target = self.imlist[index] 65 | img = self.loader(os.path.join(self.root, impath)) 66 | if self.transform is not None: 67 | img = self.transform(img) 68 | if self.target_transform is not None: 69 | target = self.target_transform(target) 70 | 71 | return img, target 72 | 73 | def __len__(self): 74 | return len(self.imlist) 75 | -------------------------------------------------------------------------------- /util/datasets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
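# Usage note: the helpers below read a handful of fields from an `args`
# namespace; the values shown here are illustrative assumptions, not defaults
# taken from this repository:
#
#     from argparse import Namespace
#     args = Namespace(data_path='/data/imagenet', input_size=224,
#                      color_jitter=0.4, aa='rand-m9-mstd0.5-inc1',
#                      reprob=0.25, remode='pixel', recount=1)
#     train_set = build_dataset(is_train=True, args=args)
#     val_set = build_dataset(is_train=False, args=args)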
6 | # -------------------------------------------------------- 7 | # References: 8 | # DeiT: https://github.com/facebookresearch/deit 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import PIL 13 | 14 | from torchvision import datasets, transforms 15 | 16 | from timm.data import create_transform 17 | from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 18 | 19 | 20 | def build_dataset(is_train, args): 21 | transform = build_transform(is_train, args) 22 | 23 | root = os.path.join(args.data_path, 'train' if is_train else 'val') 24 | dataset = datasets.ImageFolder(root, transform=transform) 25 | 26 | print(dataset) 27 | 28 | return dataset 29 | 30 | 31 | def build_transform(is_train, args): 32 | mean = IMAGENET_DEFAULT_MEAN 33 | std = IMAGENET_DEFAULT_STD 34 | # train transform 35 | if is_train: 36 | # this should always dispatch to transforms_imagenet_train 37 | transform = create_transform( 38 | input_size=args.input_size, 39 | is_training=True, 40 | color_jitter=args.color_jitter, 41 | auto_augment=args.aa, 42 | interpolation='bicubic', 43 | re_prob=args.reprob, 44 | re_mode=args.remode, 45 | re_count=args.recount, 46 | mean=mean, 47 | std=std, 48 | ) 49 | return transform 50 | 51 | # eval transform 52 | t = [] 53 | if args.input_size <= 224: 54 | crop_pct = 224 / 256 55 | else: 56 | crop_pct = 1.0 57 | size = int(args.input_size / crop_pct) 58 | t.append( 59 | transforms.Resize(size, interpolation=PIL.Image.BICUBIC), # to maintain same ratio w.r.t. 224 images 60 | ) 61 | t.append(transforms.CenterCrop(args.input_size)) 62 | 63 | t.append(transforms.ToTensor()) 64 | t.append(transforms.Normalize(mean, std)) 65 | return transforms.Compose(t) 66 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings 3 | 4 | import mmcv 5 | from packaging.version import parse 6 | 7 | from .version import __version__, version_info 8 | 9 | MMCV_MIN = '1.3.13' 10 | MMCV_MAX = '1.5.0' 11 | 12 | 13 | def digit_version(version_str: str, length: int = 4): 14 | """Convert a version string into a tuple of integers. 15 | 16 | This method is usually used for comparing two versions. For pre-release 17 | versions: alpha < beta < rc. 18 | 19 | Args: 20 | version_str (str): The version string. 21 | length (int): The maximum number of version levels. Default: 4. 22 | 23 | Returns: 24 | tuple[int]: The version info in digits (integers). 
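        Examples (illustrative; the outputs follow from the parsing rules below):
            >>> digit_version('1.3.13')
            (1, 3, 13, 0, 0, 0)
            >>> digit_version('1.5.0rc1')
            (1, 5, 0, 0, -1, 1)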
25 | """ 26 | version = parse(version_str) 27 | assert version.release, f'failed to parse version {version_str}' 28 | release = list(version.release) 29 | release = release[:length] 30 | if len(release) < length: 31 | release = release + [0] * (length - len(release)) 32 | if version.is_prerelease: 33 | mapping = {'a': -3, 'b': -2, 'rc': -1} 34 | val = -4 35 | # version.pre can be None 36 | if version.pre: 37 | if version.pre[0] not in mapping: 38 | warnings.warn(f'unknown prerelease version {version.pre[0]}, ' 39 | 'version checking may go wrong') 40 | else: 41 | val = mapping[version.pre[0]] 42 | release.extend([val, version.pre[-1]]) 43 | else: 44 | release.extend([val, 0]) 45 | 46 | elif version.is_postrelease: 47 | release.extend([1, version.post]) 48 | else: 49 | release.extend([0, 0]) 50 | return tuple(release) 51 | 52 | 53 | mmcv_min_version = digit_version(MMCV_MIN) 54 | mmcv_max_version = digit_version(MMCV_MAX) 55 | mmcv_version = digit_version(mmcv.__version__) 56 | 57 | 58 | assert (mmcv_min_version <= mmcv_version <= mmcv_max_version), \ 59 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 60 | f'Please install mmcv>={mmcv_min_version}, <={mmcv_max_version}.' 61 | 62 | __all__ = ['__version__', 'version_info', 'digit_version'] 63 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/backbones/timm_backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | try: 3 | import timm 4 | except ImportError: 5 | timm = None 6 | 7 | from mmcv.cnn.bricks.registry import NORM_LAYERS 8 | from mmcv.runner import BaseModule 9 | 10 | from ..builder import BACKBONES 11 | 12 | 13 | @BACKBONES.register_module() 14 | class TIMMBackbone(BaseModule): 15 | """Wrapper to use backbones from timm library. More details can be found in 16 | `timm `_ . 17 | 18 | Args: 19 | model_name (str): Name of timm model to instantiate. 20 | pretrained (bool): Load pretrained weights if True. 21 | checkpoint_path (str): Path of checkpoint to load after 22 | model is initialized. 23 | in_channels (int): Number of input image channels. Default: 3. 24 | init_cfg (dict, optional): Initialization config dict 25 | **kwargs: Other timm & model specific arguments. 
26 | """ 27 | 28 | def __init__( 29 | self, 30 | model_name, 31 | features_only=True, 32 | pretrained=True, 33 | checkpoint_path='', 34 | in_channels=3, 35 | init_cfg=None, 36 | **kwargs, 37 | ): 38 | if timm is None: 39 | raise RuntimeError('timm is not installed') 40 | super(TIMMBackbone, self).__init__(init_cfg) 41 | if 'norm_layer' in kwargs: 42 | kwargs['norm_layer'] = NORM_LAYERS.get(kwargs['norm_layer']) 43 | self.timm_model = timm.create_model( 44 | model_name=model_name, 45 | features_only=features_only, 46 | pretrained=pretrained, 47 | in_chans=in_channels, 48 | checkpoint_path=checkpoint_path, 49 | **kwargs, 50 | ) 51 | 52 | # Make unused parameters None 53 | self.timm_model.global_pool = None 54 | self.timm_model.fc = None 55 | self.timm_model.classifier = None 56 | 57 | # Hack to use pretrained weights from timm 58 | if pretrained or checkpoint_path: 59 | self._is_init = True 60 | 61 | def forward(self, x): 62 | features = self.timm_model(x) 63 | return features 64 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/segformer_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from mmseg.models.builder import HEADS 7 | from mmseg.models.decode_heads.decode_head import BaseDecodeHead 8 | from mmseg.ops import resize 9 | 10 | 11 | @HEADS.register_module() 12 | class SegformerHead(BaseDecodeHead): 13 | """The all mlp Head of segformer. 14 | 15 | This head is the implementation of 16 | `Segformer ` _. 17 | 18 | Args: 19 | interpolate_mode: The interpolate mode of MLP head upsample operation. 20 | Default: 'bilinear'. 
21 | """ 22 | 23 | def __init__(self, interpolate_mode='bilinear', **kwargs): 24 | super().__init__(input_transform='multiple_select', **kwargs) 25 | 26 | self.interpolate_mode = interpolate_mode 27 | num_inputs = len(self.in_channels) 28 | 29 | assert num_inputs == len(self.in_index) 30 | 31 | self.convs = nn.ModuleList() 32 | for i in range(num_inputs): 33 | self.convs.append( 34 | ConvModule( 35 | in_channels=self.in_channels[i], 36 | out_channels=self.channels, 37 | kernel_size=1, 38 | stride=1, 39 | norm_cfg=self.norm_cfg, 40 | act_cfg=self.act_cfg)) 41 | 42 | self.fusion_conv = ConvModule( 43 | in_channels=self.channels * num_inputs, 44 | out_channels=self.channels, 45 | kernel_size=1, 46 | norm_cfg=self.norm_cfg) 47 | 48 | def forward(self, inputs): 49 | # Receive 4 stage backbone feature map: 1/4, 1/8, 1/16, 1/32 50 | inputs = self._transform_inputs(inputs) 51 | outs = [] 52 | for idx in range(len(inputs)): 53 | x = inputs[idx] 54 | conv = self.convs[idx] 55 | outs.append( 56 | resize( 57 | input=conv(x), 58 | size=inputs[0].shape[2:], 59 | mode=self.interpolate_mode, 60 | align_corners=self.align_corners)) 61 | 62 | out = self.fusion_conv(torch.cat(outs, dim=1)) 63 | 64 | out = self.cls_seg(out) 65 | 66 | return out 67 | -------------------------------------------------------------------------------- /train_IN21K.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ADAPTER_CHANNEL=$1 4 | GPUS=${GPUS:-8} 5 | PORT=$((12000 + $RANDOM % 20000)) 6 | MASTER_ADDR=${MASTER_ADDR:-127.0.0.1} 7 | 8 | 9 | DATASET=cifar100_full 10 | CLUSTER=True \ 11 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ 12 | python -m torch.distributed.launch \ 13 | --master_addr=$MASTER_ADDR \ 14 | --nproc_per_node=$GPUS \ 15 | --master_port=$PORT \ 16 | --use_env \ 17 | main_image.py \ 18 | --batch_size 128 \ 19 | --cls_token \ 20 | --finetune VIT_BASE_IN21K \ 21 | --dist_eval \ 22 | --output_dir "./output/IN21K/0.5/${DATASET}" \ 23 | --drop_path 0.0 \ 24 | --blr 1e-3 \ 25 | --weight_decay 0.01 \ 26 | --dataset "${DATASET}" \ 27 | --ffn_adapt \ 28 | --auto_remove \ 29 | --token_target_ratio 0.5 30 | 31 | 32 | DATASET=svhn_full 33 | CLUSTER=True \ 34 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ 35 | python -m torch.distributed.launch \ 36 | --master_addr=$MASTER_ADDR \ 37 | --nproc_per_node=$GPUS \ 38 | --master_port=$PORT \ 39 | --use_env \ 40 | main_image.py \ 41 | --batch_size 128 \ 42 | --cls_token \ 43 | --finetune VIT_BASE_IN21K \ 44 | --dist_eval \ 45 | --output_dir "./output/IN21K/0.5/${DATASET}" \ 46 | --drop_path 0.0 \ 47 | --blr 1e-3 \ 48 | --weight_decay 0.01 \ 49 | --dataset "${DATASET}" \ 50 | --ffn_adapt \ 51 | --auto_remove \ 52 | --token_target_ratio 0.5 53 | 54 | 55 | 56 | 57 | DATASET=food101_full 58 | CLUSTER=True \ 59 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ 60 | python -m torch.distributed.launch \ 61 | --master_addr=$MASTER_ADDR \ 62 | --nproc_per_node=$GPUS \ 63 | --master_port=$PORT \ 64 | --use_env \ 65 | main_image.py \ 66 | --batch_size 128 \ 67 | --cls_token \ 68 | --finetune VIT_BASE_IN21K \ 69 | --dist_eval \ 70 | --output_dir "./output/IN21K/0.5/${DATASET}" \ 71 | --drop_path 0.0 \ 72 | --blr 1e-3 \ 73 | --weight_decay 0.01 \ 74 | --dataset "${DATASET}" \ 75 | --ffn_adapt \ 76 | --auto_remove \ 77 | --token_target_ratio 0.5 78 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/utils/se_layer.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from .make_divisible import make_divisible 7 | 8 | 9 | class SELayer(nn.Module): 10 | """Squeeze-and-Excitation Module. 11 | 12 | Args: 13 | channels (int): The input (and output) channels of the SE layer. 14 | ratio (int): Squeeze ratio in SELayer, the intermediate channel will be 15 | ``int(channels/ratio)``. Default: 16. 16 | conv_cfg (None or dict): Config dict for convolution layer. 17 | Default: None, which means using conv2d. 18 | act_cfg (dict or Sequence[dict]): Config dict for activation layer. 19 | If act_cfg is a dict, two activation layers will be configured 20 | by this dict. If act_cfg is a sequence of dicts, the first 21 | activation layer will be configured by the first dict and the 22 | second activation layer will be configured by the second dict. 23 | Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, 24 | divisor=6.0)). 25 | """ 26 | 27 | def __init__(self, 28 | channels, 29 | ratio=16, 30 | conv_cfg=None, 31 | act_cfg=(dict(type='ReLU'), 32 | dict(type='HSigmoid', bias=3.0, divisor=6.0))): 33 | super(SELayer, self).__init__() 34 | if isinstance(act_cfg, dict): 35 | act_cfg = (act_cfg, act_cfg) 36 | assert len(act_cfg) == 2 37 | assert mmcv.is_tuple_of(act_cfg, dict) 38 | self.global_avgpool = nn.AdaptiveAvgPool2d(1) 39 | self.conv1 = ConvModule( 40 | in_channels=channels, 41 | out_channels=make_divisible(channels // ratio, 8), 42 | kernel_size=1, 43 | stride=1, 44 | conv_cfg=conv_cfg, 45 | act_cfg=act_cfg[0]) 46 | self.conv2 = ConvModule( 47 | in_channels=make_divisible(channels // ratio, 8), 48 | out_channels=channels, 49 | kernel_size=1, 50 | stride=1, 51 | conv_cfg=conv_cfg, 52 | act_cfg=act_cfg[1]) 53 | 54 | def forward(self, x): 55 | out = self.global_avgpool(x) 56 | out = self.conv1(out) 57 | out = self.conv2(out) 58 | return x * out 59 | -------------------------------------------------------------------------------- /configs.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | 4 | CLUSTER = os.environ.get('CLUSTER') 5 | 6 | if not CLUSTER: 7 | CHECKPOINTS = { 8 | 'VIT_BASE_IN21K': 'jx_vit_base_patch16_224_in21k-e5005f0a.pth', 9 | } 10 | DATASETS = { 11 | 'cifar10': 'path/small_datasets', 12 | 'cifar100': 'path/small_datasets', 13 | 'food101': 'path/small_datasets', 14 | 'svhn': 'path/small_datasets', 15 | 'flowers102': 'path/small_datasets', 16 | 'fgvc_aircraft': 'path/small_datasets', 17 | 'stanford_cars': 'path/small_datasets', 18 | 'dtd': 'path/small_datasets', 19 | 'oxford_iiit_pet': 'path/small_datasets', 20 | 'vtab': 'path/vtab-1k', 21 | 'K400': dict( 22 | TRAIN_ROOT='path/K400', 23 | VAL_ROOT='path/K400', 24 | TRAIN_LIST='path/K400/k400_train.txt', 25 | VAL_LIST='path/K400/k400_val.txt', 26 | NUM_CLASSES=400), 27 | 'HMDB51': dict( 28 | TRAIN_ROOT='path/HMDB51', 29 | VAL_ROOT='path/HMDB51', 30 | TRAIN_LIST='path/HMDB51/hmdb51_split1_train.txt' , 31 | VAL_LIST='path/HMDB51/hmdb51_split1_test.txt', 32 | NUM_CLASSES=51, 33 | ), 34 | } 35 | 36 | 37 | else: # for debug 38 | CHECKPOINTS = { 39 | 'VIT_BASE_IN21K': '/mnt/workspace/workgroup/zhaowangbo.zwb/research/dynamic_PEFT_ECCV2024/jx_vit_base_patch16_224_in21k-e5005f0a.pth', 40 | } 41 | DATASETS = { 42 | 'cifar10': 'path/small_datasets', 43 | 'cifar100': 'path/small_datasets', 44 | 'food101': 'path/small_datasets', 45 | 'svhn': 
'path/small_datasets', 46 | 'flowers102': 'path/small_datasets', 47 | 'fgvc_aircraft': 'path/small_datasets', 48 | 'stanford_cars': 'path/small_datasets', 49 | 'dtd': 'path/small_datasets', 50 | 'oxford_iiit_pet': 'path/small_datasets', 51 | 'vtab': 'path/vtab-1k', 52 | 'K400': dict( 53 | TRAIN_ROOT='path/K400', 54 | VAL_ROOT='path/K400', 55 | TRAIN_LIST='path/K400/k400_train.txt', 56 | VAL_LIST='path/K400/k400_val.txt', 57 | NUM_CLASSES=400), 58 | 'HMDB51': dict( 59 | TRAIN_ROOT='path/HMDB51', 60 | VAL_ROOT='path/HMDB51', 61 | TRAIN_LIST='path/HMDB51/hmdb51_split1_train.txt' , 62 | VAL_LIST='path/HMDB51/hmdb51_split1_test.txt', 63 | NUM_CLASSES=51, 64 | ), 65 | } 66 | 67 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/setr_mla_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from mmseg.ops import Upsample 7 | from ..builder import HEADS 8 | from .decode_head import BaseDecodeHead 9 | 10 | 11 | @HEADS.register_module() 12 | class SETRMLAHead(BaseDecodeHead): 13 | """Multi level feature aggretation head of SETR. 14 | 15 | MLA head of `SETR `_. 16 | 17 | Args: 18 | mlahead_channels (int): Channels of conv-conv-4x of multi-level feature 19 | aggregation. Default: 128. 20 | up_scale (int): The scale factor of interpolate. Default:4. 21 | """ 22 | 23 | def __init__(self, mla_channels=128, up_scale=4, **kwargs): 24 | super(SETRMLAHead, self).__init__( 25 | input_transform='multiple_select', **kwargs) 26 | self.mla_channels = mla_channels 27 | 28 | num_inputs = len(self.in_channels) 29 | 30 | # Refer to self.cls_seg settings of BaseDecodeHead 31 | assert self.channels == num_inputs * mla_channels 32 | 33 | self.up_convs = nn.ModuleList() 34 | for i in range(num_inputs): 35 | self.up_convs.append( 36 | nn.Sequential( 37 | ConvModule( 38 | in_channels=self.in_channels[i], 39 | out_channels=mla_channels, 40 | kernel_size=3, 41 | padding=1, 42 | norm_cfg=self.norm_cfg, 43 | act_cfg=self.act_cfg), 44 | ConvModule( 45 | in_channels=mla_channels, 46 | out_channels=mla_channels, 47 | kernel_size=3, 48 | padding=1, 49 | norm_cfg=self.norm_cfg, 50 | act_cfg=self.act_cfg), 51 | Upsample( 52 | scale_factor=up_scale, 53 | mode='bilinear', 54 | align_corners=self.align_corners))) 55 | 56 | def forward(self, inputs): 57 | inputs = self._transform_inputs(inputs) 58 | outs = [] 59 | for x, up_conv in zip(inputs, self.up_convs): 60 | outs.append(up_conv(x)) 61 | out = torch.cat(outs, dim=1) 62 | out = self.cls_seg(out) 63 | return out 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | test_case.py 2 | summary.csv 3 | output/ 4 | output_ada/ 5 | work_dirs/* 6 | __MACOSX 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | pip-wheel-metadata/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller 
builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/cascade_decode_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from abc import ABCMeta, abstractmethod 3 | 4 | from .decode_head import BaseDecodeHead 5 | 6 | 7 | class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta): 8 | """Base class for cascade decode head used in 9 | :class:`CascadeEncoderDecoder.""" 10 | 11 | def __init__(self, *args, **kwargs): 12 | super(BaseCascadeDecodeHead, self).__init__(*args, **kwargs) 13 | 14 | @abstractmethod 15 | def forward(self, inputs, prev_output): 16 | """Placeholder of forward function.""" 17 | pass 18 | 19 | def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, 20 | train_cfg): 21 | """Forward function for training. 22 | Args: 23 | inputs (list[Tensor]): List of multi-level img features. 24 | prev_output (Tensor): The output of previous decode head. 25 | img_metas (list[dict]): List of image info dict where each dict 26 | has: 'img_shape', 'scale_factor', 'flip', and may also contain 27 | 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. 28 | For details on the values of these keys see 29 | `mmseg/datasets/pipelines/formatting.py:Collect`. 30 | gt_semantic_seg (Tensor): Semantic segmentation masks 31 | used if the architecture supports semantic segmentation task. 32 | train_cfg (dict): The training config. 
33 | 34 | Returns: 35 | dict[str, Tensor]: a dictionary of loss components 36 | """ 37 | seg_logits = self.forward(inputs, prev_output) 38 | losses = self.losses(seg_logits, gt_semantic_seg) 39 | 40 | return losses 41 | 42 | def forward_test(self, inputs, prev_output, img_metas, test_cfg): 43 | """Forward function for testing. 44 | 45 | Args: 46 | inputs (list[Tensor]): List of multi-level img features. 47 | prev_output (Tensor): The output of previous decode head. 48 | img_metas (list[dict]): List of image info dict where each dict 49 | has: 'img_shape', 'scale_factor', 'flip', and may also contain 50 | 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. 51 | For details on the values of these keys see 52 | `mmseg/datasets/pipelines/formatting.py:Collect`. 53 | test_cfg (dict): The testing config. 54 | 55 | Returns: 56 | Tensor: Output segmentation map. 57 | """ 58 | return self.forward(inputs, prev_output) 59 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/sep_fcn_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn import DepthwiseSeparableConvModule 3 | 4 | from ..builder import HEADS 5 | from .fcn_head import FCNHead 6 | 7 | 8 | @HEADS.register_module() 9 | class DepthwiseSeparableFCNHead(FCNHead): 10 | """Depthwise-Separable Fully Convolutional Network for Semantic 11 | Segmentation. 12 | 13 | This head is implemented according to `Fast-SCNN: Fast Semantic 14 | Segmentation Network `_. 15 | 16 | Args: 17 | in_channels(int): Number of output channels of FFM. 18 | channels(int): Number of middle-stage channels in the decode head. 19 | concat_input(bool): Whether to concatenate original decode input into 20 | the result of several consecutive convolution layers. 21 | Default: True. 22 | num_classes(int): Used to determine the dimension of 23 | final prediction tensor. 24 | in_index(int): Correspond with 'out_indices' in FastSCNN backbone. 25 | norm_cfg (dict | None): Config of norm layers. 26 | align_corners (bool): align_corners argument of F.interpolate. 27 | Default: False. 28 | loss_decode(dict): Config of loss type and some 29 | relevant additional options. 30 | dw_act_cfg (dict):Activation config of depthwise ConvModule. If it is 31 | 'default', it will be the same as `act_cfg`. Default: None. 
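        Example (illustrative; the channel numbers are assumptions in the style
        of a Fast-SCNN setup, not values from this repository):
            >>> head = DepthwiseSeparableFCNHead(
            ...     in_channels=128,
            ...     channels=128,
            ...     concat_input=False,
            ...     num_classes=19,
            ...     in_index=-1,
            ...     norm_cfg=dict(type='BN', requires_grad=True),
            ...     align_corners=False)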
32 | """ 33 | 34 | def __init__(self, dw_act_cfg=None, **kwargs): 35 | super(DepthwiseSeparableFCNHead, self).__init__(**kwargs) 36 | self.convs[0] = DepthwiseSeparableConvModule( 37 | self.in_channels, 38 | self.channels, 39 | kernel_size=self.kernel_size, 40 | padding=self.kernel_size // 2, 41 | norm_cfg=self.norm_cfg, 42 | dw_act_cfg=dw_act_cfg) 43 | 44 | for i in range(1, self.num_convs): 45 | self.convs[i] = DepthwiseSeparableConvModule( 46 | self.channels, 47 | self.channels, 48 | kernel_size=self.kernel_size, 49 | padding=self.kernel_size // 2, 50 | norm_cfg=self.norm_cfg, 51 | dw_act_cfg=dw_act_cfg) 52 | 53 | if self.concat_input: 54 | self.conv_cat = DepthwiseSeparableConvModule( 55 | self.in_channels + self.channels, 56 | self.channels, 57 | kernel_size=self.kernel_size, 58 | padding=self.kernel_size // 2, 59 | norm_cfg=self.norm_cfg, 60 | dw_act_cfg=dw_act_cfg) 61 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/fpn_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from mmseg.ops import Upsample, resize 7 | from ..builder import HEADS 8 | from .decode_head import BaseDecodeHead 9 | 10 | 11 | @HEADS.register_module() 12 | class FPNHead(BaseDecodeHead): 13 | """Panoptic Feature Pyramid Networks. 14 | 15 | This head is the implementation of `Semantic FPN 16 | `_. 17 | 18 | Args: 19 | feature_strides (tuple[int]): The strides for input feature maps. 20 | stack_lateral. All strides suppose to be power of 2. The first 21 | one is of largest resolution. 22 | """ 23 | 24 | def __init__(self, feature_strides, **kwargs): 25 | super(FPNHead, self).__init__( 26 | input_transform='multiple_select', **kwargs) 27 | assert len(feature_strides) == len(self.in_channels) 28 | assert min(feature_strides) == feature_strides[0] 29 | self.feature_strides = feature_strides 30 | 31 | self.scale_heads = nn.ModuleList() 32 | for i in range(len(feature_strides)): 33 | head_length = max( 34 | 1, 35 | int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) 36 | scale_head = [] 37 | for k in range(head_length): 38 | scale_head.append( 39 | ConvModule( 40 | self.in_channels[i] if k == 0 else self.channels, 41 | self.channels, 42 | 3, 43 | padding=1, 44 | conv_cfg=self.conv_cfg, 45 | norm_cfg=self.norm_cfg, 46 | act_cfg=self.act_cfg)) 47 | if feature_strides[i] != feature_strides[0]: 48 | scale_head.append( 49 | Upsample( 50 | scale_factor=2, 51 | mode='bilinear', 52 | align_corners=self.align_corners)) 53 | self.scale_heads.append(nn.Sequential(*scale_head)) 54 | 55 | def forward(self, inputs): 56 | 57 | x = self._transform_inputs(inputs) 58 | 59 | output = self.scale_heads[0](x[0]) 60 | for i in range(1, len(self.feature_strides)): 61 | # non inplace 62 | output = output + resize( 63 | self.scale_heads[i](x[i]), 64 | size=output.shape[2:], 65 | mode='bilinear', 66 | align_corners=self.align_corners) 67 | 68 | output = self.cls_seg(output) 69 | return output 70 | -------------------------------------------------------------------------------- /util/lr_decay.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # -------------------------------------------------------- 7 | # References: 8 | # ELECTRA https://github.com/google-research/electra 9 | # BEiT: https://github.com/microsoft/unilm/tree/master/beit 10 | # -------------------------------------------------------- 11 | 12 | import json 13 | 14 | 15 | def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=[], layer_decay=.75): 16 | """ 17 | Parameter groups for layer-wise lr decay 18 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58 19 | """ 20 | param_group_names = {} 21 | param_groups = {} 22 | 23 | num_layers = len(model.blocks) + 1 24 | 25 | layer_scales = list(layer_decay ** (num_layers - i) for i in range(num_layers + 1)) 26 | 27 | for n, p in model.named_parameters(): 28 | if not p.requires_grad: 29 | continue 30 | 31 | # no decay: all 1D parameters and model specific ones 32 | if p.ndim == 1 or n in no_weight_decay_list: 33 | g_decay = "no_decay" 34 | this_decay = 0. 35 | else: 36 | g_decay = "decay" 37 | this_decay = weight_decay 38 | 39 | layer_id = get_layer_id_for_vit(n, num_layers) 40 | group_name = "layer_%d_%s" % (layer_id, g_decay) 41 | 42 | if group_name not in param_group_names: 43 | this_scale = layer_scales[layer_id] 44 | 45 | param_group_names[group_name] = { 46 | "lr_scale": this_scale, 47 | "weight_decay": this_decay, 48 | "params": [], 49 | } 50 | param_groups[group_name] = { 51 | "lr_scale": this_scale, 52 | "weight_decay": this_decay, 53 | "params": [], 54 | } 55 | 56 | param_group_names[group_name]["params"].append(n) 57 | param_groups[group_name]["params"].append(p) 58 | 59 | # print("parameter groups: \n%s" % json.dumps(param_group_names, indent=2)) 60 | 61 | return list(param_groups.values()) 62 | 63 | 64 | def get_layer_id_for_vit(name, num_layers): 65 | """ 66 | Assign a parameter with its layer id 67 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33 68 | """ 69 | if name in ['cls_token', 'pos_embed']: 70 | return 0 71 | elif name.startswith('patch_embed'): 72 | return 0 73 | elif name.startswith('blocks'): 74 | return int(name.split('.')[1]) + 1 75 | else: 76 | return num_layers -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/necks/multilevel_neck.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | from mmcv.cnn import ConvModule, xavier_init 4 | 5 | from mmseg.ops import resize 6 | from ..builder import NECKS 7 | 8 | 9 | @NECKS.register_module() 10 | class MultiLevelNeck(nn.Module): 11 | """MultiLevelNeck. 12 | 13 | A neck structure connect vit backbone and decoder_heads. 14 | 15 | Args: 16 | in_channels (List[int]): Number of input channels per scale. 17 | out_channels (int): Number of output channels (used at each scale). 18 | scales (List[float]): Scale factors for each input feature map. 19 | Default: [0.5, 1, 2, 4] 20 | norm_cfg (dict): Config dict for normalization layer. Default: None. 21 | act_cfg (dict): Config dict for activation layer in ConvModule. 22 | Default: None. 
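        Example (illustrative; the ViT-style channel width is an assumption):
            >>> neck = MultiLevelNeck(in_channels=[768, 768, 768, 768],
            ...                       out_channels=768,
            ...                       scales=[4, 2, 1, 0.5])
            >>> # turns four same-resolution ViT feature maps into a
            >>> # 4x / 2x / 1x / 0.5x feature pyramid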
23 | """ 24 | 25 | def __init__(self, 26 | in_channels, 27 | out_channels, 28 | scales=[0.5, 1, 2, 4], 29 | norm_cfg=None, 30 | act_cfg=None): 31 | super(MultiLevelNeck, self).__init__() 32 | assert isinstance(in_channels, list) 33 | self.in_channels = in_channels 34 | self.out_channels = out_channels 35 | self.scales = scales 36 | self.num_outs = len(scales) 37 | self.lateral_convs = nn.ModuleList() 38 | self.convs = nn.ModuleList() 39 | for in_channel in in_channels: 40 | self.lateral_convs.append( 41 | ConvModule( 42 | in_channel, 43 | out_channels, 44 | kernel_size=1, 45 | norm_cfg=norm_cfg, 46 | act_cfg=act_cfg)) 47 | for _ in range(self.num_outs): 48 | self.convs.append( 49 | ConvModule( 50 | out_channels, 51 | out_channels, 52 | kernel_size=3, 53 | padding=1, 54 | stride=1, 55 | norm_cfg=norm_cfg, 56 | act_cfg=act_cfg)) 57 | 58 | # default init_weights for conv(msra) and norm in ConvModule 59 | def init_weights(self): 60 | for m in self.modules(): 61 | if isinstance(m, nn.Conv2d): 62 | xavier_init(m, distribution='uniform') 63 | 64 | def forward(self, inputs): 65 | assert len(inputs) == len(self.in_channels) 66 | inputs = [ 67 | lateral_conv(inputs[i]) 68 | for i, lateral_conv in enumerate(self.lateral_convs) 69 | ] 70 | # for len(inputs) not equal to self.num_outs 71 | if len(inputs) == 1: 72 | inputs = [inputs[0] for _ in range(self.num_outs)] 73 | outs = [] 74 | for i in range(self.num_outs): 75 | x_resize = resize( 76 | inputs[i], scale_factor=self.scales[i], mode='bilinear') 77 | outs.append(self.convs[i](x_resize)) 78 | return tuple(outs) 79 | -------------------------------------------------------------------------------- /video_datasets/video_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | from util.crop import RandomResizedCrop 3 | from timm.data.constants import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD 4 | import torchvision.transforms as transforms 5 | import torchvision.datasets as datasets 6 | from .k400 import VideoDataset 7 | from .sthv2_dataset import SthV2VideoDataset 8 | import torch 9 | 10 | def build_dataset(args): 11 | if os.path.basename(args.finetune).startswith('VIT_BASE_IN21K'): 12 | _mean = IMAGENET_INCEPTION_MEAN 13 | _std = IMAGENET_INCEPTION_STD 14 | 15 | else: 16 | raise ValueError(os.path.basename(args.finetune)) 17 | 18 | 19 | 20 | if args.dataset == 'K400': 21 | dataset_train = VideoDataset( 22 | list_path=args.data_path[args.dataset]['TRAIN_LIST'], 23 | data_root=args.data_path[args.dataset]['TRAIN_ROOT'], 24 | random_sample=True, 25 | mirror=True, 26 | spatial_size=224, 27 | auto_augment=None, 28 | num_frames=8, 29 | sampling_rate=16, 30 | resize_type='random_short_side_scale_jitter', 31 | scale_range=[1.0, 1.15], 32 | mean=torch.Tensor(_mean), 33 | std=torch.Tensor(_std) 34 | ) 35 | dataset_val = VideoDataset( 36 | list_path=args.data_path[args.dataset]['VAL_LIST'], 37 | data_root=args.data_path[args.dataset]['VAL_ROOT'], 38 | random_sample=False, 39 | spatial_size=224, 40 | num_frames=8, 41 | sampling_rate=16, 42 | num_spatial_views=1, 43 | num_temporal_views=3, 44 | mean=torch.Tensor(_mean), 45 | std=torch.Tensor(_std) 46 | ) 47 | metric = "accuracy" 48 | 49 | 50 | 51 | elif args.dataset == 'SSV2': 52 | dataset_train = SthV2VideoDataset( 53 | list_path=args.data_path[args.dataset]['TRAIN_LIST'], 54 | data_root=args.data_path[args.dataset]['TRAIN_ROOT'], 55 | random_sample=True, 56 | mirror=False, 57 | spatial_size=224, 58 | auto_augment='rand-m7-n4-mstd0.5-inc1', 59 | 
num_frames=8, 60 | sampling_rate=0, 61 | resize_type='random_resized_crop', 62 | scale_range=[0.08, 1.0], 63 | mean=torch.Tensor(_mean), 64 | std=torch.Tensor(_std) 65 | ) 66 | 67 | 68 | dataset_val = SthV2VideoDataset( 69 | list_path=args.data_path[args.dataset]['VAL_LIST'], 70 | data_root=args.data_path[args.dataset]['VAL_ROOT'], 71 | random_sample=False, 72 | spatial_size=224, 73 | num_frames=8, 74 | sampling_rate=0, 75 | num_spatial_views=3, 76 | num_temporal_views=1, 77 | mean=torch.Tensor(_mean), 78 | std=torch.Tensor(_std) 79 | ) 80 | 81 | metric = "accuracy" 82 | 83 | 84 | else: 85 | raise ValueError(args.dataset) 86 | 87 | return dataset_train, dataset_val, metric 88 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/ops/encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | 7 | class Encoding(nn.Module): 8 | """Encoding Layer: a learnable residual encoder. 9 | 10 | Input is of shape (batch_size, channels, height, width). 11 | Output is of shape (batch_size, num_codes, channels). 12 | 13 | Args: 14 | channels: dimension of the features or feature channels 15 | num_codes: number of code words 16 | """ 17 | 18 | def __init__(self, channels, num_codes): 19 | super(Encoding, self).__init__() 20 | # init codewords and smoothing factor 21 | self.channels, self.num_codes = channels, num_codes 22 | std = 1. / ((num_codes * channels)**0.5) 23 | # [num_codes, channels] 24 | self.codewords = nn.Parameter( 25 | torch.empty(num_codes, channels, 26 | dtype=torch.float).uniform_(-std, std), 27 | requires_grad=True) 28 | # [num_codes] 29 | self.scale = nn.Parameter( 30 | torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0), 31 | requires_grad=True) 32 | 33 | @staticmethod 34 | def scaled_l2(x, codewords, scale): 35 | num_codes, channels = codewords.size() 36 | batch_size = x.size(0) 37 | reshaped_scale = scale.view((1, 1, num_codes)) 38 | expanded_x = x.unsqueeze(2).expand( 39 | (batch_size, x.size(1), num_codes, channels)) 40 | reshaped_codewords = codewords.view((1, 1, num_codes, channels)) 41 | 42 | scaled_l2_norm = reshaped_scale * ( 43 | expanded_x - reshaped_codewords).pow(2).sum(dim=3) 44 | return scaled_l2_norm 45 | 46 | @staticmethod 47 | def aggregate(assignment_weights, x, codewords): 48 | num_codes, channels = codewords.size() 49 | reshaped_codewords = codewords.view((1, 1, num_codes, channels)) 50 | batch_size = x.size(0) 51 | 52 | expanded_x = x.unsqueeze(2).expand( 53 | (batch_size, x.size(1), num_codes, channels)) 54 | encoded_feat = (assignment_weights.unsqueeze(3) * 55 | (expanded_x - reshaped_codewords)).sum(dim=1) 56 | return encoded_feat 57 | 58 | def forward(self, x): 59 | assert x.dim() == 4 and x.size(1) == self.channels 60 | # [batch_size, channels, height, width] 61 | batch_size = x.size(0) 62 | # [batch_size, height x width, channels] 63 | x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous() 64 | # assignment_weights: [batch_size, channels, num_codes] 65 | assignment_weights = F.softmax( 66 | self.scaled_l2(x, self.codewords, self.scale), dim=2) 67 | # aggregate 68 | encoded_feat = self.aggregate(assignment_weights, x, self.codewords) 69 | return encoded_feat 70 | 71 | def __repr__(self): 72 | repr_str = self.__class__.__name__ 73 | repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' \ 74 | 
f'x{self.channels})' 75 | return repr_str 76 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/fcn_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from ..builder import HEADS 7 | from .decode_head import BaseDecodeHead 8 | 9 | 10 | @HEADS.register_module() 11 | class FCNHead(BaseDecodeHead): 12 | """Fully Convolution Networks for Semantic Segmentation. 13 | 14 | This head is implemented of `FCNNet `_. 15 | 16 | Args: 17 | num_convs (int): Number of convs in the head. Default: 2. 18 | kernel_size (int): The kernel size for convs in the head. Default: 3. 19 | concat_input (bool): Whether concat the input and output of convs 20 | before classification layer. 21 | dilation (int): The dilation rate for convs in the head. Default: 1. 22 | """ 23 | 24 | def __init__(self, 25 | num_convs=2, 26 | kernel_size=3, 27 | concat_input=True, 28 | dilation=1, 29 | **kwargs): 30 | assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int) 31 | self.num_convs = num_convs 32 | self.concat_input = concat_input 33 | self.kernel_size = kernel_size 34 | super(FCNHead, self).__init__(**kwargs) 35 | if num_convs == 0: 36 | assert self.in_channels == self.channels 37 | 38 | conv_padding = (kernel_size // 2) * dilation 39 | convs = [] 40 | convs.append( 41 | ConvModule( 42 | self.in_channels, 43 | self.channels, 44 | kernel_size=kernel_size, 45 | padding=conv_padding, 46 | dilation=dilation, 47 | conv_cfg=self.conv_cfg, 48 | norm_cfg=self.norm_cfg, 49 | act_cfg=self.act_cfg)) 50 | for i in range(num_convs - 1): 51 | convs.append( 52 | ConvModule( 53 | self.channels, 54 | self.channels, 55 | kernel_size=kernel_size, 56 | padding=conv_padding, 57 | dilation=dilation, 58 | conv_cfg=self.conv_cfg, 59 | norm_cfg=self.norm_cfg, 60 | act_cfg=self.act_cfg)) 61 | if num_convs == 0: 62 | self.convs = nn.Identity() 63 | else: 64 | self.convs = nn.Sequential(*convs) 65 | if self.concat_input: 66 | self.conv_cat = ConvModule( 67 | self.in_channels + self.channels, 68 | self.channels, 69 | kernel_size=kernel_size, 70 | padding=kernel_size // 2, 71 | conv_cfg=self.conv_cfg, 72 | norm_cfg=self.norm_cfg, 73 | act_cfg=self.act_cfg) 74 | 75 | def forward(self, inputs): 76 | """Forward function.""" 77 | x = self._transform_inputs(inputs) 78 | output = self.convs(x) 79 | if self.concat_input: 80 | output = self.conv_cat(torch.cat([x, output], dim=1)) 81 | output = self.cls_seg(output) 82 | return output 83 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmcv_custom/apex_runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except: 16 | print('apex is not installed') 17 | 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 21 | 22 | The checkpoint will have 4 fields: ``meta``, ``state_dict`` and 23 | ``optimizer``, ``amp``. 
By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint. 31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = { 46 | 'meta': meta, 47 | 'state_dict': weights_to_cpu(get_state_dict(model)) 48 | } 49 | # save optimizer state dict in the checkpoint 50 | if isinstance(optimizer, Optimizer): 51 | checkpoint['optimizer'] = optimizer.state_dict() 52 | elif isinstance(optimizer, dict): 53 | checkpoint['optimizer'] = {} 54 | for name, optim in optimizer.items(): 55 | checkpoint['optimizer'][name] = optim.state_dict() 56 | 57 | # # save amp state dict in the checkpoint 58 | # checkpoint['amp'] = apex.amp.state_dict() 59 | 60 | if filename.startswith('pavi://'): 61 | try: 62 | from pavi import modelcloud 63 | from pavi.exception import NodeNotFoundError 64 | except ImportError: 65 | raise ImportError( 66 | 'Please install pavi to load checkpoint from modelcloud.') 67 | model_path = filename[7:] 68 | root = modelcloud.Folder() 69 | model_dir, model_name = osp.split(model_path) 70 | try: 71 | model = modelcloud.get(model_dir) 72 | except NodeNotFoundError: 73 | model = root.create_training_model(model_dir) 74 | with TemporaryDirectory() as tmp_dir: 75 | checkpoint_file = osp.join(tmp_dir, model_name) 76 | with open(checkpoint_file, 'wb') as f: 77 | torch.save(checkpoint, f) 78 | f.flush() 79 | model.create_file(checkpoint_file, name=model_name) 80 | else: 81 | mmcv.mkdir_or_exist(osp.dirname(filename)) 82 | # immediately flush buffer 83 | with open(filename, 'wb') as f: 84 | torch.save(checkpoint, f) 85 | f.flush() 86 | -------------------------------------------------------------------------------- /datasets/functional.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import cv2 3 | import numpy as np 4 | import PIL 5 | import torch 6 | 7 | 8 | def _is_tensor_clip(clip): 9 | return torch.is_tensor(clip) and clip.ndimension() == 4 10 | 11 | 12 | def crop_clip(clip, min_h, min_w, h, w): 13 | if isinstance(clip[0], np.ndarray): 14 | cropped = [img[min_h:min_h + h, min_w:min_w + w, :] for img in clip] 15 | 16 | elif isinstance(clip[0], PIL.Image.Image): 17 | cropped = [ 18 | img.crop((min_w, min_h, min_w + w, min_h + h)) for img in clip 19 | ] 20 | else: 21 | raise TypeError('Expected numpy.ndarray or PIL.Image' + 22 | 'but got list of {0}'.format(type(clip[0]))) 23 | return cropped 24 | 25 | 26 | def resize_clip(clip, size, interpolation='bilinear'): 27 | if isinstance(clip[0], np.ndarray): 28 | if isinstance(size, numbers.Number): 29 | im_h, im_w, im_c = clip[0].shape 30 | # Min spatial dim already matches minimal size 31 | if (im_w <= im_h and im_w == size) or (im_h <= im_w 32 | and im_h == size): 33 | return clip 34 | new_h, new_w = get_resize_sizes(im_h, im_w, size) 35 | size = (new_w, new_h) 36 | else: 37 | size = size[0], size[1] 38 | if interpolation == 'bilinear': 39 | np_inter = 
cv2.INTER_LINEAR 40 | else: 41 | np_inter = cv2.INTER_NEAREST 42 | scaled = [ 43 | cv2.resize(img, size, interpolation=np_inter) for img in clip 44 | ] 45 | elif isinstance(clip[0], PIL.Image.Image): 46 | if isinstance(size, numbers.Number): 47 | im_w, im_h = clip[0].size 48 | # Min spatial dim already matches minimal size 49 | if (im_w <= im_h and im_w == size) or (im_h <= im_w 50 | and im_h == size): 51 | return clip 52 | new_h, new_w = get_resize_sizes(im_h, im_w, size) 53 | size = (new_w, new_h) 54 | else: 55 | size = size[1], size[0] 56 | if interpolation == 'bilinear': 57 | pil_inter = PIL.Image.BILINEAR 58 | else: 59 | pil_inter = PIL.Image.NEAREST 60 | scaled = [img.resize(size, pil_inter) for img in clip] 61 | else: 62 | raise TypeError('Expected numpy.ndarray or PIL.Image' + 63 | 'but got list of {0}'.format(type(clip[0]))) 64 | return scaled 65 | 66 | 67 | def get_resize_sizes(im_h, im_w, size): 68 | if im_w < im_h: 69 | ow = size 70 | oh = int(size * im_h / im_w) 71 | else: 72 | oh = size 73 | ow = int(size * im_w / im_h) 74 | return oh, ow 75 | 76 | 77 | def normalize(clip, mean, std, inplace=False): 78 | if not _is_tensor_clip(clip): 79 | raise TypeError('tensor is not a torch clip.') 80 | 81 | if not inplace: 82 | clip = clip.clone() 83 | 84 | dtype = clip.dtype 85 | mean = torch.as_tensor(mean, dtype=dtype, device=clip.device) 86 | std = torch.as_tensor(std, dtype=dtype, device=clip.device) 87 | clip.sub_(mean[:, None, None, None]).div_(std[:, None, None, None]) 88 | 89 | return clip 90 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/setr_up_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | from mmcv.cnn import ConvModule, build_norm_layer 4 | 5 | from mmseg.ops import Upsample 6 | from ..builder import HEADS 7 | from .decode_head import BaseDecodeHead 8 | 9 | 10 | @HEADS.register_module() 11 | class SETRUPHead(BaseDecodeHead): 12 | """Naive upsampling head and Progressive upsampling head of SETR. 13 | 14 | Naive or PUP head of `SETR `_. 15 | 16 | Args: 17 | norm_layer (dict): Config dict for input normalization. 18 | Default: norm_layer=dict(type='LN', eps=1e-6, requires_grad=True). 19 | num_convs (int): Number of decoder convolutions. Default: 1. 20 | up_scale (int): The scale factor of interpolate. Default:4. 21 | kernel_size (int): The kernel size of convolution when decoding 22 | feature information from backbone. Default: 3. 23 | init_cfg (dict | list[dict] | None): Initialization config dict. 24 | Default: dict( 25 | type='Constant', val=1.0, bias=0, layer='LayerNorm'). 26 | """ 27 | 28 | def __init__(self, 29 | norm_layer=dict(type='LN', eps=1e-6, requires_grad=True), 30 | num_convs=1, 31 | up_scale=4, 32 | kernel_size=3, 33 | init_cfg=[ 34 | dict(type='Constant', val=1.0, bias=0, layer='LayerNorm'), 35 | dict( 36 | type='Normal', 37 | std=0.01, 38 | override=dict(name='conv_seg')) 39 | ], 40 | **kwargs): 41 | 42 | assert kernel_size in [1, 3], 'kernel_size must be 1 or 3.' 
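        # Only 1x1 or 3x3 decoding convolutions are supported here, so the
        # padding computed as (kernel_size - 1) // 2 below always keeps the
        # spatial size unchanged before each `up_scale`-times upsampling step.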
43 | 44 | super(SETRUPHead, self).__init__(init_cfg=init_cfg, **kwargs) 45 | 46 | assert isinstance(self.in_channels, int) 47 | 48 | _, self.norm = build_norm_layer(norm_layer, self.in_channels) 49 | 50 | self.up_convs = nn.ModuleList() 51 | in_channels = self.in_channels 52 | out_channels = self.channels 53 | for _ in range(num_convs): 54 | self.up_convs.append( 55 | nn.Sequential( 56 | ConvModule( 57 | in_channels=in_channels, 58 | out_channels=out_channels, 59 | kernel_size=kernel_size, 60 | stride=1, 61 | padding=int(kernel_size - 1) // 2, 62 | norm_cfg=self.norm_cfg, 63 | act_cfg=self.act_cfg), 64 | Upsample( 65 | scale_factor=up_scale, 66 | mode='bilinear', 67 | align_corners=self.align_corners))) 68 | in_channels = out_channels 69 | 70 | def forward(self, x): 71 | x = self._transform_inputs(x) 72 | 73 | n, c, h, w = x.shape 74 | x = x.reshape(n, c, h * w).transpose(2, 1).contiguous() 75 | x = self.norm(x) 76 | x = x.transpose(1, 2).reshape(n, c, h, w).contiguous() 77 | 78 | for up_conv in self.up_convs: 79 | x = up_conv(x) 80 | out = self.cls_seg(x) 81 | return out 82 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | 4 | 5 | def accuracy(pred, target, topk=1, thresh=None): 6 | """Calculate accuracy according to the prediction and target. 7 | 8 | Args: 9 | pred (torch.Tensor): The model prediction, shape (N, num_class, ...) 10 | target (torch.Tensor): The target of each prediction, shape (N, , ...) 11 | topk (int | tuple[int], optional): If the predictions in ``topk`` 12 | matches the target, the predictions will be regarded as 13 | correct ones. Defaults to 1. 14 | thresh (float, optional): If not None, predictions with scores under 15 | this threshold are considered incorrect. Default to None. 16 | 17 | Returns: 18 | float | tuple[float]: If the input ``topk`` is a single integer, 19 | the function will return a single float as accuracy. If 20 | ``topk`` is a tuple containing multiple integers, the 21 | function will return a tuple containing accuracies of 22 | each ``topk`` number. 23 | """ 24 | assert isinstance(topk, (int, tuple)) 25 | if isinstance(topk, int): 26 | topk = (topk, ) 27 | return_single = True 28 | else: 29 | return_single = False 30 | 31 | maxk = max(topk) 32 | if pred.size(0) == 0: 33 | accu = [pred.new_tensor(0.) for i in range(len(topk))] 34 | return accu[0] if return_single else accu 35 | assert pred.ndim == target.ndim + 1 36 | assert pred.size(0) == target.size(0) 37 | assert maxk <= pred.size(1), \ 38 | f'maxk {maxk} exceeds pred dimension {pred.size(1)}' 39 | pred_value, pred_label = pred.topk(maxk, dim=1) 40 | # transpose to shape (maxk, N, ...) 41 | pred_label = pred_label.transpose(0, 1) 42 | correct = pred_label.eq(target.unsqueeze(0).expand_as(pred_label)) 43 | if thresh is not None: 44 | # Only prediction values larger than thresh are counted as correct 45 | correct = correct & (pred_value > thresh).t() 46 | res = [] 47 | for k in topk: 48 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 49 | res.append(correct_k.mul_(100.0 / target.numel())) 50 | return res[0] if return_single else res 51 | 52 | 53 | class Accuracy(nn.Module): 54 | """Accuracy calculation module.""" 55 | 56 | def __init__(self, topk=(1, ), thresh=None): 57 | """Module to calculate the accuracy. 
58 | 59 | Args: 60 | topk (tuple, optional): The criterion used to calculate the 61 | accuracy. Defaults to (1,). 62 | thresh (float, optional): If not None, predictions with scores 63 | under this threshold are considered incorrect. Default to None. 64 | """ 65 | super().__init__() 66 | self.topk = topk 67 | self.thresh = thresh 68 | 69 | def forward(self, pred, target): 70 | """Forward function to calculate accuracy. 71 | 72 | Args: 73 | pred (torch.Tensor): Prediction of models. 74 | target (torch.Tensor): Target for each prediction. 75 | 76 | Returns: 77 | tuple[float]: The accuracies under different topk criterions. 78 | """ 79 | return accuracy(pred, target, self.topk, self.thresh) 80 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/lraspp_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv import is_tuple_of 5 | from mmcv.cnn import ConvModule 6 | 7 | from mmseg.ops import resize 8 | from ..builder import HEADS 9 | from .decode_head import BaseDecodeHead 10 | 11 | 12 | @HEADS.register_module() 13 | class LRASPPHead(BaseDecodeHead): 14 | """Lite R-ASPP (LRASPP) head is proposed in Searching for MobileNetV3. 15 | 16 | This head is the improved implementation of `Searching for MobileNetV3 17 | `_. 18 | 19 | Args: 20 | branch_channels (tuple[int]): The number of output channels in every 21 | each branch. Default: (32, 64). 22 | """ 23 | 24 | def __init__(self, branch_channels=(32, 64), **kwargs): 25 | super(LRASPPHead, self).__init__(**kwargs) 26 | if self.input_transform != 'multiple_select': 27 | raise ValueError('in Lite R-ASPP (LRASPP) head, input_transform ' 28 | f'must be \'multiple_select\'. 
But received ' 29 | f'\'{self.input_transform}\'') 30 | assert is_tuple_of(branch_channels, int) 31 | assert len(branch_channels) == len(self.in_channels) - 1 32 | self.branch_channels = branch_channels 33 | 34 | self.convs = nn.Sequential() 35 | self.conv_ups = nn.Sequential() 36 | for i in range(len(branch_channels)): 37 | self.convs.add_module( 38 | f'conv{i}', 39 | nn.Conv2d( 40 | self.in_channels[i], branch_channels[i], 1, bias=False)) 41 | self.conv_ups.add_module( 42 | f'conv_up{i}', 43 | ConvModule( 44 | self.channels + branch_channels[i], 45 | self.channels, 46 | 1, 47 | norm_cfg=self.norm_cfg, 48 | act_cfg=self.act_cfg, 49 | bias=False)) 50 | 51 | self.conv_up_input = nn.Conv2d(self.channels, self.channels, 1) 52 | 53 | self.aspp_conv = ConvModule( 54 | self.in_channels[-1], 55 | self.channels, 56 | 1, 57 | norm_cfg=self.norm_cfg, 58 | act_cfg=self.act_cfg, 59 | bias=False) 60 | self.image_pool = nn.Sequential( 61 | nn.AvgPool2d(kernel_size=49, stride=(16, 20)), 62 | ConvModule( 63 | self.in_channels[2], 64 | self.channels, 65 | 1, 66 | act_cfg=dict(type='Sigmoid'), 67 | bias=False)) 68 | 69 | def forward(self, inputs): 70 | """Forward function.""" 71 | inputs = self._transform_inputs(inputs) 72 | 73 | x = inputs[-1] 74 | 75 | x = self.aspp_conv(x) * resize( 76 | self.image_pool(x), 77 | size=x.size()[2:], 78 | mode='bilinear', 79 | align_corners=self.align_corners) 80 | x = self.conv_up_input(x) 81 | 82 | for i in range(len(self.branch_channels) - 1, -1, -1): 83 | x = resize( 84 | x, 85 | size=inputs[i].size()[2:], 86 | mode='bilinear', 87 | align_corners=self.align_corners) 88 | x = torch.cat([x, self.convs[i](inputs[i])], 1) 89 | x = self.conv_ups[i](x) 90 | 91 | return self.cls_seg(x) 92 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/segmentors/cascade_encoder_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from torch import nn 3 | 4 | from mmseg.core import add_prefix 5 | from mmseg.ops import resize 6 | from .. import builder 7 | from ..builder import SEGMENTORS 8 | from .encoder_decoder import EncoderDecoder 9 | 10 | 11 | @SEGMENTORS.register_module() 12 | class CascadeEncoderDecoder(EncoderDecoder): 13 | """Cascade Encoder Decoder segmentors. 14 | 15 | CascadeEncoderDecoder almost the same as EncoderDecoder, while decoders of 16 | CascadeEncoderDecoder are cascaded. The output of previous decoder_head 17 | will be the input of next decoder_head. 
18 | """ 19 | 20 | def __init__(self, 21 | num_stages, 22 | backbone, 23 | decode_head, 24 | neck=None, 25 | auxiliary_head=None, 26 | train_cfg=None, 27 | test_cfg=None, 28 | pretrained=None, 29 | init_cfg=None): 30 | self.num_stages = num_stages 31 | super(CascadeEncoderDecoder, self).__init__( 32 | backbone=backbone, 33 | decode_head=decode_head, 34 | neck=neck, 35 | auxiliary_head=auxiliary_head, 36 | train_cfg=train_cfg, 37 | test_cfg=test_cfg, 38 | pretrained=pretrained, 39 | init_cfg=init_cfg) 40 | 41 | def _init_decode_head(self, decode_head): 42 | """Initialize ``decode_head``""" 43 | assert isinstance(decode_head, list) 44 | assert len(decode_head) == self.num_stages 45 | self.decode_head = nn.ModuleList() 46 | for i in range(self.num_stages): 47 | self.decode_head.append(builder.build_head(decode_head[i])) 48 | self.align_corners = self.decode_head[-1].align_corners 49 | self.num_classes = self.decode_head[-1].num_classes 50 | 51 | def encode_decode(self, img, img_metas): 52 | """Encode images with backbone and decode into a semantic segmentation 53 | map of the same size as input.""" 54 | x = self.extract_feat(img) 55 | out = self.decode_head[0].forward_test(x, img_metas, self.test_cfg) 56 | for i in range(1, self.num_stages): 57 | out = self.decode_head[i].forward_test(x, out, img_metas, 58 | self.test_cfg) 59 | out = resize( 60 | input=out, 61 | size=img.shape[2:], 62 | mode='bilinear', 63 | align_corners=self.align_corners) 64 | return out 65 | 66 | def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): 67 | """Run forward function and calculate loss for decode head in 68 | training.""" 69 | losses = dict() 70 | 71 | loss_decode = self.decode_head[0].forward_train( 72 | x, img_metas, gt_semantic_seg, self.train_cfg) 73 | 74 | losses.update(add_prefix(loss_decode, 'decode_0')) 75 | 76 | for i in range(1, self.num_stages): 77 | # forward test again, maybe unnecessary for most methods. 78 | prev_outputs = self.decode_head[i - 1].forward_test( 79 | x, img_metas, self.test_cfg) 80 | loss_decode = self.decode_head[i].forward_train( 81 | x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg) 82 | losses.update(add_prefix(loss_decode, f'decode_{i}')) 83 | 84 | return losses 85 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/README.md: -------------------------------------------------------------------------------- 1 | # ADE20k Semantic segmentation with BEiT 2 | 3 | ## Getting started 4 | 5 | 1. Install the [mmsegmentation](https://github.com/open-mmlab/mmsegmentation) library and some required packages. 6 | 7 | ```bash 8 | pip install mmcv-full==1.3.0 mmsegmentation==0.11.0 9 | pip install scipy timm==0.3.2 10 | ``` 11 | 12 | 2. Install [apex](https://github.com/NVIDIA/apex) for mixed-precision training 13 | 14 | ```bash 15 | git clone https://github.com/NVIDIA/apex 16 | cd apex 17 | pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./ 18 | ``` 19 | 20 | 3. Follow the guide in [mmseg](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#ade20k) to prepare the ADE20k dataset. 
21 | 22 | 23 | ## Fine-tuning 24 | 25 | Command format: 26 | ``` 27 | tools/dist_train.sh --work-dir --seed 0 --deterministic --options model.pretrained= 28 | ``` 29 | 30 | For example, using a BEiT-base backbone with UperNet: 31 | ```bash 32 | bash tools/dist_train.sh \ 33 | configs/beit/upernet/upernet_beit_base_12_640_slide_160k_ade20k_pt2ft.py 8 \ 34 | --work-dir /path/to/save --seed 0 --deterministic \ 35 | --options model.pretrained=https://conversationhub.blob.core.windows.net/beit-share-public/beit/beit_base_patch16_224_pt22k_ft22k.pth?sv=2021-10-04&st=2023-06-08T11%3A16%3A02Z&se=2033-06-09T11%3A16%3A00Z&sr=c&sp=r&sig=N4pfCVmSeq4L4tS8QbrFVsX6f6q844eft8xSuXdxU48%3D 36 | ``` 37 | 38 | More config files can be found at [`configs/beit/upernet`](configs/beit/upernet). 39 | 40 | 41 | ## Evaluation 42 | 43 | Command format: 44 | ``` 45 | tools/dist_test.sh --eval mIoU 46 | ``` 47 | 48 | For example, evaluate a BEiT-base backbone with UperNet: 49 | ```bash 50 | bash tools/dist_test.sh configs/beit/upernet/upernet_beit_base_12_640_slide_160k_ade20k_pt2ft.py \ 51 | https://conversationhub.blob.core.windows.net/beit-share-public/beit/beit_base_patch16_640_pt22k_ft22ktoade20k.pth?sv=2021-10-04&st=2023-06-08T11%3A16%3A02Z&se=2033-06-09T11%3A16%3A00Z&sr=c&sp=r&sig=N4pfCVmSeq4L4tS8QbrFVsX6f6q844eft8xSuXdxU48%3D 4 --eval mIoU 52 | ``` 53 | 54 | Expected results: 55 | ``` 56 | +--------+-------+-------+-------+ 57 | | Scope | mIoU | mAcc | aAcc | 58 | +--------+-------+-------+-------+ 59 | | global | 53.61 | 64.82 | 84.62 | 60 | +--------+-------+-------+-------+ 61 | ``` 62 | 63 | Multi-scale + flip (`\*_ms.py`) 64 | ``` 65 | bash tools/dist_test.sh configs/beit/upernet/upernet_beit_base_12_640_slide_160k_ade20k_ms.py \ 66 | https://conversationhub.blob.core.windows.net/beit-share-public/beit/beit_base_patch16_640_pt22k_ft22ktoade20k.pth?sv=2021-10-04&st=2023-06-08T11%3A16%3A02Z&se=2033-06-09T11%3A16%3A00Z&sr=c&sp=r&sig=N4pfCVmSeq4L4tS8QbrFVsX6f6q844eft8xSuXdxU48%3D 4 --eval mIoU 67 | ``` 68 | 69 | Expected results: 70 | ``` 71 | +--------+-------+-------+------+ 72 | | Scope | mIoU | mAcc | aAcc | 73 | +--------+-------+-------+------+ 74 | | global | 54.26 | 65.28 | 84.9 | 75 | +--------+-------+-------+------+ 76 | ``` 77 | 78 | --- 79 | 80 | ## Acknowledgment 81 | 82 | This code is built using the [mmsegmentation](https://github.com/open-mmlab/mmsegmentation) library, [Timm](https://github.com/rwightman/pytorch-image-models) library, the [Swin](https://github.com/microsoft/Swin-Transformer) repository, [XCiT](https://github.com/facebookresearch/xcit) and the [SETR](https://github.com/fudan-zvg/SETR) repository. 83 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/loveda.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | import mmcv 5 | import numpy as np 6 | from PIL import Image 7 | 8 | from .builder import DATASETS 9 | from .custom import CustomDataset 10 | 11 | 12 | @DATASETS.register_module() 13 | class LoveDADataset(CustomDataset): 14 | """LoveDA dataset. 15 | 16 | In segmentation map annotation for LoveDA, 0 is the ignore index. 17 | ``reduce_zero_label`` should be set to True. The ``img_suffix`` and 18 | ``seg_map_suffix`` are both fixed to '.png'. 
19 | """ 20 | CLASSES = ('background', 'building', 'road', 'water', 'barren', 'forest', 21 | 'agricultural') 22 | 23 | PALETTE = [[255, 255, 255], [255, 0, 0], [255, 255, 0], [0, 0, 255], 24 | [159, 129, 183], [0, 255, 0], [255, 195, 128]] 25 | 26 | def __init__(self, **kwargs): 27 | super(LoveDADataset, self).__init__( 28 | img_suffix='.png', 29 | seg_map_suffix='.png', 30 | reduce_zero_label=True, 31 | **kwargs) 32 | 33 | def results2img(self, results, imgfile_prefix, indices=None): 34 | """Write the segmentation results to images. 35 | 36 | Args: 37 | results (list[ndarray]): Testing results of the 38 | dataset. 39 | imgfile_prefix (str): The filename prefix of the png files. 40 | If the prefix is "somepath/xxx", 41 | the png files will be named "somepath/xxx.png". 42 | indices (list[int], optional): Indices of input results, if not 43 | set, all the indices of the dataset will be used. 44 | Default: None. 45 | 46 | Returns: 47 | list[str: str]: result txt files which contains corresponding 48 | semantic segmentation images. 49 | """ 50 | 51 | mmcv.mkdir_or_exist(imgfile_prefix) 52 | result_files = [] 53 | for result, idx in zip(results, indices): 54 | 55 | filename = self.img_infos[idx]['filename'] 56 | basename = osp.splitext(osp.basename(filename))[0] 57 | 58 | png_filename = osp.join(imgfile_prefix, f'{basename}.png') 59 | 60 | # The index range of official requirement is from 0 to 6. 61 | output = Image.fromarray(result.astype(np.uint8)) 62 | output.save(png_filename) 63 | result_files.append(png_filename) 64 | 65 | return result_files 66 | 67 | def format_results(self, results, imgfile_prefix, indices=None): 68 | """Format the results into dir (standard format for LoveDA evaluation). 69 | 70 | Args: 71 | results (list): Testing results of the dataset. 72 | imgfile_prefix (str): The prefix of images files. It 73 | includes the file path and the prefix of filename, e.g., 74 | "a/b/prefix". 75 | indices (list[int], optional): Indices of input results, 76 | if not set, all the indices of the dataset will be used. 77 | Default: None. 78 | 79 | Returns: 80 | tuple: (result_files, tmp_dir), result_files is a list containing 81 | the image paths, tmp_dir is the temporal directory created 82 | for saving json/png files when img_prefix is not specified. 83 | """ 84 | if indices is None: 85 | indices = list(range(len(self))) 86 | 87 | assert isinstance(results, list), 'results must be a list.' 88 | assert isinstance(indices, list), 'indices must be a list.' 89 | 90 | result_files = self.results2img(results, imgfile_prefix, indices) 91 | 92 | return result_files 93 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/utils/res_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn import build_conv_layer, build_norm_layer 3 | from mmcv.runner import Sequential 4 | from torch import nn as nn 5 | 6 | 7 | class ResLayer(Sequential): 8 | """ResLayer to build ResNet style backbone. 9 | 10 | Args: 11 | block (nn.Module): block used to build ResLayer. 12 | inplanes (int): inplanes of block. 13 | planes (int): planes of block. 14 | num_blocks (int): number of blocks. 15 | stride (int): stride of the first block. Default: 1 16 | avg_down (bool): Use AvgPool instead of stride conv when 17 | downsampling in the bottleneck. Default: False 18 | conv_cfg (dict): dictionary to construct and config conv layer. 
19 | Default: None 20 | norm_cfg (dict): dictionary to construct and config norm layer. 21 | Default: dict(type='BN') 22 | multi_grid (int | None): Multi grid dilation rates of last 23 | stage. Default: None 24 | contract_dilation (bool): Whether contract first dilation of each layer 25 | Default: False 26 | """ 27 | 28 | def __init__(self, 29 | block, 30 | inplanes, 31 | planes, 32 | num_blocks, 33 | stride=1, 34 | dilation=1, 35 | avg_down=False, 36 | conv_cfg=None, 37 | norm_cfg=dict(type='BN'), 38 | multi_grid=None, 39 | contract_dilation=False, 40 | **kwargs): 41 | self.block = block 42 | 43 | downsample = None 44 | if stride != 1 or inplanes != planes * block.expansion: 45 | downsample = [] 46 | conv_stride = stride 47 | if avg_down: 48 | conv_stride = 1 49 | downsample.append( 50 | nn.AvgPool2d( 51 | kernel_size=stride, 52 | stride=stride, 53 | ceil_mode=True, 54 | count_include_pad=False)) 55 | downsample.extend([ 56 | build_conv_layer( 57 | conv_cfg, 58 | inplanes, 59 | planes * block.expansion, 60 | kernel_size=1, 61 | stride=conv_stride, 62 | bias=False), 63 | build_norm_layer(norm_cfg, planes * block.expansion)[1] 64 | ]) 65 | downsample = nn.Sequential(*downsample) 66 | 67 | layers = [] 68 | if multi_grid is None: 69 | if dilation > 1 and contract_dilation: 70 | first_dilation = dilation // 2 71 | else: 72 | first_dilation = dilation 73 | else: 74 | first_dilation = multi_grid[0] 75 | layers.append( 76 | block( 77 | inplanes=inplanes, 78 | planes=planes, 79 | stride=stride, 80 | dilation=first_dilation, 81 | downsample=downsample, 82 | conv_cfg=conv_cfg, 83 | norm_cfg=norm_cfg, 84 | **kwargs)) 85 | inplanes = planes * block.expansion 86 | for i in range(1, num_blocks): 87 | layers.append( 88 | block( 89 | inplanes=inplanes, 90 | planes=planes, 91 | stride=1, 92 | dilation=dilation if multi_grid is None else multi_grid[i], 93 | conv_cfg=conv_cfg, 94 | norm_cfg=norm_cfg, 95 | **kwargs)) 96 | super(ResLayer, self).__init__(*layers) 97 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/core/seg/sampler/ohem_pixel_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from ..builder import PIXEL_SAMPLERS 7 | from .base_pixel_sampler import BasePixelSampler 8 | 9 | 10 | @PIXEL_SAMPLERS.register_module() 11 | class OHEMPixelSampler(BasePixelSampler): 12 | """Online Hard Example Mining Sampler for segmentation. 13 | 14 | Args: 15 | context (nn.Module): The context of sampler, subclass of 16 | :obj:`BaseDecodeHead`. 17 | thresh (float, optional): The threshold for hard example selection. 18 | Below which, are prediction with low confidence. If not 19 | specified, the hard examples will be pixels of top ``min_kept`` 20 | loss. Default: None. 21 | min_kept (int, optional): The minimum number of predictions to keep. 22 | Default: 100000. 23 | """ 24 | 25 | def __init__(self, context, thresh=None, min_kept=100000): 26 | super(OHEMPixelSampler, self).__init__() 27 | self.context = context 28 | assert min_kept > 1 29 | self.thresh = thresh 30 | self.min_kept = min_kept 31 | 32 | def sample(self, seg_logit, seg_label): 33 | """Sample pixels that have high loss or with low prediction confidence. 
34 | 35 | Args: 36 | seg_logit (torch.Tensor): segmentation logits, shape (N, C, H, W) 37 | seg_label (torch.Tensor): segmentation label, shape (N, 1, H, W) 38 | 39 | Returns: 40 | torch.Tensor: segmentation weight, shape (N, H, W) 41 | """ 42 | with torch.no_grad(): 43 | assert seg_logit.shape[2:] == seg_label.shape[2:] 44 | assert seg_label.shape[1] == 1 45 | seg_label = seg_label.squeeze(1).long() 46 | batch_kept = self.min_kept * seg_label.size(0) 47 | valid_mask = seg_label != self.context.ignore_index 48 | seg_weight = seg_logit.new_zeros(size=seg_label.size()) 49 | valid_seg_weight = seg_weight[valid_mask] 50 | if self.thresh is not None: 51 | seg_prob = F.softmax(seg_logit, dim=1) 52 | 53 | tmp_seg_label = seg_label.clone().unsqueeze(1) 54 | tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0 55 | seg_prob = seg_prob.gather(1, tmp_seg_label).squeeze(1) 56 | sort_prob, sort_indices = seg_prob[valid_mask].sort() 57 | 58 | if sort_prob.numel() > 0: 59 | min_threshold = sort_prob[min(batch_kept, 60 | sort_prob.numel() - 1)] 61 | else: 62 | min_threshold = 0.0 63 | threshold = max(min_threshold, self.thresh) 64 | valid_seg_weight[seg_prob[valid_mask] < threshold] = 1. 65 | else: 66 | if not isinstance(self.context.loss_decode, nn.ModuleList): 67 | losses_decode = [self.context.loss_decode] 68 | else: 69 | losses_decode = self.context.loss_decode 70 | losses = 0.0 71 | for loss_module in losses_decode: 72 | losses += loss_module( 73 | seg_logit, 74 | seg_label, 75 | weight=None, 76 | ignore_index=self.context.ignore_index, 77 | reduction_override='none') 78 | 79 | # faster than topk according to https://github.com/pytorch/pytorch/issues/22812 # noqa 80 | _, sort_indices = losses[valid_mask].sort(descending=True) 81 | valid_seg_weight[sort_indices[:batch_kept]] = 1. 82 | 83 | seg_weight[valid_mask] = valid_seg_weight 84 | 85 | return seg_weight 86 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/psp_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from mmseg.ops import resize 7 | from ..builder import HEADS 8 | from .decode_head import BaseDecodeHead 9 | 10 | 11 | class PPM(nn.ModuleList): 12 | """Pooling Pyramid Module used in PSPNet. 13 | 14 | Args: 15 | pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid 16 | Module. 17 | in_channels (int): Input channels. 18 | channels (int): Channels after modules, before conv_seg. 19 | conv_cfg (dict|None): Config of conv layers. 20 | norm_cfg (dict|None): Config of norm layers. 21 | act_cfg (dict): Config of activation layers. 22 | align_corners (bool): align_corners argument of F.interpolate. 
23 | """ 24 | 25 | def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, 26 | act_cfg, align_corners, **kwargs): 27 | super(PPM, self).__init__() 28 | self.pool_scales = pool_scales 29 | self.align_corners = align_corners 30 | self.in_channels = in_channels 31 | self.channels = channels 32 | self.conv_cfg = conv_cfg 33 | self.norm_cfg = norm_cfg 34 | self.act_cfg = act_cfg 35 | for pool_scale in pool_scales: 36 | self.append( 37 | nn.Sequential( 38 | nn.AdaptiveAvgPool2d(pool_scale), 39 | ConvModule( 40 | self.in_channels, 41 | self.channels, 42 | 1, 43 | conv_cfg=self.conv_cfg, 44 | norm_cfg=self.norm_cfg, 45 | act_cfg=self.act_cfg, 46 | **kwargs))) 47 | 48 | def forward(self, x): 49 | """Forward function.""" 50 | ppm_outs = [] 51 | for ppm in self: 52 | ppm_out = ppm(x) 53 | upsampled_ppm_out = resize( 54 | ppm_out, 55 | size=x.size()[2:], 56 | mode='bilinear', 57 | align_corners=self.align_corners) 58 | ppm_outs.append(upsampled_ppm_out) 59 | return ppm_outs 60 | 61 | 62 | @HEADS.register_module() 63 | class PSPHead(BaseDecodeHead): 64 | """Pyramid Scene Parsing Network. 65 | 66 | This head is the implementation of 67 | `PSPNet `_. 68 | 69 | Args: 70 | pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid 71 | Module. Default: (1, 2, 3, 6). 72 | """ 73 | 74 | def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): 75 | super(PSPHead, self).__init__(**kwargs) 76 | assert isinstance(pool_scales, (list, tuple)) 77 | self.pool_scales = pool_scales 78 | self.psp_modules = PPM( 79 | self.pool_scales, 80 | self.in_channels, 81 | self.channels, 82 | conv_cfg=self.conv_cfg, 83 | norm_cfg=self.norm_cfg, 84 | act_cfg=self.act_cfg, 85 | align_corners=self.align_corners) 86 | self.bottleneck = ConvModule( 87 | self.in_channels + len(pool_scales) * self.channels, 88 | self.channels, 89 | 3, 90 | padding=1, 91 | conv_cfg=self.conv_cfg, 92 | norm_cfg=self.norm_cfg, 93 | act_cfg=self.act_cfg) 94 | 95 | def forward(self, inputs): 96 | """Forward function.""" 97 | x = self._transform_inputs(inputs) 98 | psp_outs = [x] 99 | psp_outs.extend(self.psp_modules(x)) 100 | psp_outs = torch.cat(psp_outs, dim=1) 101 | output = self.bottleneck(psp_outs) 102 | output = self.cls_seg(output) 103 | return output 104 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmcv_custom/layer_decay_optimizer_constructor.py: -------------------------------------------------------------------------------- 1 | import json 2 | from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor 3 | from mmcv.runner import get_dist_info 4 | 5 | 6 | def get_num_layer_for_vit(var_name, num_max_layer): 7 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 8 | return 0 9 | elif var_name.startswith("backbone.patch_embed"): 10 | return 0 11 | elif var_name.startswith("backbone.blocks"): 12 | layer_id = int(var_name.split('.')[2]) 13 | return layer_id + 1 14 | else: 15 | return num_max_layer - 1 16 | 17 | 18 | @OPTIMIZER_BUILDERS.register_module() 19 | class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor): 20 | def add_params(self, params, module, prefix='', is_dcn_module=None): 21 | """Add all parameters of module to the params list. 22 | The parameters of the given module will be added to the list of param 23 | groups, with specific rules defined by paramwise_cfg. 24 | Args: 25 | params (list[dict]): A list of param groups, it will be modified 26 | in place. 
27 | module (nn.Module): The module to be added. 28 | prefix (str): The prefix of the module 29 | is_dcn_module (int|float|None): If the current module is a 30 | submodule of DCN, `is_dcn_module` will be passed to 31 | control conv_offset layer's learning rate. Defaults to None. 32 | """ 33 | parameter_groups = {} 34 | print(self.paramwise_cfg) 35 | num_layers = self.paramwise_cfg.get('num_layers') + 2 36 | layer_decay_rate = self.paramwise_cfg.get('layer_decay_rate') 37 | print("Build LayerDecayOptimizerConstructor %f - %d" % (layer_decay_rate, num_layers)) 38 | weight_decay = self.base_wd 39 | 40 | for name, param in module.named_parameters(): 41 | if not param.requires_grad: 42 | continue # frozen weights 43 | if len(param.shape) == 1 or name.endswith(".bias") or name in ('pos_embed', 'cls_token'): 44 | group_name = "no_decay" 45 | this_weight_decay = 0. 46 | else: 47 | group_name = "decay" 48 | this_weight_decay = weight_decay 49 | 50 | layer_id = get_num_layer_for_vit(name, num_layers) 51 | group_name = "layer_%d_%s" % (layer_id, group_name) 52 | 53 | if group_name not in parameter_groups: 54 | scale = layer_decay_rate ** (num_layers - layer_id - 1) 55 | 56 | parameter_groups[group_name] = { 57 | "weight_decay": this_weight_decay, 58 | "params": [], 59 | "param_names": [], 60 | "lr_scale": scale, 61 | "group_name": group_name, 62 | "lr": scale * self.base_lr, 63 | } 64 | 65 | parameter_groups[group_name]["params"].append(param) 66 | parameter_groups[group_name]["param_names"].append(name) 67 | rank, _ = get_dist_info() 68 | if rank == 0: 69 | to_display = {} 70 | for key in parameter_groups: 71 | to_display[key] = { 72 | "param_names": parameter_groups[key]["param_names"], 73 | "lr_scale": parameter_groups[key]["lr_scale"], 74 | "lr": parameter_groups[key]["lr"], 75 | "weight_decay": parameter_groups[key]["weight_decay"], 76 | } 77 | print("Param groups = %s" % json.dumps(to_display, indent=2)) 78 | 79 | # state_dict = module.state_dict() 80 | # for group_name in parameter_groups: 81 | # group = parameter_groups[group_name] 82 | # for name in group["param_names"]: 83 | # group["params"].append(state_dict[name]) 84 | params.extend(parameter_groups.values()) 85 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/aspp_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from mmseg.ops import resize 7 | from ..builder import HEADS 8 | from .decode_head import BaseDecodeHead 9 | 10 | 11 | class ASPPModule(nn.ModuleList): 12 | """Atrous Spatial Pyramid Pooling (ASPP) Module. 13 | 14 | Args: 15 | dilations (tuple[int]): Dilation rate of each layer. 16 | in_channels (int): Input channels. 17 | channels (int): Channels after modules, before conv_seg. 18 | conv_cfg (dict|None): Config of conv layers. 19 | norm_cfg (dict|None): Config of norm layers. 20 | act_cfg (dict): Config of activation layers. 
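
    Example (illustrative sketch; the input shape, channel counts and configs
    are arbitrary choices):

        >>> import torch
        >>> aspp = ASPPModule(dilations=(1, 6, 12, 18), in_channels=2048,
        ...                   channels=512, conv_cfg=None, norm_cfg=None,
        ...                   act_cfg=dict(type='ReLU'))
        >>> outs = aspp(torch.randn(1, 2048, 32, 32))
        >>> [tuple(o.shape) for o in outs]
        [(1, 512, 32, 32), (1, 512, 32, 32), (1, 512, 32, 32), (1, 512, 32, 32)]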
21 | """ 22 | 23 | def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg, 24 | act_cfg): 25 | super(ASPPModule, self).__init__() 26 | self.dilations = dilations 27 | self.in_channels = in_channels 28 | self.channels = channels 29 | self.conv_cfg = conv_cfg 30 | self.norm_cfg = norm_cfg 31 | self.act_cfg = act_cfg 32 | for dilation in dilations: 33 | self.append( 34 | ConvModule( 35 | self.in_channels, 36 | self.channels, 37 | 1 if dilation == 1 else 3, 38 | dilation=dilation, 39 | padding=0 if dilation == 1 else dilation, 40 | conv_cfg=self.conv_cfg, 41 | norm_cfg=self.norm_cfg, 42 | act_cfg=self.act_cfg)) 43 | 44 | def forward(self, x): 45 | """Forward function.""" 46 | aspp_outs = [] 47 | for aspp_module in self: 48 | aspp_outs.append(aspp_module(x)) 49 | 50 | return aspp_outs 51 | 52 | 53 | @HEADS.register_module() 54 | class ASPPHead(BaseDecodeHead): 55 | """Rethinking Atrous Convolution for Semantic Image Segmentation. 56 | 57 | This head is the implementation of `DeepLabV3 58 | `_. 59 | 60 | Args: 61 | dilations (tuple[int]): Dilation rates for ASPP module. 62 | Default: (1, 6, 12, 18). 63 | """ 64 | 65 | def __init__(self, dilations=(1, 6, 12, 18), **kwargs): 66 | super(ASPPHead, self).__init__(**kwargs) 67 | assert isinstance(dilations, (list, tuple)) 68 | self.dilations = dilations 69 | self.image_pool = nn.Sequential( 70 | nn.AdaptiveAvgPool2d(1), 71 | ConvModule( 72 | self.in_channels, 73 | self.channels, 74 | 1, 75 | conv_cfg=self.conv_cfg, 76 | norm_cfg=self.norm_cfg, 77 | act_cfg=self.act_cfg)) 78 | self.aspp_modules = ASPPModule( 79 | dilations, 80 | self.in_channels, 81 | self.channels, 82 | conv_cfg=self.conv_cfg, 83 | norm_cfg=self.norm_cfg, 84 | act_cfg=self.act_cfg) 85 | self.bottleneck = ConvModule( 86 | (len(dilations) + 1) * self.channels, 87 | self.channels, 88 | 3, 89 | padding=1, 90 | conv_cfg=self.conv_cfg, 91 | norm_cfg=self.norm_cfg, 92 | act_cfg=self.act_cfg) 93 | 94 | def forward(self, inputs): 95 | """Forward function.""" 96 | x = self._transform_inputs(inputs) 97 | aspp_outs = [ 98 | resize( 99 | self.image_pool(x), 100 | size=x.size()[2:], 101 | mode='bilinear', 102 | align_corners=self.align_corners) 103 | ] 104 | aspp_outs.extend(self.aspp_modules(x)) 105 | aspp_outs = torch.cat(aspp_outs, dim=1) 106 | output = self.bottleneck(aspp_outs) 107 | output = self.cls_seg(output) 108 | return output 109 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/sep_aspp_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule 5 | 6 | from mmseg.ops import resize 7 | from ..builder import HEADS 8 | from .aspp_head import ASPPHead, ASPPModule 9 | 10 | 11 | class DepthwiseSeparableASPPModule(ASPPModule): 12 | """Atrous Spatial Pyramid Pooling (ASPP) Module with depthwise separable 13 | conv.""" 14 | 15 | def __init__(self, **kwargs): 16 | super(DepthwiseSeparableASPPModule, self).__init__(**kwargs) 17 | for i, dilation in enumerate(self.dilations): 18 | if dilation > 1: 19 | self[i] = DepthwiseSeparableConvModule( 20 | self.in_channels, 21 | self.channels, 22 | 3, 23 | dilation=dilation, 24 | padding=dilation, 25 | norm_cfg=self.norm_cfg, 26 | act_cfg=self.act_cfg) 27 | 28 | 29 | @HEADS.register_module() 30 | class DepthwiseSeparableASPPHead(ASPPHead): 31 | """Encoder-Decoder with Atrous Separable Convolution for Semantic Image 32 | Segmentation. 33 | 34 | This head is the implementation of `DeepLabV3+ 35 | `_. 36 | 37 | Args: 38 | c1_in_channels (int): The input channels of c1 decoder. If is 0, 39 | the no decoder will be used. 40 | c1_channels (int): The intermediate channels of c1 decoder. 41 | """ 42 | 43 | def __init__(self, c1_in_channels, c1_channels, **kwargs): 44 | super(DepthwiseSeparableASPPHead, self).__init__(**kwargs) 45 | assert c1_in_channels >= 0 46 | self.aspp_modules = DepthwiseSeparableASPPModule( 47 | dilations=self.dilations, 48 | in_channels=self.in_channels, 49 | channels=self.channels, 50 | conv_cfg=self.conv_cfg, 51 | norm_cfg=self.norm_cfg, 52 | act_cfg=self.act_cfg) 53 | if c1_in_channels > 0: 54 | self.c1_bottleneck = ConvModule( 55 | c1_in_channels, 56 | c1_channels, 57 | 1, 58 | conv_cfg=self.conv_cfg, 59 | norm_cfg=self.norm_cfg, 60 | act_cfg=self.act_cfg) 61 | else: 62 | self.c1_bottleneck = None 63 | self.sep_bottleneck = nn.Sequential( 64 | DepthwiseSeparableConvModule( 65 | self.channels + c1_channels, 66 | self.channels, 67 | 3, 68 | padding=1, 69 | norm_cfg=self.norm_cfg, 70 | act_cfg=self.act_cfg), 71 | DepthwiseSeparableConvModule( 72 | self.channels, 73 | self.channels, 74 | 3, 75 | padding=1, 76 | norm_cfg=self.norm_cfg, 77 | act_cfg=self.act_cfg)) 78 | 79 | def forward(self, inputs): 80 | """Forward function.""" 81 | x = self._transform_inputs(inputs) 82 | aspp_outs = [ 83 | resize( 84 | self.image_pool(x), 85 | size=x.size()[2:], 86 | mode='bilinear', 87 | align_corners=self.align_corners) 88 | ] 89 | aspp_outs.extend(self.aspp_modules(x)) 90 | aspp_outs = torch.cat(aspp_outs, dim=1) 91 | output = self.bottleneck(aspp_outs) 92 | if self.c1_bottleneck is not None: 93 | c1_output = self.c1_bottleneck(inputs[0]) 94 | output = resize( 95 | input=output, 96 | size=c1_output.shape[2:], 97 | mode='bilinear', 98 | align_corners=self.align_corners) 99 | output = torch.cat([output, c1_output], dim=1) 100 | output = self.sep_bottleneck(output) 101 | output = self.cls_seg(output) 102 | return output 103 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/stdc_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | from ..builder import HEADS 6 | from .fcn_head import FCNHead 7 | 8 | 9 | @HEADS.register_module() 10 | class STDCHead(FCNHead): 11 | """This head is the implementation of `Rethinking BiSeNet For Real-time 12 | Semantic Segmentation `_. 
13 | 14 | Args: 15 | boundary_threshold (float): The threshold of calculating boundary. 16 | Default: 0.1. 17 | """ 18 | 19 | def __init__(self, boundary_threshold=0.1, **kwargs): 20 | super(STDCHead, self).__init__(**kwargs) 21 | self.boundary_threshold = boundary_threshold 22 | # Using register buffer to make laplacian kernel on the same 23 | # device of `seg_label`. 24 | self.register_buffer( 25 | 'laplacian_kernel', 26 | torch.tensor([-1, -1, -1, -1, 8, -1, -1, -1, -1], 27 | dtype=torch.float32, 28 | requires_grad=False).reshape((1, 1, 3, 3))) 29 | self.fusion_kernel = torch.nn.Parameter( 30 | torch.tensor([[6. / 10], [3. / 10], [1. / 10]], 31 | dtype=torch.float32).reshape(1, 3, 1, 1), 32 | requires_grad=False) 33 | 34 | def losses(self, seg_logit, seg_label): 35 | """Compute Detail Aggregation Loss.""" 36 | # Note: The paper claims `fusion_kernel` is a trainable 1x1 conv 37 | # parameters. However, it is a constant in original repo and other 38 | # codebase because it would not be added into computation graph 39 | # after threshold operation. 40 | seg_label = seg_label.float() 41 | boundary_targets = F.conv2d( 42 | seg_label, self.laplacian_kernel, padding=1) 43 | boundary_targets = boundary_targets.clamp(min=0) 44 | boundary_targets[boundary_targets > self.boundary_threshold] = 1 45 | boundary_targets[boundary_targets <= self.boundary_threshold] = 0 46 | 47 | boundary_targets_x2 = F.conv2d( 48 | seg_label, self.laplacian_kernel, stride=2, padding=1) 49 | boundary_targets_x2 = boundary_targets_x2.clamp(min=0) 50 | 51 | boundary_targets_x4 = F.conv2d( 52 | seg_label, self.laplacian_kernel, stride=4, padding=1) 53 | boundary_targets_x4 = boundary_targets_x4.clamp(min=0) 54 | 55 | boundary_targets_x4_up = F.interpolate( 56 | boundary_targets_x4, boundary_targets.shape[2:], mode='nearest') 57 | boundary_targets_x2_up = F.interpolate( 58 | boundary_targets_x2, boundary_targets.shape[2:], mode='nearest') 59 | 60 | boundary_targets_x2_up[ 61 | boundary_targets_x2_up > self.boundary_threshold] = 1 62 | boundary_targets_x2_up[ 63 | boundary_targets_x2_up <= self.boundary_threshold] = 0 64 | 65 | boundary_targets_x4_up[ 66 | boundary_targets_x4_up > self.boundary_threshold] = 1 67 | boundary_targets_x4_up[ 68 | boundary_targets_x4_up <= self.boundary_threshold] = 0 69 | 70 | boudary_targets_pyramids = torch.stack( 71 | (boundary_targets, boundary_targets_x2_up, boundary_targets_x4_up), 72 | dim=1) 73 | 74 | boudary_targets_pyramids = boudary_targets_pyramids.squeeze(2) 75 | boudary_targets_pyramid = F.conv2d(boudary_targets_pyramids, 76 | self.fusion_kernel) 77 | 78 | boudary_targets_pyramid[ 79 | boudary_targets_pyramid > self.boundary_threshold] = 1 80 | boudary_targets_pyramid[ 81 | boudary_targets_pyramid <= self.boundary_threshold] = 0 82 | 83 | seg_logit = F.interpolate( 84 | seg_logit, 85 | boundary_targets.shape[2:], 86 | mode='bilinear', 87 | align_corners=True) 88 | loss = super(STDCHead, self).losses(seg_logit, 89 | boudary_targets_pyramid.long()) 90 | return loss 91 | -------------------------------------------------------------------------------- /datasets/food101.py: -------------------------------------------------------------------------------- 1 | # copy-paste from https://github.com/pytorch/vision/blob/main/torchvision/datasets/food101.py 2 | import json 3 | from pathlib import Path 4 | from typing import Any, Tuple, Callable, Optional 5 | 6 | import PIL.Image 7 | 8 | from torchvision.datasets.utils import verify_str_arg, download_and_extract_archive 9 | from 
torchvision.datasets.vision import VisionDataset 10 | 11 | 12 | class Food101(VisionDataset): 13 | """`The Food-101 Data Set `_. 14 | 15 | The Food-101 is a challenging data set of 101 food categories, with 101'000 images. 16 | For each class, 250 manually reviewed test images are provided as well as 750 training images. 17 | On purpose, the training images were not cleaned, and thus still contain some amount of noise. 18 | This comes mostly in the form of intense colors and sometimes wrong labels. All images were 19 | rescaled to have a maximum side length of 512 pixels. 20 | 21 | 22 | Args: 23 | root (string): Root directory of the dataset. 24 | split (string, optional): The dataset split, supports ``"train"`` (default) and ``"test"``. 25 | transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed 26 | version. E.g, ``transforms.RandomCrop``. 27 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 28 | download (bool, optional): If True, downloads the dataset from the internet and 29 | puts it in root directory. If dataset is already downloaded, it is not 30 | downloaded again. Default is False. 31 | """ 32 | 33 | _URL = "http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz" 34 | _MD5 = "85eeb15f3717b99a5da872d97d918f87" 35 | 36 | def __init__( 37 | self, 38 | root: str, 39 | split: str = "train", 40 | transform: Optional[Callable] = None, 41 | target_transform: Optional[Callable] = None, 42 | download: bool = False, 43 | ) -> None: 44 | super().__init__(root, transform=transform, target_transform=target_transform) 45 | self._split = verify_str_arg(split, "split", ("train", "test")) 46 | self._base_folder = Path(self.root) / "food-101" 47 | self._meta_folder = self._base_folder / "meta" 48 | self._images_folder = self._base_folder / "images" 49 | 50 | if download: 51 | self._download() 52 | 53 | if not self._check_exists(): 54 | raise RuntimeError("Dataset not found. 
You can use download=True to download it") 55 | 56 | self._labels = [] 57 | self._image_files = [] 58 | with open(self._meta_folder / f"{split}.json") as f: 59 | metadata = json.loads(f.read()) 60 | 61 | self.classes = sorted(metadata.keys()) 62 | self.class_to_idx = dict(zip(self.classes, range(len(self.classes)))) 63 | 64 | for class_label, im_rel_paths in metadata.items(): 65 | self._labels += [self.class_to_idx[class_label]] * len(im_rel_paths) 66 | self._image_files += [ 67 | self._images_folder.joinpath(*f"{im_rel_path}.jpg".split("/")) for im_rel_path in im_rel_paths 68 | ] 69 | 70 | def __len__(self) -> int: 71 | return len(self._image_files) 72 | 73 | def __getitem__(self, idx) -> Tuple[Any, Any]: 74 | image_file, label = self._image_files[idx], self._labels[idx] 75 | image = PIL.Image.open(image_file).convert("RGB") 76 | 77 | if self.transform: 78 | image = self.transform(image) 79 | 80 | if self.target_transform: 81 | label = self.target_transform(label) 82 | 83 | return image, label 84 | 85 | def extra_repr(self) -> str: 86 | return f"split={self._split}" 87 | 88 | def _check_exists(self) -> bool: 89 | return all(folder.exists() and folder.is_dir() for folder in (self._meta_folder, self._images_folder)) 90 | 91 | def _download(self) -> None: 92 | if self._check_exists(): 93 | return 94 | download_and_extract_archive(self._URL, download_root=self.root, md5=self._MD5) 95 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import functools 3 | 4 | import mmcv 5 | import numpy as np 6 | import torch.nn.functional as F 7 | 8 | 9 | def get_class_weight(class_weight): 10 | """Get class weight for loss function. 11 | 12 | Args: 13 | class_weight (list[float] | str | None): If class_weight is a str, 14 | take it as a file name and read from it. 15 | """ 16 | if isinstance(class_weight, str): 17 | # take it as a file path 18 | if class_weight.endswith('.npy'): 19 | class_weight = np.load(class_weight) 20 | else: 21 | # pkl, json or yaml 22 | class_weight = mmcv.load(class_weight) 23 | 24 | return class_weight 25 | 26 | 27 | def reduce_loss(loss, reduction): 28 | """Reduce loss as specified. 29 | 30 | Args: 31 | loss (Tensor): Elementwise loss tensor. 32 | reduction (str): Options are "none", "mean" and "sum". 33 | 34 | Return: 35 | Tensor: Reduced loss tensor. 36 | """ 37 | reduction_enum = F._Reduction.get_enum(reduction) 38 | # none: 0, elementwise_mean:1, sum: 2 39 | if reduction_enum == 0: 40 | return loss 41 | elif reduction_enum == 1: 42 | return loss.mean() 43 | elif reduction_enum == 2: 44 | return loss.sum() 45 | 46 | 47 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 48 | """Apply element-wise weight and reduce loss. 49 | 50 | Args: 51 | loss (Tensor): Element-wise loss. 52 | weight (Tensor): Element-wise weights. 53 | reduction (str): Same as built-in losses of PyTorch. 54 | avg_factor (float): Average factor when computing the mean of losses. 55 | 56 | Returns: 57 | Tensor: Processed loss values. 
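
    Example (illustrative values that follow from the definitions above):

        >>> import torch
        >>> loss = torch.tensor([1., 2., 3.])
        >>> weight_reduce_loss(loss, reduction='sum')
        tensor(6.)
        >>> weight_reduce_loss(loss, avg_factor=2.0)
        tensor(3.)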
58 | """ 59 | # if weight is specified, apply element-wise weight 60 | if weight is not None: 61 | assert weight.dim() == loss.dim() 62 | if weight.dim() > 1: 63 | assert weight.size(1) == 1 or weight.size(1) == loss.size(1) 64 | loss = loss * weight 65 | 66 | # if avg_factor is not specified, just reduce the loss 67 | if avg_factor is None: 68 | loss = reduce_loss(loss, reduction) 69 | else: 70 | # if reduction is mean, then average the loss by avg_factor 71 | if reduction == 'mean': 72 | loss = loss.sum() / avg_factor 73 | # if reduction is 'none', then do nothing, otherwise raise an error 74 | elif reduction != 'none': 75 | raise ValueError('avg_factor can not be used with reduction="sum"') 76 | return loss 77 | 78 | 79 | def weighted_loss(loss_func): 80 | """Create a weighted version of a given loss function. 81 | 82 | To use this decorator, the loss function must have the signature like 83 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 84 | element-wise loss without any reduction. This decorator will add weight 85 | and reduction arguments to the function. The decorated function will have 86 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 87 | avg_factor=None, **kwargs)`. 88 | 89 | :Example: 90 | 91 | >>> import torch 92 | >>> @weighted_loss 93 | >>> def l1_loss(pred, target): 94 | >>> return (pred - target).abs() 95 | 96 | >>> pred = torch.Tensor([0, 2, 3]) 97 | >>> target = torch.Tensor([1, 1, 1]) 98 | >>> weight = torch.Tensor([1, 0, 1]) 99 | 100 | >>> l1_loss(pred, target) 101 | tensor(1.3333) 102 | >>> l1_loss(pred, target, weight) 103 | tensor(1.) 104 | >>> l1_loss(pred, target, reduction='none') 105 | tensor([1., 1., 2.]) 106 | >>> l1_loss(pred, target, weight, avg_factor=2) 107 | tensor(1.5000) 108 | """ 109 | 110 | @functools.wraps(loss_func) 111 | def wrapper(pred, 112 | target, 113 | weight=None, 114 | reduction='mean', 115 | avg_factor=None, 116 | **kwargs): 117 | # get element-wise loss 118 | loss = loss_func(pred, target, **kwargs) 119 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 120 | return loss 121 | 122 | return wrapper 123 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmcv_custom/apex_runner/apex_iter_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import platform 4 | import shutil 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.runner import RUNNERS, IterBasedRunner 11 | from .checkpoint import save_checkpoint 12 | 13 | try: 14 | import apex 15 | except: 16 | print('apex is not installed') 17 | 18 | 19 | @RUNNERS.register_module() 20 | class IterBasedRunnerAmp(IterBasedRunner): 21 | """Iteration-based Runner with AMP support. 22 | 23 | This runner train models iteration by iteration. 24 | """ 25 | 26 | def save_checkpoint(self, 27 | out_dir, 28 | filename_tmpl='iter_{}.pth', 29 | meta=None, 30 | save_optimizer=True, 31 | create_symlink=False): 32 | """Save checkpoint to file. 33 | 34 | Args: 35 | out_dir (str): Directory to save checkpoint files. 36 | filename_tmpl (str, optional): Checkpoint file template. 37 | Defaults to 'iter_{}.pth'. 38 | meta (dict, optional): Metadata to be saved in checkpoint. 39 | Defaults to None. 40 | save_optimizer (bool, optional): Whether save optimizer. 41 | Defaults to True. 
42 | create_symlink (bool, optional): Whether create symlink to the 43 | latest checkpoint file. Defaults to True. 44 | """ 45 | if meta is None: 46 | meta = dict(iter=self.iter + 1, epoch=self.epoch + 1) 47 | elif isinstance(meta, dict): 48 | meta.update(iter=self.iter + 1, epoch=self.epoch + 1) 49 | else: 50 | raise TypeError( 51 | f'meta should be a dict or None, but got {type(meta)}') 52 | if self.meta is not None: 53 | meta.update(self.meta) 54 | 55 | filename = filename_tmpl.format(self.iter + 1) 56 | filepath = osp.join(out_dir, filename) 57 | optimizer = self.optimizer if save_optimizer else None 58 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) 59 | # in some environments, `os.symlink` is not supported, you may need to 60 | # set `create_symlink` to False 61 | # if create_symlink: 62 | # dst_file = osp.join(out_dir, 'latest.pth') 63 | # if platform.system() != 'Windows': 64 | # mmcv.symlink(filename, dst_file) 65 | # else: 66 | # shutil.copy(filepath, dst_file) 67 | 68 | def resume(self, 69 | checkpoint, 70 | resume_optimizer=True, 71 | map_location='default'): 72 | if map_location == 'default': 73 | if torch.cuda.is_available(): 74 | device_id = torch.cuda.current_device() 75 | checkpoint = self.load_checkpoint( 76 | checkpoint, 77 | map_location=lambda storage, loc: storage.cuda(device_id)) 78 | else: 79 | checkpoint = self.load_checkpoint(checkpoint) 80 | else: 81 | checkpoint = self.load_checkpoint( 82 | checkpoint, map_location=map_location) 83 | 84 | self._epoch = checkpoint['meta']['epoch'] 85 | self._iter = checkpoint['meta']['iter'] 86 | self._inner_iter = checkpoint['meta']['iter'] 87 | if 'optimizer' in checkpoint and resume_optimizer: 88 | if isinstance(self.optimizer, Optimizer): 89 | self.optimizer.load_state_dict(checkpoint['optimizer']) 90 | elif isinstance(self.optimizer, dict): 91 | for k in self.optimizer.keys(): 92 | self.optimizer[k].load_state_dict( 93 | checkpoint['optimizer'][k]) 94 | else: 95 | raise TypeError( 96 | 'Optimizer should be dict or torch.optim.Optimizer ' 97 | f'but got {type(self.optimizer)}') 98 | 99 | if 'amp' in checkpoint: 100 | apex.amp.load_state_dict(checkpoint['amp']) 101 | self.logger.info('load amp state dict') 102 | 103 | self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}') 104 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.15.0 2 | accelerate==0.27.2 3 | addict==2.4.0 4 | aliyun-python-sdk-core==2.13.36 5 | aliyun-python-sdk-kms==2.16.2 6 | antlr4-python3-runtime==4.9.3 7 | array-record==0.5.0 8 | astunparse==1.6.3 9 | av==10.0.0 10 | black==24.2.0 11 | brotlipy==0.7.0 12 | cachetools==5.3.1 13 | certifi @ file:///croot/certifi_1690232220950/work/certifi 14 | cffi @ file:///croot/cffi_1670423208954/work 15 | charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work 16 | click==8.1.7 17 | cloudpickle==3.0.0 18 | cmake==3.28.3 19 | colorama==0.4.6 20 | contourpy==1.2.0 21 | crcmod==1.7 22 | cryptography @ file:///croot/cryptography_1694444244250/work 23 | cycler==0.12.1 24 | decord==0.6.0 25 | diffusers==0.26.3 26 | dm-tree==0.1.8 27 | easydict==1.11 28 | einops==0.6.1 29 | etils==1.5.2 30 | filelock==3.12.4 31 | flatbuffers==1.12 32 | fonttools==4.44.0 33 | fsspec==2023.9.1 34 | ftfy==6.1.1 35 | fvcore==0.1.5.post20221221 36 | gast==0.4.0 37 | google-auth==2.23.0 38 | google-auth-oauthlib==0.4.6 39 | 
google-pasta==0.2.0 40 | googleapis-common-protos==1.62.0 41 | grpcio==1.34.1 42 | h5py==3.1.0 43 | huggingface-hub==0.20.3 44 | hydra-core==1.3.2 45 | idna @ file:///croot/idna_1666125576474/work 46 | importlib-metadata==6.8.0 47 | importlib-resources==6.1.1 48 | iopath==0.1.9 49 | Jinja2==3.1.3 50 | jmespath==0.10.0 51 | joblib==1.3.2 52 | keras==2.5.0rc0 53 | keras-nightly==2.5.0.dev2021032900 54 | Keras-Preprocessing==1.1.2 55 | kiwisolver==1.4.5 56 | libclang==16.0.6 57 | lit==17.0.6 58 | Markdown==3.4.4 59 | markdown-it-py==3.0.0 60 | MarkupSafe==2.1.3 61 | matplotlib==3.8.1 62 | mdurl==0.1.2 63 | mkl-fft @ file:///croot/mkl_fft_1695058164594/work 64 | mkl-random @ file:///croot/mkl_random_1695059800811/work 65 | mkl-service==2.4.0 66 | ml-dtypes==0.2.0 67 | mmcv-full==1.4.2 68 | model-index==0.1.11 69 | mpmath==1.3.0 70 | mypy-extensions==1.0.0 71 | namex==0.0.7 72 | networkx==3.2.1 73 | numpy==1.22.4 74 | nvidia-cublas-cu11==11.10.3.66 75 | nvidia-cuda-cupti-cu11==11.7.101 76 | nvidia-cuda-nvrtc-cu11==11.7.99 77 | nvidia-cuda-runtime-cu11==11.7.99 78 | nvidia-cudnn-cu11==8.5.0.96 79 | nvidia-cufft-cu11==10.9.0.58 80 | nvidia-curand-cu11==10.2.10.91 81 | nvidia-cusolver-cu11==11.4.0.1 82 | nvidia-cusparse-cu11==11.7.4.91 83 | nvidia-nccl-cu11==2.14.3 84 | nvidia-nvtx-cu11==11.7.91 85 | oauthlib==3.2.2 86 | omegaconf==2.3.0 87 | opencv-python==4.8.0.76 88 | opendatalab==0.0.10 89 | openmim==0.3.9 90 | openxlab==0.0.25 91 | opt-einsum==3.3.0 92 | ordered-set==4.1.0 93 | oss2==2.17.0 94 | packaging==23.1 95 | pandas==2.1.1 96 | pathspec==0.12.1 97 | Pillow @ file:///croot/pillow_1695134008276/work 98 | platformdirs==4.2.0 99 | portalocker==2.8.2 100 | prettytable==3.9.0 101 | promise==2.3 102 | protobuf==3.20.3 103 | psutil==5.9.6 104 | pyasn1==0.5.0 105 | pyasn1-modules==0.3.0 106 | pycocotools==2.0.7 107 | pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work 108 | pycryptodome==3.19.0 109 | Pygments==2.16.1 110 | pyOpenSSL @ file:///croot/pyopenssl_1690223430423/work 111 | pyparsing==3.1.1 112 | PySocks @ file:///tmp/build/80754af9/pysocks_1605305812635/work 113 | python-dateutil==2.8.2 114 | pytz==2023.3.post1 115 | PyYAML @ file:///croot/pyyaml_1670514731622/work 116 | regex @ file:///tmp/abs_41f5bce5-0a2e-45aa-b231-1fd2fbd57753gfpe6sjm/croots/recipe/regex_1658257178822/work 117 | requests==2.28.2 118 | requests-oauthlib==1.3.1 119 | rich==13.4.2 120 | rsa==4.9 121 | safetensors==0.3.3 122 | scikit-learn==1.3.2 123 | scipy==1.11.3 124 | seaborn==0.13.0 125 | shapely==2.0.2 126 | simplejson==3.19.2 127 | six==1.15.0 128 | sympy==1.12 129 | tabulate==0.9.0 130 | tensorboard==2.11.2 131 | tensorboard-data-server==0.6.1 132 | tensorboard-plugin-wit==1.8.1 133 | tensorflow==2.5.0 134 | tensorflow-addons==0.23.0 135 | tensorflow-datasets==4.9.3 136 | tensorflow-estimator==2.5.0 137 | tensorflow-io-gcs-filesystem==0.34.0 138 | tensorflow-metadata==1.14.0 139 | termcolor==1.1.0 140 | terminaltables==3.1.10 141 | threadpoolctl==3.2.0 142 | timm==0.9.12 143 | toml==0.10.2 144 | tomli==2.0.1 145 | torch==1.8.0 146 | torchaudio==0.8.0 147 | torchvision==0.9.0 148 | tqdm==4.65.0 149 | triton==2.0.0 150 | typeguard==2.13.3 151 | typing_extensions==4.9.0 152 | tzdata==2023.3 153 | urllib3 @ file:///croot/urllib3_1686163155763/work 154 | wcwidth==0.2.6 155 | Werkzeug==2.3.7 156 | wrapt==1.12.1 157 | yacs==0.1.8 158 | yapf==0.33.0 159 | zipp==3.15.0 160 | -------------------------------------------------------------------------------- /datasets/dtd.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | from typing import Any, Callable, Optional, Tuple 4 | 5 | import PIL.Image 6 | 7 | from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg 8 | from torchvision.datasets.vision import VisionDataset 9 | 10 | 11 | class DTD(VisionDataset): 12 | """`Describable Textures Dataset (DTD) `_. 13 | 14 | Args: 15 | root (string): Root directory of the dataset. 16 | split (string, optional): The dataset split, supports ``"train"`` (default), ``"val"``, or ``"test"``. 17 | partition (int, optional): The dataset partition. Should be ``1 <= partition <= 10``. Defaults to ``1``. 18 | 19 | .. note:: 20 | 21 | The partition only changes which split each image belongs to. Thus, regardless of the selected 22 | partition, combining all splits will result in all images. 23 | 24 | transform (callable, optional): A function/transform that takes in a PIL image and returns a transformed 25 | version. E.g, ``transforms.RandomCrop``. 26 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 27 | download (bool, optional): If True, downloads the dataset from the internet and 28 | puts it in root directory. If dataset is already downloaded, it is not 29 | downloaded again. Default is False. 30 | """ 31 | 32 | _URL = "https://www.robots.ox.ac.uk/~vgg/data/dtd/download/dtd-r1.0.1.tar.gz" 33 | _MD5 = "fff73e5086ae6bdbea199a49dfb8a4c1" 34 | 35 | def __init__( 36 | self, 37 | root: str, 38 | split: str = "train", 39 | partition: int = 1, 40 | transform: Optional[Callable] = None, 41 | target_transform: Optional[Callable] = None, 42 | download: bool = False, 43 | ) -> None: 44 | self._split = verify_str_arg(split, "split", ("train", "val", "test")) 45 | if not isinstance(partition, int) and not (1 <= partition <= 10): 46 | raise ValueError( 47 | f"Parameter 'partition' should be an integer with `1 <= partition <= 10`, " 48 | f"but got {partition} instead" 49 | ) 50 | self._partition = partition 51 | 52 | super().__init__(root, transform=transform, target_transform=target_transform) 53 | self._base_folder = pathlib.Path(self.root) / type(self).__name__.lower() 54 | self._data_folder = self._base_folder / "dtd" 55 | self._meta_folder = self._data_folder / "labels" 56 | self._images_folder = self._data_folder / "images" 57 | 58 | if download: 59 | self._download() 60 | 61 | if not self._check_exists(): 62 | raise RuntimeError("Dataset not found. 
You can use download=True to download it") 63 | 64 | self._image_files = [] 65 | classes = [] 66 | with open(self._meta_folder / f"{self._split}{self._partition}.txt") as file: 67 | for line in file: 68 | cls, name = line.strip().split("/") 69 | self._image_files.append(self._images_folder.joinpath(cls, name)) 70 | classes.append(cls) 71 | 72 | self.classes = sorted(set(classes)) 73 | self.class_to_idx = dict(zip(self.classes, range(len(self.classes)))) 74 | self._labels = [self.class_to_idx[cls] for cls in classes] 75 | 76 | def __len__(self) -> int: 77 | return len(self._image_files) 78 | 79 | def __getitem__(self, idx: int) -> Tuple[Any, Any]: 80 | image_file, label = self._image_files[idx], self._labels[idx] 81 | image = PIL.Image.open(image_file).convert("RGB") 82 | 83 | if self.transform: 84 | image = self.transform(image) 85 | 86 | if self.target_transform: 87 | label = self.target_transform(label) 88 | 89 | return image, label 90 | 91 | def extra_repr(self) -> str: 92 | return f"split={self._split}, partition={self._partition}" 93 | 94 | def _check_exists(self) -> bool: 95 | return os.path.exists(self._data_folder) and os.path.isdir(self._data_folder) 96 | 97 | def _download(self) -> None: 98 | if self._check_exists(): 99 | return 100 | download_and_extract_archive(self._URL, download_root=str(self._base_folder), md5=self._MD5) 101 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/necks/mla_neck.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | from mmcv.cnn import ConvModule, build_norm_layer 4 | 5 | from ..builder import NECKS 6 | 7 | 8 | class MLAModule(nn.Module): 9 | 10 | def __init__(self, 11 | in_channels=[1024, 1024, 1024, 1024], 12 | out_channels=256, 13 | norm_cfg=None, 14 | act_cfg=None): 15 | super(MLAModule, self).__init__() 16 | self.channel_proj = nn.ModuleList() 17 | for i in range(len(in_channels)): 18 | self.channel_proj.append( 19 | ConvModule( 20 | in_channels=in_channels[i], 21 | out_channels=out_channels, 22 | kernel_size=1, 23 | norm_cfg=norm_cfg, 24 | act_cfg=act_cfg)) 25 | self.feat_extract = nn.ModuleList() 26 | for i in range(len(in_channels)): 27 | self.feat_extract.append( 28 | ConvModule( 29 | in_channels=out_channels, 30 | out_channels=out_channels, 31 | kernel_size=3, 32 | padding=1, 33 | norm_cfg=norm_cfg, 34 | act_cfg=act_cfg)) 35 | 36 | def forward(self, inputs): 37 | 38 | # feat_list -> [p2, p3, p4, p5] 39 | feat_list = [] 40 | for x, conv in zip(inputs, self.channel_proj): 41 | feat_list.append(conv(x)) 42 | 43 | # feat_list -> [p5, p4, p3, p2] 44 | # mid_list -> [m5, m4, m3, m2] 45 | feat_list = feat_list[::-1] 46 | mid_list = [] 47 | for feat in feat_list: 48 | if len(mid_list) == 0: 49 | mid_list.append(feat) 50 | else: 51 | mid_list.append(mid_list[-1] + feat) 52 | 53 | # mid_list -> [m5, m4, m3, m2] 54 | # out_list -> [o2, o3, o4, o5] 55 | out_list = [] 56 | for mid, conv in zip(mid_list, self.feat_extract): 57 | out_list.append(conv(mid)) 58 | 59 | return tuple(out_list) 60 | 61 | 62 | @NECKS.register_module() 63 | class MLANeck(nn.Module): 64 | """Multi-level Feature Aggregation. 65 | 66 | This neck is `The Multi-level Feature Aggregation construction of 67 | SETR `_. 68 | 69 | 70 | Args: 71 | in_channels (List[int]): Number of input channels per scale. 72 | out_channels (int): Number of output channels (used at each scale). 
73 | norm_layer (dict): Config dict for input normalization. 74 | Default: norm_layer=dict(type='LN', eps=1e-6, requires_grad=True). 75 | norm_cfg (dict): Config dict for normalization layer. Default: None. 76 | act_cfg (dict): Config dict for activation layer in ConvModule. 77 | Default: None. 78 | """ 79 | 80 | def __init__(self, 81 | in_channels, 82 | out_channels, 83 | norm_layer=dict(type='LN', eps=1e-6, requires_grad=True), 84 | norm_cfg=None, 85 | act_cfg=None): 86 | super(MLANeck, self).__init__() 87 | assert isinstance(in_channels, list) 88 | self.in_channels = in_channels 89 | self.out_channels = out_channels 90 | 91 | # In order to build general vision transformer backbone, we have to 92 | # move MLA to neck. 93 | self.norm = nn.ModuleList([ 94 | build_norm_layer(norm_layer, in_channels[i])[1] 95 | for i in range(len(in_channels)) 96 | ]) 97 | 98 | self.mla = MLAModule( 99 | in_channels=in_channels, 100 | out_channels=out_channels, 101 | norm_cfg=norm_cfg, 102 | act_cfg=act_cfg) 103 | 104 | def forward(self, inputs): 105 | assert len(inputs) == len(self.in_channels) 106 | 107 | # Convert from nchw to nlc 108 | outs = [] 109 | for i in range(len(inputs)): 110 | x = inputs[i] 111 | n, c, h, w = x.shape 112 | x = x.reshape(n, c, h * w).transpose(2, 1).contiguous() 113 | x = self.norm[i](x) 114 | x = x.transpose(1, 2).reshape(n, c, h, w).contiguous() 115 | outs.append(x) 116 | 117 | outs = self.mla(outs) 118 | return tuple(outs) 119 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/utils/up_conv_block.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule, build_upsample_layer 5 | 6 | 7 | class UpConvBlock(nn.Module): 8 | """Upsample convolution block in decoder for UNet. 9 | 10 | This upsample convolution block consists of one upsample module 11 | followed by one convolution block. The upsample module expands the 12 | high-level low-resolution feature map and the convolution block fuses 13 | the upsampled high-level low-resolution feature map and the low-level 14 | high-resolution feature map from encoder. 15 | 16 | Args: 17 | conv_block (nn.Sequential): Sequential of convolutional layers. 18 | in_channels (int): Number of input channels of the high-level 19 | skip_channels (int): Number of input channels of the low-level 20 | high-resolution feature map from encoder. 21 | out_channels (int): Number of output channels. 22 | num_convs (int): Number of convolutional layers in the conv_block. 23 | Default: 2. 24 | stride (int): Stride of convolutional layer in conv_block. Default: 1. 25 | dilation (int): Dilation rate of convolutional layer in conv_block. 26 | Default: 1. 27 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some 28 | memory while slowing down the training speed. Default: False. 29 | conv_cfg (dict | None): Config dict for convolution layer. 30 | Default: None. 31 | norm_cfg (dict | None): Config dict for normalization layer. 32 | Default: dict(type='BN'). 33 | act_cfg (dict | None): Config dict for activation layer in ConvModule. 34 | Default: dict(type='ReLU'). 35 | upsample_cfg (dict): The upsample config of the upsample module in 36 | decoder. Default: dict(type='InterpConv'). 
If the size of 37 | high-level feature map is the same as that of skip feature map 38 | (low-level feature map from encoder), it does not need upsample the 39 | high-level feature map and the upsample_cfg is None. 40 | dcn (bool): Use deformable convolution in convolutional layer or not. 41 | Default: None. 42 | plugins (dict): plugins for convolutional layers. Default: None. 43 | """ 44 | 45 | def __init__(self, 46 | conv_block, 47 | in_channels, 48 | skip_channels, 49 | out_channels, 50 | num_convs=2, 51 | stride=1, 52 | dilation=1, 53 | with_cp=False, 54 | conv_cfg=None, 55 | norm_cfg=dict(type='BN'), 56 | act_cfg=dict(type='ReLU'), 57 | upsample_cfg=dict(type='InterpConv'), 58 | dcn=None, 59 | plugins=None): 60 | super(UpConvBlock, self).__init__() 61 | assert dcn is None, 'Not implemented yet.' 62 | assert plugins is None, 'Not implemented yet.' 63 | 64 | self.conv_block = conv_block( 65 | in_channels=2 * skip_channels, 66 | out_channels=out_channels, 67 | num_convs=num_convs, 68 | stride=stride, 69 | dilation=dilation, 70 | with_cp=with_cp, 71 | conv_cfg=conv_cfg, 72 | norm_cfg=norm_cfg, 73 | act_cfg=act_cfg, 74 | dcn=None, 75 | plugins=None) 76 | if upsample_cfg is not None: 77 | self.upsample = build_upsample_layer( 78 | cfg=upsample_cfg, 79 | in_channels=in_channels, 80 | out_channels=skip_channels, 81 | with_cp=with_cp, 82 | norm_cfg=norm_cfg, 83 | act_cfg=act_cfg) 84 | else: 85 | self.upsample = ConvModule( 86 | in_channels, 87 | skip_channels, 88 | kernel_size=1, 89 | stride=1, 90 | padding=0, 91 | conv_cfg=conv_cfg, 92 | norm_cfg=norm_cfg, 93 | act_cfg=act_cfg) 94 | 95 | def forward(self, skip, x): 96 | """Forward function.""" 97 | 98 | x = self.upsample(x) 99 | out = torch.cat([skip, x], dim=1) 100 | out = self.conv_block(out) 101 | 102 | return out 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Dynamic-Tuning


10 | 11 | 12 | The official implementation of "Dynamic Tuning Towards Parameter and Inference Efficiency for ViT Adaptation" (NeurIPS 2024). 13 | 14 | > Wangbo Zhao1, Jiasheng Tang2,3, Yizeng Han4, Yibing Song2,3, Kai Wang1, Gao Huang4, Fan Wang2, Yang You1 15 | > 16 | > 1[National University of Singapore](https://www.nus.edu.sg/), 2[DAMO Academy, Alibaba Group](https://damo.alibaba.com/?language=zh), 3Hupan Lab, 4[Tsinghua University](https://www.tsinghua.edu.cn/) 17 | > 18 | > [Paper](https://arxiv.org/abs/2403.11808) 19 | 20 | 21 | ## News 🚀🚀🚀 22 | - `2024.10.16`: We have updated the code: it adds a distillation technique (introduced in the NeurIPS 2024 version of our paper), supports actually efficient inference, and supports semantic segmentation. The NeurIPS 2024 version of the paper will be released soon. 23 | - `2024.09.26`: DyT has been accepted by NeurIPS 2024. We will update the code and paper soon. 24 | - `2024.03.23`: The code is released. 25 | 26 | ## Abstract 27 | Existing parameter-efficient fine-tuning (PEFT) methods have achieved significant success in vision transformer (ViT) adaptation by improving parameter efficiency. However, enhancing inference efficiency during adaptation remains underexplored, which limits the broader application of pre-trained ViT models, especially when the models are computationally expensive. In this paper, we propose Dynamic Tuning (DyT), a novel approach that improves both parameter and inference efficiency for ViT adaptation. Specifically, besides using lightweight adapter modules, we propose a token dispatcher to distinguish informative tokens from less important ones, allowing the latter to dynamically skip the original block and thereby reducing redundant computation during inference. Additionally, we explore multiple design variants to find the best practice of DyT. Finally, inspired by the mixture-of-experts (MoE) mechanism, we introduce an enhanced adapter to further boost adaptation performance. We validate DyT across various tasks, including image/video recognition and semantic segmentation. For instance, DyT achieves comparable or even superior performance to existing PEFT methods while using only 71%-85% of their FLOPs on the VTAB-1K benchmark. 28 |
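To make the token-skipping idea from the abstract concrete, below is a minimal, editor-added sketch of a token dispatcher. It is not the implementation in this repo: the module and argument names (`TokenDispatcher`, `threshold`) are illustrative, and a real implementation would use a differentiable relaxation (e.g. Gumbel-Sigmoid) of the hard mask during training rather than a fixed threshold.

```python
import torch
import torch.nn as nn


class TokenDispatcher(nn.Module):
    """Illustrative gate that lets uninformative tokens skip a transformer block."""

    def __init__(self, dim: int):
        super().__init__()
        self.gate = nn.Linear(dim, 1)  # per-token score

    def forward(self, x: torch.Tensor, block: nn.Module, threshold: float = 0.5):
        # x: (B, N, C) tokens; block: any module mapping (B, N, C) -> (B, N, C)
        keep = self.gate(x).squeeze(-1).sigmoid() > threshold  # (B, N) boolean mask
        out = x.clone()  # unselected tokens bypass the block unchanged
        for b in range(x.size(0)):
            idx = keep[b].nonzero(as_tuple=True)[0]
            if idx.numel() > 0:
                out[b, idx] = block(x[b : b + 1, idx])[0]  # only selected tokens are computed
        return out, keep
```

The per-sample loop is for readability only; an efficient implementation would batch the selected tokens before running the block.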

29 | 31 | 32 | ## 🛠 Dataset Preparation 33 | - For VTAB-1K, we recommend adopting the split provided by [SSF](https://github.com/dongzelian/SSF). You can download VTAB-1K directly from their repo. 34 | - Other image datasets will be downloaded automatically the first time you run our code. 35 | - For video datasets (K400 and SSv2), you can download them from [OpenDataLab](https://opendatalab.org.cn/OpenMMLab/Kinetics-400) or their official websites. 36 | 37 | ## 🛠 Installation 38 | ``` 39 | pip install -r requirements.txt # install torch, timm, torchvision, etc. 40 | wget https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch16_224_in21k-e5005f0a.pth # download the ckpt from timm 41 | ``` 42 | 43 | ## ⚙️ Fine-tuning 44 | ``` 45 | bash ./train_IN21K.sh # training on complete datasets 46 | bash ./train_vtab.sh # training on the VTAB benchmark 47 | bash ./train_video.sh # training on video datasets 48 | ``` 49 | 50 | ## ⚙️ Measure Inference Speed 51 | ``` 52 | bash ./measure_speed.sh 53 | ``` 54 | 55 | ## Citation 56 | If you find our work useful, please consider citing us. 57 | ``` 58 | @article{zhao2024dynamic, 59 | title={Dynamic tuning towards parameter and inference efficiency for vit adaptation}, 60 | author={Zhao, Wangbo and Tang, Jiasheng and Han, Yizeng and Song, Yibing and Wang, Kai and Huang, Gao and Wang, Fan and You, Yang}, 61 | journal={arXiv preprint arXiv:2403.11808}, 62 | year={2024} 63 | } 64 | ``` 65 | 66 | 67 | ## Acknowledgement 68 | The repo is partly built on [AdaptFormer](https://github.com/ShoufaChen/AdaptFormer), [AdViT](https://github.com/MengLcool/AdaViT), and [PETL-ViT](https://github.com/JieShibo/PETL-ViT). We are grateful for their generous contributions to open source. 69 | 70 | 71 | ## Contact 72 | 🔥🔥🔥 If you are interested in this work and would like to cooperate with us, please send an email to wangbo.zhao96@gmail.com 🔥🔥🔥 73 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/uper_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from mmseg.ops import resize 7 | from ..builder import HEADS 8 | from .decode_head import BaseDecodeHead 9 | from .psp_head import PPM 10 | 11 | 12 | @HEADS.register_module() 13 | class UPerHead(BaseDecodeHead): 14 | """Unified Perceptual Parsing for Scene Understanding. 15 | 16 | This head is the implementation of `UPerNet 17 | <https://arxiv.org/abs/1807.10221>`_. 18 | 19 | Args: 20 | pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid 21 | Module applied on the last feature. Default: (1, 2, 3, 6).
22 | """ 23 | 24 | def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): 25 | super(UPerHead, self).__init__( 26 | input_transform='multiple_select', **kwargs) 27 | # PSP Module 28 | self.psp_modules = PPM( 29 | pool_scales, 30 | self.in_channels[-1], 31 | self.channels, 32 | conv_cfg=self.conv_cfg, 33 | norm_cfg=self.norm_cfg, 34 | act_cfg=self.act_cfg, 35 | align_corners=self.align_corners) 36 | self.bottleneck = ConvModule( 37 | self.in_channels[-1] + len(pool_scales) * self.channels, 38 | self.channels, 39 | 3, 40 | padding=1, 41 | conv_cfg=self.conv_cfg, 42 | norm_cfg=self.norm_cfg, 43 | act_cfg=self.act_cfg) 44 | # FPN Module 45 | self.lateral_convs = nn.ModuleList() 46 | self.fpn_convs = nn.ModuleList() 47 | for in_channels in self.in_channels[:-1]: # skip the top layer 48 | l_conv = ConvModule( 49 | in_channels, 50 | self.channels, 51 | 1, 52 | conv_cfg=self.conv_cfg, 53 | norm_cfg=self.norm_cfg, 54 | act_cfg=self.act_cfg, 55 | inplace=False) 56 | fpn_conv = ConvModule( 57 | self.channels, 58 | self.channels, 59 | 3, 60 | padding=1, 61 | conv_cfg=self.conv_cfg, 62 | norm_cfg=self.norm_cfg, 63 | act_cfg=self.act_cfg, 64 | inplace=False) 65 | self.lateral_convs.append(l_conv) 66 | self.fpn_convs.append(fpn_conv) 67 | 68 | self.fpn_bottleneck = ConvModule( 69 | len(self.in_channels) * self.channels, 70 | self.channels, 71 | 3, 72 | padding=1, 73 | conv_cfg=self.conv_cfg, 74 | norm_cfg=self.norm_cfg, 75 | act_cfg=self.act_cfg) 76 | 77 | def psp_forward(self, inputs): 78 | """Forward function of PSP module.""" 79 | x = inputs[-1] 80 | psp_outs = [x] 81 | psp_outs.extend(self.psp_modules(x)) 82 | psp_outs = torch.cat(psp_outs, dim=1) 83 | output = self.bottleneck(psp_outs) 84 | 85 | return output 86 | 87 | def forward(self, inputs): 88 | """Forward function.""" 89 | 90 | inputs = self._transform_inputs(inputs) 91 | 92 | # build laterals 93 | laterals = [ 94 | lateral_conv(inputs[i]) 95 | for i, lateral_conv in enumerate(self.lateral_convs) 96 | ] 97 | 98 | laterals.append(self.psp_forward(inputs)) 99 | 100 | # build top-down path 101 | used_backbone_levels = len(laterals) 102 | for i in range(used_backbone_levels - 1, 0, -1): 103 | prev_shape = laterals[i - 1].shape[2:] 104 | laterals[i - 1] = laterals[i - 1] + resize( 105 | laterals[i], 106 | size=prev_shape, 107 | mode='bilinear', 108 | align_corners=self.align_corners) 109 | 110 | # build outputs 111 | fpn_outs = [ 112 | self.fpn_convs[i](laterals[i]) 113 | for i in range(used_backbone_levels - 1) 114 | ] 115 | # append psp feature 116 | fpn_outs.append(laterals[-1]) 117 | 118 | for i in range(used_backbone_levels - 1, 0, -1): 119 | fpn_outs[i] = resize( 120 | fpn_outs[i], 121 | size=fpn_outs[0].shape[2:], 122 | mode='bilinear', 123 | align_corners=self.align_corners) 124 | fpn_outs = torch.cat(fpn_outs, dim=1) 125 | output = self.fpn_bottleneck(fpn_outs) 126 | output = self.cls_seg(output) 127 | return output 128 | -------------------------------------------------------------------------------- /models/losses.py: -------------------------------------------------------------------------------- 1 | from numpy.lib.arraysetops import isin 2 | from timm import loss 3 | from timm.data.transforms_factory import transforms_imagenet_train 4 | import torch 5 | from torch.functional import Tensor 6 | import torch.nn as nn 7 | 8 | def binaray_entropy(prob, eps=1e-7): 9 | neg_entro = prob * prob.clamp(min=eps).log() + (1-prob) * (1-prob).clamp(min=eps).log() 10 | return - neg_entro 11 | 12 | 13 | 14 | 15 | class AdaLoss(nn.Module): 16 | def 
__init__(self, base_criterion, 17 | 18 | layer_target_ratio=0.5, 19 | layer_loss_ratio=2., 20 | layer_diverse_ratio=0.1, 21 | layer_entropy_weight=0.1, 22 | layer_minimal_weight=0., 23 | layer_minimal=0., 24 | 25 | token_target_ratio=0.5, 26 | token_loss_ratio=2., 27 | token_minimal=0.1, 28 | token_minimal_weight=1. 29 | ): 30 | super().__init__() 31 | self.base_criterion = base_criterion 32 | 33 | # self.layer_target_ratio = layer_target_ratio 34 | # self.layer_loss_ratio = layer_loss_ratio 35 | # self.layer_diverse_ratio = layer_diverse_ratio 36 | # self.layer_entropy_weight = layer_entropy_weight 37 | # self.layer_minimal_weight = layer_minimal_weight 38 | # self.layer_minimal = layer_minimal 39 | 40 | self.token_target_ratio = token_target_ratio 41 | self.token_loss_ratio = token_loss_ratio 42 | self.token_minimal = token_minimal 43 | self.token_minimal_weight = token_minimal_weight 44 | 45 | 46 | 47 | 48 | def forward(self, outputs, y): 49 | ''' 50 | head_select: (b, num_layers, num_head) 51 | ''' 52 | 53 | x, token_select, _ = outputs["prediction"], outputs["token_select"], outputs["token_logits"] 54 | 55 | base_loss = self.base_criterion(x, y) 56 | # layer_loss = self._get_layer_loss(x, layer_select, layer_logits) 57 | token_loss = self._get_token_loss(x, token_select) 58 | 59 | loss = base_loss + self.token_loss_ratio * token_loss 60 | 61 | return loss, dict(base_loss=base_loss, token_loss=self.token_loss_ratio * token_loss) 62 | 63 | def _get_token_loss(self, x, token_select): 64 | """ 65 | token_select : tensor (b, num_layer, l) 66 | 67 | """ 68 | if token_select is not None : 69 | token_mean = token_select.mean() 70 | # token_flops_loss = (token_mean - self.token_target_ratio).abs().mean() 71 | # token_flops_loss = (token_mean - self.token_target_ratio).clamp(min=0.).mean() 72 | token_flops_loss = ((token_mean - self.token_target_ratio)**2).mean() 73 | 74 | if self.token_minimal_weight > 0 : 75 | token_mean = token_select.mean(-1) 76 | token_minimal_loss = (self.token_minimal - token_mean).clamp(min=0.).sum() 77 | else : 78 | token_minimal_loss = 0 79 | 80 | token_loss = token_flops_loss + self.token_minimal_weight * token_minimal_loss 81 | else : 82 | token_loss = x.new_zeros(1).mean() 83 | 84 | return token_loss 85 | 86 | 87 | def _get_layer_loss(self, x, layer_select, logits_set): 88 | if layer_select is not None : 89 | layer_mean = layer_select.mean() 90 | layer_flops_loss = (layer_mean - self.layer_target_ratio).abs().mean() 91 | 92 | if self.layer_diverse_ratio > 0 : 93 | layer_mean = layer_select.mean((0,-1)) 94 | layer_diverse_loss = (layer_mean - self.layer_target_ratio).abs().mean() 95 | else : 96 | layer_diverse_loss = 0 97 | 98 | if self.layer_entropy_weight > 0 : 99 | layer_select_logits = logits_set['layer_select_logits'] 100 | layer_entropy = binaray_entropy(layer_select_logits.sigmoid()).mean() 101 | else : 102 | layer_entropy = 0 103 | 104 | if self.layer_minimal_weight > 0 : 105 | layer_mean = layer_select.mean(0) #(num_layers, 2) 106 | layer_minimal_loss = (self.layer_minimal - layer_mean).clamp(min=0.).sum() 107 | else : 108 | layer_minimal_loss = 0 109 | 110 | layer_loss = layer_flops_loss + self.layer_diverse_ratio * layer_diverse_loss - self.layer_entropy_weight * layer_entropy \ 111 | + self.layer_minimal_weight * layer_minimal_loss 112 | else : 113 | layer_loss = x.new_zeros(1).mean() 114 | 115 | return layer_loss 116 | 117 | 118 | -------------------------------------------------------------------------------- 
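A brief, editor-added usage sketch for the `AdaLoss` module above. It relies only on the interface visible in the file (a dict with `prediction`, `token_select`, and `token_logits`); the tensor shapes, batch size, and the import path `models.losses` are illustrative placeholders, not the repo's actual training loop.

```python
import torch
import torch.nn as nn

from models.losses import AdaLoss  # the module defined above

criterion = AdaLoss(
    base_criterion=nn.CrossEntropyLoss(),
    token_target_ratio=0.5,  # target fraction of tokens kept per block
    token_loss_ratio=2.0,    # weight of the token-usage penalty
)

# outputs in the dict format AdaLoss expects (random placeholders here)
outputs = {
    "prediction": torch.randn(8, 1000, requires_grad=True),  # (batch, num_classes) logits
    "token_select": torch.rand(8, 12, 197),                  # (batch, num_layers, num_tokens) in [0, 1]
    "token_logits": torch.randn(8, 12, 197),                 # not used by the token loss itself
}
targets = torch.randint(0, 1000, (8,))

loss, loss_terms = criterion(outputs, targets)  # total loss plus per-term breakdown
loss.backward()
print(loss_terms["base_loss"].item(), loss_terms["token_loss"].item())
```

The squared penalty on the mean token-selection ratio pushes the average kept-token fraction toward `token_target_ratio`, while the minimal-ratio term discourages any layer from dropping nearly all tokens.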
/dense_tasks/Segmentation/configs/beit/upernet/our_vit_coco-stuff164k.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # BEIT: BERT Pre-Training of Image Transformers (https://arxiv.org/abs/2106.08254) 3 | # Github source: https://github.com/microsoft/unilm/tree/master/beit 4 | # Copyright (c) 2021 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # By Hangbo Bao 7 | # Based on timm, mmseg, setr, xcit and swin code bases 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm 9 | # https://github.com/fudan-zvg/SETR 10 | # https://github.com/facebookresearch/xcit/ 11 | # https://github.com/microsoft/Swin-Transformer 12 | # --------------------------------------------------------' 13 | # recommand use this config for BEiT models which are self-supervised pretrained on imagenet 14 | _base_ = [ 15 | '../../_base_/models/upernet_beit.py', 16 | '../../_base_/default_runtime.py', '../../_base_/schedules/schedule_80k.py' 17 | ] 18 | 19 | crop_size = (512, 512) 20 | 21 | model = dict( 22 | backbone=dict( 23 | _delete_=True, 24 | type='VisionTransformer21K', 25 | img_size=512, 26 | patch_size=16, 27 | embed_dim=768, 28 | depth=12, 29 | num_heads=12, 30 | mlp_ratio=4.0, 31 | qkv_bias=True, 32 | drop_path_rate=0.1, 33 | out_indices=[3, 5, 7, 11], 34 | use_rel_pos_bias=True 35 | ), 36 | decode_head=dict( 37 | in_channels=[768, 768, 768, 768], 38 | num_classes=171, 39 | channels=768, 40 | ), 41 | auxiliary_head=dict( 42 | in_channels=768, 43 | num_classes=171 44 | ), 45 | test_cfg = dict(mode='slide', crop_size=crop_size, stride=(341, 341)) 46 | ) 47 | 48 | 49 | 50 | optimizer = dict(_delete_=True, type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.05, 51 | # constructor='LayerDecayOptimizerConstructor', 52 | # paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.65) 53 | ) 54 | 55 | 56 | lr_config = dict(_delete_=True, policy='poly', 57 | warmup='linear', 58 | warmup_iters=1500, 59 | warmup_ratio=1e-6, 60 | power=1.0, min_lr=0.0, by_epoch=False) 61 | 62 | # By default, models are trained on 8 GPUs with 2 images per GPU 63 | # data=dict(samples_per_gpu=2) 64 | 65 | runner = dict(type='IterBasedRunnerAmp') 66 | 67 | # do not use mmdet version fp16 68 | fp16 = None 69 | optimizer_config = dict( 70 | type="DistOptimizerHook", 71 | update_interval=1, 72 | grad_clip=None, 73 | coalesce=True, 74 | bucket_size_mb=-1, 75 | use_fp16=False, 76 | ) 77 | 78 | 79 | 80 | # dataset settings 81 | dataset_type = 'COCOStuffDataset' 82 | data_root = '/home/zhaowangbo.zwb/dataset/coco_stuff164k/' 83 | img_norm_cfg = dict( 84 | mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) 85 | crop_size = (512, 512) 86 | train_pipeline = [ 87 | dict(type='LoadImageFromFile'), 88 | dict(type='LoadAnnotations'), 89 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 90 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 91 | dict(type='RandomFlip', prob=0.5), 92 | dict(type='PhotoMetricDistortion'), 93 | dict(type='Normalize', **img_norm_cfg), 94 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 95 | dict(type='DefaultFormatBundle'), 96 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 97 | ] 98 | test_pipeline = [ 99 | dict(type='LoadImageFromFile'), 100 | dict( 101 | type='MultiScaleFlipAug', 102 | img_scale=(2048, 512), 103 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 104 | flip=False, 105 | transforms=[ 
106 | dict(type='Resize', keep_ratio=True), 107 | dict(type='RandomFlip'), 108 | dict(type='Normalize', **img_norm_cfg), 109 | dict(type='ImageToTensor', keys=['img']), 110 | dict(type='Collect', keys=['img']), 111 | ]) 112 | ] 113 | data = dict( 114 | samples_per_gpu=2, 115 | workers_per_gpu=8, 116 | train=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | img_dir='images/train2017', 120 | ann_dir='annotations/train2017', 121 | pipeline=train_pipeline), 122 | val=dict( 123 | type=dataset_type, 124 | data_root=data_root, 125 | img_dir='images/val2017', 126 | ann_dir='annotations/val2017', 127 | pipeline=test_pipeline), 128 | test=dict( 129 | type=dataset_type, 130 | data_root=data_root, 131 | img_dir='images/val2017', 132 | ann_dir='annotations/val2017', 133 | pipeline=test_pipeline)) 134 | 135 | evaluation = dict(interval=10000, metric='mIoU') -------------------------------------------------------------------------------- /datasets/volume_transforms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import torch 4 | 5 | 6 | def convert_img(img): 7 | """Converts (H, W, C) numpy.ndarray to (C, W, H) format 8 | """ 9 | if len(img.shape) == 3: 10 | img = img.transpose(2, 0, 1) 11 | if len(img.shape) == 2: 12 | img = np.expand_dims(img, 0) 13 | return img 14 | 15 | 16 | class ClipToTensor(object): 17 | """Convert a list of m (H x W x C) numpy.ndarrays in the range [0, 255] 18 | to a torch.FloatTensor of shape (C x m x H x W) in the range [0, 1.0] 19 | """ 20 | 21 | def __init__(self, channel_nb=3, div_255=True, numpy=False): 22 | self.channel_nb = channel_nb 23 | self.div_255 = div_255 24 | self.numpy = numpy 25 | 26 | def __call__(self, clip): 27 | """ 28 | Args: clip (list of numpy.ndarray): clip (list of images) 29 | to be converted to tensor. 
30 | """ 31 | # Retrieve shape 32 | if isinstance(clip[0], np.ndarray): 33 | h, w, ch = clip[0].shape 34 | assert ch == self.channel_nb, 'Got {0} instead of 3 channels'.format( 35 | ch) 36 | elif isinstance(clip[0], Image.Image): 37 | w, h = clip[0].size 38 | else: 39 | raise TypeError('Expected numpy.ndarray or PIL.Image\ 40 | but got list of {0}'.format(type(clip[0]))) 41 | 42 | np_clip = np.zeros([self.channel_nb, len(clip), int(h), int(w)]) 43 | 44 | # Convert 45 | for img_idx, img in enumerate(clip): 46 | if isinstance(img, np.ndarray): 47 | pass 48 | elif isinstance(img, Image.Image): 49 | img = np.array(img, copy=False) 50 | else: 51 | raise TypeError('Expected numpy.ndarray or PIL.Image\ 52 | but got list of {0}'.format(type(clip[0]))) 53 | img = convert_img(img) 54 | np_clip[:, img_idx, :, :] = img 55 | if self.numpy: 56 | if self.div_255: 57 | np_clip = np_clip / 255.0 58 | return np_clip 59 | 60 | else: 61 | tensor_clip = torch.from_numpy(np_clip) 62 | 63 | if not isinstance(tensor_clip, torch.FloatTensor): 64 | tensor_clip = tensor_clip.float() 65 | if self.div_255: 66 | tensor_clip = torch.div(tensor_clip, 255) 67 | return tensor_clip 68 | 69 | 70 | # Note this norms data to -1/1 71 | class ClipToTensor_K(object): 72 | """Convert a list of m (H x W x C) numpy.ndarrays in the range [0, 255] 73 | to a torch.FloatTensor of shape (C x m x H x W) in the range [0, 1.0] 74 | """ 75 | 76 | def __init__(self, channel_nb=3, div_255=True, numpy=False): 77 | self.channel_nb = channel_nb 78 | self.div_255 = div_255 79 | self.numpy = numpy 80 | 81 | def __call__(self, clip): 82 | """ 83 | Args: clip (list of numpy.ndarray): clip (list of images) 84 | to be converted to tensor. 85 | """ 86 | # Retrieve shape 87 | if isinstance(clip[0], np.ndarray): 88 | h, w, ch = clip[0].shape 89 | assert ch == self.channel_nb, 'Got {0} instead of 3 channels'.format( 90 | ch) 91 | elif isinstance(clip[0], Image.Image): 92 | w, h = clip[0].size 93 | else: 94 | raise TypeError('Expected numpy.ndarray or PIL.Image\ 95 | but got list of {0}'.format(type(clip[0]))) 96 | 97 | np_clip = np.zeros([self.channel_nb, len(clip), int(h), int(w)]) 98 | 99 | # Convert 100 | for img_idx, img in enumerate(clip): 101 | if isinstance(img, np.ndarray): 102 | pass 103 | elif isinstance(img, Image.Image): 104 | img = np.array(img, copy=False) 105 | else: 106 | raise TypeError('Expected numpy.ndarray or PIL.Image\ 107 | but got list of {0}'.format(type(clip[0]))) 108 | img = convert_img(img) 109 | np_clip[:, img_idx, :, :] = img 110 | if self.numpy: 111 | if self.div_255: 112 | np_clip = (np_clip - 127.5) / 127.5 113 | return np_clip 114 | 115 | else: 116 | tensor_clip = torch.from_numpy(np_clip) 117 | 118 | if not isinstance(tensor_clip, torch.FloatTensor): 119 | tensor_clip = tensor_clip.float() 120 | if self.div_255: 121 | tensor_clip = torch.div(torch.sub(tensor_clip, 127.5), 127.5) 122 | return tensor_clip 123 | 124 | 125 | class ToTensor(object): 126 | """Converts numpy array to tensor 127 | """ 128 | 129 | def __call__(self, array): 130 | tensor = torch.from_numpy(array) 131 | return tensor 132 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/configs/beit/upernet/our_vit.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # BEIT: BERT Pre-Training of Image Transformers (https://arxiv.org/abs/2106.08254) 3 | # Github source: 
https://github.com/microsoft/unilm/tree/master/beit 4 | # Copyright (c) 2021 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # By Hangbo Bao 7 | # Based on timm, mmseg, setr, xcit and swin code bases 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm 9 | # https://github.com/fudan-zvg/SETR 10 | # https://github.com/facebookresearch/xcit/ 11 | # https://github.com/microsoft/Swin-Transformer 12 | # --------------------------------------------------------' 13 | # recommand use this config for BEiT models which are self-supervised pretrained on imagenet 14 | _base_ = [ 15 | '../../_base_/models/upernet_beit.py', 16 | '../../_base_/default_runtime.py', '../../_base_/schedules/schedule_160k.py' 17 | ] 18 | 19 | crop_size = (512, 512) 20 | 21 | model = dict( 22 | backbone=dict( 23 | _delete_=True, 24 | type='VisionTransformer21K', 25 | img_size=512, 26 | patch_size=16, 27 | embed_dim=768, 28 | depth=12, 29 | num_heads=12, 30 | mlp_ratio=4.0, 31 | qkv_bias=True, 32 | drop_path_rate=0.1, 33 | out_indices=[3, 5, 7, 11], 34 | use_rel_pos_bias=True 35 | ), 36 | decode_head=dict( 37 | in_channels=[768, 768, 768, 768], 38 | num_classes=150, 39 | channels=768, 40 | ), 41 | auxiliary_head=dict( 42 | in_channels=768, 43 | num_classes=150 44 | ), 45 | test_cfg = dict(mode='slide', crop_size=crop_size, stride=(341, 341)) 46 | ) 47 | 48 | 49 | 50 | optimizer = dict(_delete_=True, type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.05, 51 | # constructor='LayerDecayOptimizerConstructor', 52 | # paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.65) 53 | ) 54 | 55 | 56 | lr_config = dict(_delete_=True, policy='poly', 57 | warmup='linear', 58 | warmup_iters=1500, 59 | warmup_ratio=1e-6, 60 | power=1.0, min_lr=0.0, by_epoch=False) 61 | 62 | # By default, models are trained on 8 GPUs with 2 images per GPU 63 | # data=dict(samples_per_gpu=2) 64 | 65 | runner = dict(type='IterBasedRunnerAmp') 66 | 67 | # do not use mmdet version fp16 68 | fp16 = None 69 | optimizer_config = dict( 70 | type="DistOptimizerHook", 71 | update_interval=1, 72 | grad_clip=None, 73 | coalesce=True, 74 | bucket_size_mb=-1, 75 | use_fp16=False, 76 | ) 77 | 78 | 79 | 80 | # dataset settings 81 | dataset_type = 'ADE20KDataset' 82 | data_root = '/home/zhaowangbo.zwb/dataset/ADEChallengeData2016/' 83 | img_norm_cfg = dict( 84 | mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) 85 | crop_size = (512, 512) 86 | train_pipeline = [ 87 | dict(type='LoadImageFromFile'), 88 | dict(type='LoadAnnotations', reduce_zero_label=True), 89 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 90 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 91 | dict(type='RandomFlip', prob=0.5), 92 | dict(type='PhotoMetricDistortion'), 93 | dict(type='Normalize', **img_norm_cfg), 94 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 95 | dict(type='DefaultFormatBundle'), 96 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 97 | ] 98 | test_pipeline = [ 99 | dict(type='LoadImageFromFile'), 100 | dict( 101 | type='MultiScaleFlipAug', 102 | img_scale=(2048, 512), 103 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 104 | flip=False, 105 | transforms=[ 106 | dict(type='Resize', keep_ratio=True), 107 | dict(type='RandomFlip'), 108 | dict(type='Normalize', **img_norm_cfg), 109 | dict(type='ImageToTensor', keys=['img']), 110 | dict(type='Collect', keys=['img']), 111 | ]) 112 | ] 113 | data = dict( 114 | samples_per_gpu=2, 115 | workers_per_gpu=4, 116 
| train=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | img_dir='images/training', 120 | ann_dir='annotations/training', 121 | pipeline=train_pipeline), 122 | val=dict( 123 | type=dataset_type, 124 | data_root=data_root, 125 | img_dir='images/validation', 126 | ann_dir='annotations/validation', 127 | pipeline=test_pipeline), 128 | test=dict( 129 | type=dataset_type, 130 | data_root=data_root, 131 | img_dir='images/validation', 132 | ann_dir='annotations/validation', 133 | pipeline=test_pipeline)) 134 | 135 | evaluation = dict(interval=16000, metric='mIoU') -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/ocr_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from mmcv.cnn import ConvModule 6 | 7 | from mmseg.ops import resize 8 | from ..builder import HEADS 9 | from ..utils import SelfAttentionBlock as _SelfAttentionBlock 10 | from .cascade_decode_head import BaseCascadeDecodeHead 11 | 12 | 13 | class SpatialGatherModule(nn.Module): 14 | """Aggregate the context features according to the initial predicted 15 | probability distribution. 16 | 17 | Employ the soft-weighted method to aggregate the context. 18 | """ 19 | 20 | def __init__(self, scale): 21 | super(SpatialGatherModule, self).__init__() 22 | self.scale = scale 23 | 24 | def forward(self, feats, probs): 25 | """Forward function.""" 26 | batch_size, num_classes, height, width = probs.size() 27 | channels = feats.size(1) 28 | probs = probs.view(batch_size, num_classes, -1) 29 | feats = feats.view(batch_size, channels, -1) 30 | # [batch_size, height*width, num_classes] 31 | feats = feats.permute(0, 2, 1) 32 | # [batch_size, channels, height*width] 33 | probs = F.softmax(self.scale * probs, dim=2) 34 | # [batch_size, channels, num_classes] 35 | ocr_context = torch.matmul(probs, feats) 36 | ocr_context = ocr_context.permute(0, 2, 1).contiguous().unsqueeze(3) 37 | return ocr_context 38 | 39 | 40 | class ObjectAttentionBlock(_SelfAttentionBlock): 41 | """Make a OCR used SelfAttentionBlock.""" 42 | 43 | def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg, 44 | act_cfg): 45 | if scale > 1: 46 | query_downsample = nn.MaxPool2d(kernel_size=scale) 47 | else: 48 | query_downsample = None 49 | super(ObjectAttentionBlock, self).__init__( 50 | key_in_channels=in_channels, 51 | query_in_channels=in_channels, 52 | channels=channels, 53 | out_channels=in_channels, 54 | share_key_query=False, 55 | query_downsample=query_downsample, 56 | key_downsample=None, 57 | key_query_num_convs=2, 58 | key_query_norm=True, 59 | value_out_num_convs=1, 60 | value_out_norm=True, 61 | matmul_norm=True, 62 | with_out=True, 63 | conv_cfg=conv_cfg, 64 | norm_cfg=norm_cfg, 65 | act_cfg=act_cfg) 66 | self.bottleneck = ConvModule( 67 | in_channels * 2, 68 | in_channels, 69 | 1, 70 | conv_cfg=self.conv_cfg, 71 | norm_cfg=self.norm_cfg, 72 | act_cfg=self.act_cfg) 73 | 74 | def forward(self, query_feats, key_feats): 75 | """Forward function.""" 76 | context = super(ObjectAttentionBlock, 77 | self).forward(query_feats, key_feats) 78 | output = self.bottleneck(torch.cat([context, query_feats], dim=1)) 79 | if self.query_downsample is not None: 80 | output = resize(query_feats) 81 | 82 | return output 83 | 84 | 85 | @HEADS.register_module() 86 | class OCRHead(BaseCascadeDecodeHead): 87 | 
"""Object-Contextual Representations for Semantic Segmentation. 88 | 89 | This head is the implementation of `OCRNet 90 | `_. 91 | 92 | Args: 93 | ocr_channels (int): The intermediate channels of OCR block. 94 | scale (int): The scale of probability map in SpatialGatherModule in 95 | Default: 1. 96 | """ 97 | 98 | def __init__(self, ocr_channels, scale=1, **kwargs): 99 | super(OCRHead, self).__init__(**kwargs) 100 | self.ocr_channels = ocr_channels 101 | self.scale = scale 102 | self.object_context_block = ObjectAttentionBlock( 103 | self.channels, 104 | self.ocr_channels, 105 | self.scale, 106 | conv_cfg=self.conv_cfg, 107 | norm_cfg=self.norm_cfg, 108 | act_cfg=self.act_cfg) 109 | self.spatial_gather_module = SpatialGatherModule(self.scale) 110 | 111 | self.bottleneck = ConvModule( 112 | self.in_channels, 113 | self.channels, 114 | 3, 115 | padding=1, 116 | conv_cfg=self.conv_cfg, 117 | norm_cfg=self.norm_cfg, 118 | act_cfg=self.act_cfg) 119 | 120 | def forward(self, inputs, prev_output): 121 | """Forward function.""" 122 | x = self._transform_inputs(inputs) 123 | feats = self.bottleneck(x) 124 | context = self.spatial_gather_module(feats, prev_output) 125 | object_context = self.object_context_block(feats, context) 126 | output = self.cls_seg(object_context) 127 | 128 | return output 129 | --------------------------------------------------------------------------------