├── models
│   ├── __init__.py
│   └── losses.py
├── datasets
│   ├── __init__.py
│   ├── vtab.py
│   ├── functional.py
│   ├── food101.py
│   ├── dtd.py
│   └── volume_transforms.py
├── logo.png
├── video_datasets
│   ├── load_binary.py
│   └── video_datasets.py
├── dense_tasks
│   └── Segmentation
│       ├── mmseg
│       ├── core
│       │   ├── utils
│       │   │   ├── __init__.py
│       │   │   └── misc.py
│       │   ├── __init__.py
│       │   ├── seg
│       │   │   ├── sampler
│       │   │   │   ├── __init__.py
│       │   │   │   ├── base_pixel_sampler.py
│       │   │   │   └── ohem_pixel_sampler.py
│       │   │   ├── __init__.py
│       │   │   └── builder.py
│       │   └── evaluation
│       │       └── __init__.py
│       ├── ops
│       │   ├── __init__.py
│       │   ├── wrappers.py
│       │   └── encoding.py
│       ├── utils
│       │   ├── __init__.py
│       │   ├── collect_env.py
│       │   └── logger.py
│       ├── models
│       │   ├── segmentors
│       │   │   ├── __init__.py
│       │   │   └── cascade_encoder_decoder.py
│       │   ├── necks
│       │   │   ├── __init__.py
│       │   │   ├── multilevel_neck.py
│       │   │   └── mla_neck.py
│       │   ├── __init__.py
│       │   ├── utils
│       │   │   ├── __init__.py
│       │   │   ├── shape_convert.py
│       │   │   ├── make_divisible.py
│       │   │   ├── se_layer.py
│       │   │   ├── res_layer.py
│       │   │   └── up_conv_block.py
│       │   ├── losses
│       │   │   ├── __init__.py
│       │   │   ├── accuracy.py
│       │   │   └── utils.py
│       │   ├── backbones
│       │   │   ├── __init__.py
│       │   │   └── timm_backbone.py
│       │   ├── decode_heads
│       │   │   ├── __init__.py
│       │   │   ├── cc_head.py
│       │   │   ├── nl_head.py
│       │   │   ├── gc_head.py
│       │   │   ├── segformer_head.py
│       │   │   ├── setr_mla_head.py
│       │   │   ├── cascade_decode_head.py
│       │   │   ├── sep_fcn_head.py
│       │   │   ├── fpn_head.py
│       │   │   ├── fcn_head.py
│       │   │   ├── setr_up_head.py
│       │   │   ├── lraspp_head.py
│       │   │   ├── psp_head.py
│       │   │   ├── aspp_head.py
│       │   │   ├── sep_aspp_head.py
│       │   │   ├── stdc_head.py
│       │   │   ├── uper_head.py
│       │   │   └── ocr_head.py
│       │   └── builder.py
│       ├── datasets
│       │   ├── pipelines
│       │   │   ├── formating.py
│       │   │   ├── __init__.py
│       │   │   └── compose.py
│       │   ├── dark_zurich.py
│       │   ├── night_driving.py
│       │   ├── hrf.py
│       │   ├── stare.py
│       │   ├── drive.py
│       │   ├── chase_db1.py
│       │   ├── __init__.py
│       │   ├── voc.py
│       │   └── loveda.py
│       ├── apis
│       │   └── __init__.py
│       ├── version.py
│       └── __init__.py
│       ├── mmcv_custom
│       ├── apex_runner
│       │   ├── __init__.py
│       │   ├── optimizer.py
│       │   ├── checkpoint.py
│       │   └── apex_iter_based_runner.py
│       ├── __init__.py
│       └── layer_decay_optimizer_constructor.py
│       ├── tools
│       ├── dist_test.sh
│       ├── seg_train.sh
│       └── seg_train_coco-stuff164k.sh
│       ├── configs
│       ├── _base_
│       │   ├── default_runtime.py
│       │   ├── schedules
│       │   │   ├── schedule_80k.py
│       │   │   └── schedule_160k.py
│       │   ├── datasets
│       │   │   └── ade20k.py
│       │   └── models
│       │       └── upernet_beit.py
│       └── beit
│           └── upernet
│           ├── our_vit_coco-stuff164k.py
│           └── our_vit.py
│       └── README.md
├── measure_speed.sh
├── train_vtab.sh
├── util
│   ├── lr_sched.py
│   ├── metrics.py
│   ├── logger.py
│   ├── crop.py
│   ├── lars.py
│   ├── datasets.py
│   └── lr_decay.py
├── train_video.sh
├── train_IN21K.sh
├── configs.py
├── .gitignore
├── requirements.txt
└── README.md

/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NUS-HPC-AI-Lab/Dynamic-Tuning/HEAD/logo.png
--------------------------------------------------------------------------------
/video_datasets/load_binary.py:
--------------------------------------------------------------------------------
1 | def load_binary(path):
2 |     with open(path, 'rb') as f:
3 |         return f.read()
4 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .misc import add_prefix
3 |
4 | __all__ = ['add_prefix']
5 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/ops/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .encoding import Encoding
3 | from .wrappers import Upsample, resize
4 |
5 | __all__ = ['Upsample', 'resize', 'Encoding']
6 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/core/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .evaluation import *  # noqa: F401, F403
3 | from .seg import *  # noqa: F401, F403
4 | from .utils import *  # noqa: F401, F403
5 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .collect_env import collect_env
3 | from .logger import get_root_logger
4 |
5 | __all__ = ['get_root_logger', 'collect_env']
6 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/core/seg/sampler/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base_pixel_sampler import BasePixelSampler
3 | from .ohem_pixel_sampler import OHEMPixelSampler
4 |
5 | __all__ = ['BasePixelSampler', 'OHEMPixelSampler']
6 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/core/seg/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .builder import build_pixel_sampler
3 | from .sampler import BasePixelSampler, OHEMPixelSampler
4 |
5 | __all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler']
6 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmcv_custom/apex_runner/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Open-MMLab. All rights reserved.
2 | from .checkpoint import save_checkpoint
3 | from .apex_iter_based_runner import IterBasedRunnerAmp
4 |
5 |
6 | __all__ = [
7 |     'save_checkpoint', 'IterBasedRunnerAmp',
8 | ]
9 |
--------------------------------------------------------------------------------
/dense_tasks/Segmentation/mmseg/models/segmentors/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from .base import BaseSegmentor 3 | from .cascade_encoder_decoder import CascadeEncoderDecoder 4 | from .encoder_decoder import EncoderDecoder 5 | 6 | __all__ = ['BaseSegmentor', 'EncoderDecoder', 'CascadeEncoderDecoder'] 7 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .fpn import FPN 3 | from .ic_neck import ICNeck 4 | from .jpu import JPU 5 | from .mla_neck import MLANeck 6 | from .multilevel_neck import MultiLevelNeck 7 | 8 | __all__ = ['FPN', 'MultiLevelNeck', 'MLANeck', 'ICNeck', 'JPU'] 9 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 10 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/core/seg/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import Registry, build_from_cfg 3 | 4 | PIXEL_SAMPLERS = Registry('pixel sampler') 5 | 6 | 7 | def build_pixel_sampler(cfg, **default_args): 8 | """Build pixel sampler for segmentation map.""" 9 | return build_from_cfg(cfg, PIXEL_SAMPLERS, default_args) 10 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | # flake8: noqa 3 | import warnings 4 | 5 | from .formatting import * 6 | 7 | warnings.warn('DeprecationWarning: mmseg.datasets.pipelines.formating will be ' 8 | 'deprecated in 2021, please replace it with ' 9 | 'mmseg.datasets.pipelines.formatting.') 10 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/tools/seg_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG="configs/beit/upernet/our_vit.py" 4 | GPUS=${GPUS:-8} 5 | PORT=$((12000 + $RANDOM % 20000)) 6 | 7 | CLUSTER=True \ 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/seg_train.py $CONFIG --launcher pytorch --finetune "VIT_BASE_IN21K" 11 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # yapf:disable 2 | log_config = dict( 3 | interval=50, 4 | hooks=[ 5 | dict(type='TextLoggerHook', by_epoch=False), 6 | # dict(type='TensorboardLoggerHook') 7 | ]) 8 | # yapf:enable 9 | dist_params = dict(backend='nccl') 10 | log_level = 'INFO' 11 | load_from = None 12 | resume_from = None 13 | workflow = [('train', 1)] 14 | cudnn_benchmark = True 15 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/core/seg/sampler/base_pixel_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from abc import ABCMeta, abstractmethod 3 | 4 | 5 | class BasePixelSampler(metaclass=ABCMeta): 6 | """Base class of pixel sampler.""" 7 | 8 | def __init__(self, **kwargs): 9 | pass 10 | 11 | @abstractmethod 12 | def sample(self, seg_logit, seg_label): 13 | """Placeholder for sample function.""" 14 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=80000) 8 | checkpoint_config = dict(by_epoch=False, interval=8000) 9 | evaluation = dict(interval=8000, metric='mIoU') -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/dark_zurich.py: -------------------------------------------------------------------------------- 1 | from .builder import DATASETS 2 | from .cityscapes import CityscapesDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class DarkZurichDataset(CityscapesDataset): 7 | """DarkZurichDataset dataset.""" 8 | 9 | def __init__(self, **kwargs): 10 | super().__init__( 11 | img_suffix='_rgb_anon.png', 12 | seg_map_suffix='_gt_labelTrainIds.png', 13 | **kwargs) 14 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .checkpoint import load_checkpoint 4 | from 
.layer_decay_optimizer_constructor import LayerDecayOptimizerConstructor 5 | from .resize_transform import SETR_Resize 6 | from .apex_runner.optimizer import DistOptimizerHook 7 | from .train_api import train_segmentor 8 | 9 | __all__ = ['load_checkpoint', 'LayerDecayOptimizerConstructor', 'SETR_Resize', 'DistOptimizerHook', 'train_segmentor'] 10 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/night_driving.py: -------------------------------------------------------------------------------- 1 | from .builder import DATASETS 2 | from .cityscapes import CityscapesDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class NightDrivingDataset(CityscapesDataset): 7 | """NightDrivingDataset dataset.""" 8 | 9 | def __init__(self, **kwargs): 10 | super().__init__( 11 | img_suffix='_leftImg8bit.png', 12 | seg_map_suffix='_gtCoarse_labelTrainIds.png', 13 | **kwargs) 14 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=160000) 8 | checkpoint_config = dict(by_epoch=False, interval=16000) 9 | evaluation = dict(interval=16000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/tools/seg_train_coco-stuff164k.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG="configs/beit/upernet/our_vit_coco-stuff164k.py" 4 | GPUS=${GPUS:-8} 5 | PORT=$((12000 + $RANDOM % 20000)) 6 | 7 | CLUSTER=True \ 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/seg_train.py $CONFIG --launcher pytorch --finetune "VIT_BASE_IN21K" --dataset_name "coco-stuff164k" 11 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def add_prefix(inputs, prefix): 3 | """Add prefix for dict. 4 | 5 | Args: 6 | inputs (dict): The input dict with str keys. 7 | prefix (str): The prefix to add. 8 | 9 | Returns: 10 | 11 | dict: The dict with keys updated with ``prefix``. 
12 | """ 13 | 14 | outputs = dict() 15 | for name, value in inputs.items(): 16 | outputs[f'{prefix}.{name}'] = value 17 | 18 | return outputs 19 | -------------------------------------------------------------------------------- /measure_speed.sh: -------------------------------------------------------------------------------- 1 | DATASET=cifar100_full 2 | CLUSTER=True \ 3 | 4 | python speed.py \ 5 | --batch_size 128 \ 6 | --cls_token \ 7 | --finetune VIT_BASE_IN21K \ 8 | --dist_eval \ 9 | --output_dir "./output/IN21K/0.5/${DATASET}" \ 10 | --drop_path 0.0 \ 11 | --blr 1e-3 \ 12 | --weight_decay 0.01 \ 13 | --dataset "${DATASET}" \ 14 | --ffn_adapt \ 15 | --auto_remove \ 16 | --token_target_ratio 0.5 \ 17 | --eval \ 18 | --eval_ckpt "your_ckpt" 19 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .class_names import get_classes, get_palette 3 | from .eval_hooks import DistEvalHook, EvalHook 4 | from .metrics import (eval_metrics, intersect_and_union, mean_dice, 5 | mean_fscore, mean_iou, pre_eval_to_metrics) 6 | 7 | __all__ = [ 8 | 'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore', 9 | 'eval_metrics', 'get_classes', 'get_palette', 'pre_eval_to_metrics', 10 | 'intersect_and_union' 11 | ] 12 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/apis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .inference import inference_segmentor, init_segmentor, show_result_pyplot 3 | from .test import multi_gpu_test, single_gpu_test 4 | from .train import (get_root_logger, init_random_seed, set_random_seed, 5 | train_segmentor) 6 | 7 | __all__ = [ 8 | 'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor', 9 | 'inference_segmentor', 'multi_gpu_test', 'single_gpu_test', 10 | 'show_result_pyplot', 'init_random_seed' 11 | ] 12 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import collect_env as collect_base_env 3 | from mmcv.utils import get_git_hash 4 | 5 | import mmseg 6 | 7 | 8 | def collect_env(): 9 | """Collect the information of the running environments.""" 10 | env_info = collect_base_env() 11 | env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}' 12 | 13 | return env_info 14 | 15 | 16 | if __name__ == '__main__': 17 | for name, val in collect_env().items(): 18 | print('{}: {}'.format(name, val)) 19 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 
2 | 3 | __version__ = '0.20.2' 4 | 5 | 6 | def parse_version_info(version_str): 7 | version_info = [] 8 | for x in version_str.split('.'): 9 | if x.isdigit(): 10 | version_info.append(int(x)) 11 | elif x.find('rc') != -1: 12 | patch_version = x.split('rc') 13 | version_info.append(int(patch_version[0])) 14 | version_info.append(f'rc{patch_version[1]}') 15 | return tuple(version_info) 16 | 17 | 18 | version_info = parse_version_info(__version__) 19 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .backbones import * # noqa: F401,F403 3 | from .builder import (BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, 4 | build_head, build_loss, build_segmentor) 5 | from .decode_heads import * # noqa: F401,F403 6 | from .losses import * # noqa: F401,F403 7 | from .necks import * # noqa: F401,F403 8 | from .segmentors import * # noqa: F401,F403 9 | 10 | __all__ = [ 11 | 'BACKBONES', 'HEADS', 'LOSSES', 'SEGMENTORS', 'build_backbone', 12 | 'build_head', 'build_loss', 'build_segmentor' 13 | ] 14 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .embed import PatchEmbed 2 | from .inverted_residual import InvertedResidual, InvertedResidualV3 3 | from .make_divisible import make_divisible 4 | from .res_layer import ResLayer 5 | from .se_layer import SELayer 6 | from .self_attention_block import SelfAttentionBlock 7 | from .shape_convert import nchw_to_nlc, nlc_to_nchw 8 | from .up_conv_block import UpConvBlock 9 | 10 | __all__ = [ 11 | 'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual', 12 | 'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'PatchEmbed', 13 | 'nchw_to_nlc', 'nlc_to_nchw' 14 | ] 15 | -------------------------------------------------------------------------------- /train_vtab.sh: -------------------------------------------------------------------------------- 1 | GPU_COUNT=8 2 | DATASETS=(cifar caltech101 dtd oxford_flowers102 oxford_iiit_pet svhn sun397 patch_camelyon eurosat resisc45 diabetic_retinopathy clevr_count clevr_dist dmlab kitti dsprites_loc dsprites_ori smallnorb_azi smallnorb_ele) 3 | i=0 4 | 5 | for DATASET in "${DATASETS[@]}" 6 | do 7 | GPU_ID=$((i % GPU_COUNT)) 8 | CLUSTER=True CUDA_VISIBLE_DEVICES=$GPU_ID python main_vtab.py --batch_size 64 --cls_token --finetune VIT_BASE_IN21K --dist_eval --output_dir "./output_vtab/${DATASET}" --drop_path 0.0 --dataset $DATASET --ffn_num 16 --ffn_adapt --auto_remove --eval_freq 1 --token_target_ratio 0.5 & 9 | i=$((i + 1)) 10 | done 11 | wait 12 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .accuracy import Accuracy, accuracy 3 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 4 | cross_entropy, mask_cross_entropy) 5 | from .dice_loss import DiceLoss 6 | from .focal_loss import FocalLoss 7 | from .lovasz_loss import LovaszLoss 8 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 9 | 10 | __all__ = [ 11 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 12 | 'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss', 13 | 'weight_reduce_loss', 'weighted_loss', 'LovaszLoss', 'DiceLoss', 14 | 'FocalLoss' 15 | ] 16 | -------------------------------------------------------------------------------- /util/lr_sched.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | 9 | def adjust_learning_rate(optimizer, epoch, args): 10 | """Decay the learning rate with half-cycle cosine after warmup""" 11 | if epoch < args.warmup_epochs: 12 | lr = args.lr * epoch / args.warmup_epochs 13 | else: 14 | lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * \ 15 | (1. + math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs))) 16 | for param_group in optimizer.param_groups: 17 | if "lr_scale" in param_group: 18 | param_group["lr"] = lr * param_group["lr_scale"] 19 | else: 20 | param_group["lr"] = lr 21 | return lr 22 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/hrf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class HRFDataset(CustomDataset): 10 | """HRF dataset. 11 | 12 | In segmentation map annotation for HRF, 0 stands for background, which is 13 | included in 2 categories. ``reduce_zero_label`` is fixed to False. The 14 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 15 | '.png'. 16 | """ 17 | 18 | CLASSES = ('background', 'vessel') 19 | 20 | PALETTE = [[120, 120, 120], [6, 230, 230]] 21 | 22 | def __init__(self, **kwargs): 23 | super(HRFDataset, self).__init__( 24 | img_suffix='.png', 25 | seg_map_suffix='.png', 26 | reduce_zero_label=False, 27 | **kwargs) 28 | assert osp.exists(self.img_dir) 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/stare.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class STAREDataset(CustomDataset): 10 | """STARE dataset. 11 | 12 | In segmentation map annotation for STARE, 0 stands for background, which is 13 | included in 2 categories. ``reduce_zero_label`` is fixed to False. The 14 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 15 | '.ah.png'. 
16 | """ 17 | 18 | CLASSES = ('background', 'vessel') 19 | 20 | PALETTE = [[120, 120, 120], [6, 230, 230]] 21 | 22 | def __init__(self, **kwargs): 23 | super(STAREDataset, self).__init__( 24 | img_suffix='.png', 25 | seg_map_suffix='.ah.png', 26 | reduce_zero_label=False, 27 | **kwargs) 28 | assert osp.exists(self.img_dir) 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/drive.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class DRIVEDataset(CustomDataset): 10 | """DRIVE dataset. 11 | 12 | In segmentation map annotation for DRIVE, 0 stands for background, which is 13 | included in 2 categories. ``reduce_zero_label`` is fixed to False. The 14 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 15 | '_manual1.png'. 16 | """ 17 | 18 | CLASSES = ('background', 'vessel') 19 | 20 | PALETTE = [[120, 120, 120], [6, 230, 230]] 21 | 22 | def __init__(self, **kwargs): 23 | super(DRIVEDataset, self).__init__( 24 | img_suffix='.png', 25 | seg_map_suffix='_manual1.png', 26 | reduce_zero_label=False, 27 | **kwargs) 28 | assert osp.exists(self.img_dir) 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/chase_db1.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class ChaseDB1Dataset(CustomDataset): 10 | """Chase_db1 dataset. 11 | 12 | In segmentation map annotation for Chase_db1, 0 stands for background, 13 | which is included in 2 categories. ``reduce_zero_label`` is fixed to False. 14 | The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to 15 | '_1stHO.png'. 16 | """ 17 | 18 | CLASSES = ('background', 'vessel') 19 | 20 | PALETTE = [[120, 120, 120], [6, 230, 230]] 21 | 22 | def __init__(self, **kwargs): 23 | super(ChaseDB1Dataset, self).__init__( 24 | img_suffix='.png', 25 | seg_map_suffix='_1stHO.png', 26 | reduce_zero_label=False, 27 | **kwargs) 28 | assert osp.exists(self.img_dir) 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .compose import Compose 3 | from .formatting import (Collect, ImageToTensor, ToDataContainer, ToTensor, 4 | Transpose, to_tensor) 5 | from .loading import LoadAnnotations, LoadImageFromFile 6 | from .test_time_aug import MultiScaleFlipAug 7 | from .transforms import (CLAHE, AdjustGamma, Normalize, Pad, 8 | PhotoMetricDistortion, RandomCrop, RandomCutOut, 9 | RandomFlip, RandomRotate, Rerange, Resize, RGB2Gray, 10 | SegRescale) 11 | 12 | __all__ = [ 13 | 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', 14 | 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', 15 | 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 16 | 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', 17 | 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray', 'RandomCutOut' 18 | ] 19 | -------------------------------------------------------------------------------- /util/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | def accuracy(output, target, topk=(1,)): 5 | """Computes the accuracy over the k top predictions for the specified values of k""" 6 | maxk = min(max(topk), output.size()[1]) 7 | batch_size = target.size(0) 8 | _, pred = output.topk(maxk, 1, True, True) 9 | pred = pred.t() 10 | correct = pred.eq(target.reshape(1, -1).expand_as(pred)) 11 | return [correct[:min(k, maxk)].reshape(-1).float().sum(0) * 100. / batch_size for k in topk] 12 | 13 | 14 | def mean_per_class_accuracy(pred, target, num_classes): 15 | pred_score, pred_label = torch.topk(pred, k=1) 16 | pred_label = pred_label.flatten() 17 | 18 | pred_label = F.one_hot(pred_label, num_classes) 19 | target_label = F.one_hot(target, num_classes) 20 | class_correct = (pred_label & target_label) 21 | 22 | tp_sum = class_correct.sum(0) 23 | gt_sum = target_label.sum(0) 24 | recall = tp_sum / torch.clamp(gt_sum, min=1).float() * 100 25 | recall = recall.mean(0) 26 | return recall -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import logging 3 | 4 | from mmcv.utils import get_logger 5 | 6 | 7 | def get_root_logger(log_file=None, log_level=logging.INFO): 8 | """Get the root logger. 9 | 10 | The logger will be initialized if it has not been initialized. By default a 11 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 12 | also be added. The name of the root logger is the top-level package name, 13 | e.g., "mmseg". 14 | 15 | Args: 16 | log_file (str | None): The log filename. If specified, a FileHandler 17 | will be added to the root logger. 18 | log_level (int): The root logger level. Note that only the process of 19 | rank 0 is affected, while other processes will set the level to 20 | "Error" and be silent most of the time. 21 | 22 | Returns: 23 | logging.Logger: The root logger. 24 | """ 25 | 26 | logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level) 27 | 28 | return logger 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/utils/shape_convert.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | def nlc_to_nchw(x, hw_shape): 3 | """Convert [N, L, C] shape tensor to [N, C, H, W] shape tensor. 4 | 5 | Args: 6 | x (Tensor): The input tensor of shape [N, L, C] before conversion. 7 | hw_shape (Sequence[int]): The height and width of output feature map. 8 | 9 | Returns: 10 | Tensor: The output tensor of shape [N, C, H, W] after conversion. 11 | """ 12 | H, W = hw_shape 13 | assert len(x.shape) == 3 14 | B, L, C = x.shape 15 | assert L == H * W, 'The seq_len doesn\'t match H, W' 16 | return x.transpose(1, 2).reshape(B, C, H, W) 17 | 18 | 19 | def nchw_to_nlc(x): 20 | """Flatten [N, C, H, W] shape tensor to [N, L, C] shape tensor. 21 | 22 | Args: 23 | x (Tensor): The input tensor of shape [N, C, H, W] before conversion. 24 | 25 | Returns: 26 | Tensor: The output tensor of shape [N, L, C] after conversion. 27 | """ 28 | assert len(x.shape) == 4 29 | return x.flatten(2).transpose(1, 2).contiguous() 30 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .bisenetv1 import BiSeNetV1 3 | from .bisenetv2 import BiSeNetV2 4 | from .cgnet import CGNet 5 | from .erfnet import ERFNet 6 | from .fast_scnn import FastSCNN 7 | from .hrnet import HRNet 8 | from .icnet import ICNet 9 | from .mit import MixVisionTransformer 10 | from .mobilenet_v2 import MobileNetV2 11 | from .mobilenet_v3 import MobileNetV3 12 | from .resnest import ResNeSt 13 | from .resnet import ResNet, ResNetV1c, ResNetV1d 14 | from .resnext import ResNeXt 15 | from .stdc import STDCContextPathNet, STDCNet 16 | from .swin import SwinTransformer 17 | from .timm_backbone import TIMMBackbone 18 | from .twins import PCPVT, SVT 19 | from .unet import UNet 20 | from .vit import VisionTransformer 21 | 22 | __all__ = [ 23 | 'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN', 24 | 'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3', 25 | 'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer', 26 | 'BiSeNetV1', 'BiSeNetV2', 'ICNet', 'TIMMBackbone', 'ERFNet', 'PCPVT', 27 | 'SVT', 'STDCNet', 'STDCContextPathNet' 28 | ] 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .ade import ADE20KDataset 3 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset 4 | from .chase_db1 import ChaseDB1Dataset 5 | from .cityscapes import CityscapesDataset 6 | from .coco_stuff import COCOStuffDataset 7 | from .custom import CustomDataset 8 | from .dark_zurich import DarkZurichDataset 9 | from .dataset_wrappers import ConcatDataset, RepeatDataset 10 | from .drive import DRIVEDataset 11 | from .hrf import HRFDataset 12 | from .loveda import LoveDADataset 13 | from .night_driving import NightDrivingDataset 14 | from .pascal_context import PascalContextDataset, PascalContextDataset59 15 | from .stare import STAREDataset 16 | from .voc import PascalVOCDataset 17 | 18 | __all__ = [ 19 | 'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 20 | 'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset', 21 | 'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset', 22 | 'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset', 23 | 'STAREDataset', 'DarkZurichDataset', 'NightDrivingDataset', 24 | 'COCOStuffDataset', 'LoveDADataset' 25 | ] 26 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/voc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | from .builder import DATASETS 5 | from .custom import CustomDataset 6 | 7 | 8 | @DATASETS.register_module() 9 | class PascalVOCDataset(CustomDataset): 10 | """Pascal VOC dataset. 11 | 12 | Args: 13 | split (str): Split txt file for Pascal VOC. 14 | """ 15 | 16 | CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 17 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 18 | 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 19 | 'train', 'tvmonitor') 20 | 21 | PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], 22 | [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], 23 | [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], 24 | [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], 25 | [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] 26 | 27 | def __init__(self, split, **kwargs): 28 | super(PascalVOCDataset, self).__init__( 29 | img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) 30 | assert osp.exists(self.img_dir) and self.split is not None 31 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmcv_custom/apex_runner/optimizer.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import OptimizerHook, HOOKS 2 | try: 3 | import apex 4 | except: 5 | print('apex is not installed') 6 | 7 | 8 | @HOOKS.register_module() 9 | class DistOptimizerHook(OptimizerHook): 10 | """Optimizer hook for distributed training.""" 11 | 12 | def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False): 13 | self.grad_clip = grad_clip 14 | self.coalesce = coalesce 15 | self.bucket_size_mb = bucket_size_mb 16 | self.update_interval = update_interval 17 | self.use_fp16 = use_fp16 18 | 19 | def before_run(self, runner): 20 | runner.optimizer.zero_grad() 21 | 22 | def after_train_iter(self, runner): 23 | runner.outputs['loss'] /= self.update_interval 24 | if self.use_fp16: 25 | with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as 
scaled_loss: 26 | scaled_loss.backward() 27 | else: 28 | runner.outputs['loss'].backward() 29 | if self.every_n_iters(runner, self.update_interval): 30 | if self.grad_clip is not None: 31 | self.clip_grads(runner.model.parameters()) 32 | runner.optimizer.step() 33 | runner.optimizer.zero_grad() 34 | -------------------------------------------------------------------------------- /util/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | import functools 5 | from termcolor import colored 6 | 7 | 8 | @functools.lru_cache() 9 | def create_logger(output_dir, dist_rank=0, name=''): 10 | # create logger 11 | logger = logging.getLogger(name) 12 | logger.setLevel(logging.DEBUG) 13 | logger.propagate = False 14 | 15 | # create formatter 16 | fmt = '[%(asctime)s %(name)s] (%(filename)s %(lineno)d): %(levelname)s %(message)s' 17 | color_fmt = colored('[%(asctime)s %(name)s]', 'green') + \ 18 | colored('(%(filename)s %(lineno)d)', 'yellow') + ': %(levelname)s %(message)s' 19 | 20 | # create console handlers for master process 21 | if dist_rank == 0: 22 | console_handler = logging.StreamHandler(sys.stdout) 23 | console_handler.setLevel(logging.DEBUG) 24 | console_handler.setFormatter( 25 | logging.Formatter(fmt=color_fmt, datefmt='%Y-%m-%d %H:%M:%S')) 26 | logger.addHandler(console_handler) 27 | 28 | # create file handlers 29 | file_handler = logging.FileHandler(os.path.join(output_dir, f'log_rank{dist_rank}.txt'), mode='a') 30 | file_handler.setLevel(logging.DEBUG) 31 | file_handler.setFormatter(logging.Formatter(fmt=fmt, datefmt='%Y-%m-%d %H:%M:%S')) 32 | logger.addHandler(file_handler) 33 | 34 | return logger 35 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/utils/make_divisible.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | def make_divisible(value, divisor, min_value=None, min_ratio=0.9): 3 | """Make divisible function. 4 | 5 | This function rounds the channel number to the nearest value that can be 6 | divisible by the divisor. It is taken from the original tf repo. It ensures 7 | that all layers have a channel number that is divisible by divisor. It can 8 | be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa 9 | 10 | Args: 11 | value (int): The original channel number. 12 | divisor (int): The divisor to fully divide the channel number. 13 | min_value (int): The minimum value of the output channel. 14 | Default: None, means that the minimum value equal to the divisor. 15 | min_ratio (float): The minimum ratio of the rounded channel number to 16 | the original channel number. Default: 0.9. 17 | 18 | Returns: 19 | int: The modified output channel number. 20 | """ 21 | 22 | if min_value is None: 23 | min_value = divisor 24 | new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) 25 | # Make sure that round down does not go down by more than (1-min_ratio). 26 | if new_value < min_ratio * value: 27 | new_value += divisor 28 | return new_value 29 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .ann_head import ANNHead 3 | from .apc_head import APCHead 4 | from .aspp_head import ASPPHead 5 | from .cc_head import CCHead 6 | from .da_head import DAHead 7 | from .dm_head import DMHead 8 | from .dnl_head import DNLHead 9 | from .dpt_head import DPTHead 10 | from .ema_head import EMAHead 11 | from .enc_head import EncHead 12 | from .fcn_head import FCNHead 13 | from .fpn_head import FPNHead 14 | from .gc_head import GCHead 15 | from .isa_head import ISAHead 16 | from .lraspp_head import LRASPPHead 17 | from .nl_head import NLHead 18 | from .ocr_head import OCRHead 19 | from .point_head import PointHead 20 | from .psa_head import PSAHead 21 | from .psp_head import PSPHead 22 | from .segformer_head import SegformerHead 23 | from .sep_aspp_head import DepthwiseSeparableASPPHead 24 | from .sep_fcn_head import DepthwiseSeparableFCNHead 25 | from .setr_mla_head import SETRMLAHead 26 | from .setr_up_head import SETRUPHead 27 | from .stdc_head import STDCHead 28 | from .uper_head import UPerHead 29 | 30 | __all__ = [ 31 | 'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead', 32 | 'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead', 33 | 'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead', 34 | 'PointHead', 'APCHead', 'DMHead', 'LRASPPHead', 'SETRUPHead', 35 | 'SETRMLAHead', 'DPTHead', 'SETRMLAHead', 'SegformerHead', 'ISAHead', 36 | 'STDCHead' 37 | ] 38 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/cc_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | 4 | from ..builder import HEADS 5 | from .fcn_head import FCNHead 6 | 7 | try: 8 | from mmcv.ops import CrissCrossAttention 9 | except ModuleNotFoundError: 10 | CrissCrossAttention = None 11 | 12 | 13 | @HEADS.register_module() 14 | class CCHead(FCNHead): 15 | """CCNet: Criss-Cross Attention for Semantic Segmentation. 16 | 17 | This head is the implementation of `CCNet 18 | `_. 19 | 20 | Args: 21 | recurrence (int): Number of recurrence of Criss Cross Attention 22 | module. Default: 2. 23 | """ 24 | 25 | def __init__(self, recurrence=2, **kwargs): 26 | if CrissCrossAttention is None: 27 | raise RuntimeError('Please install mmcv-full for ' 28 | 'CrissCrossAttention ops') 29 | super(CCHead, self).__init__(num_convs=2, **kwargs) 30 | self.recurrence = recurrence 31 | self.cca = CrissCrossAttention(self.channels) 32 | 33 | def forward(self, inputs): 34 | """Forward function.""" 35 | x = self._transform_inputs(inputs) 36 | output = self.convs[0](x) 37 | for _ in range(self.recurrence): 38 | output = self.cca(output) 39 | output = self.convs[1](output) 40 | if self.concat_input: 41 | output = self.conv_cat(torch.cat([x, output], dim=1)) 42 | output = self.cls_seg(output) 43 | return output 44 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import warnings 3 | 4 | from mmcv.cnn import MODELS as MMCV_MODELS 5 | from mmcv.cnn.bricks.registry import ATTENTION as MMCV_ATTENTION 6 | from mmcv.utils import Registry 7 | 8 | MODELS = Registry('models', parent=MMCV_MODELS) 9 | ATTENTION = Registry('attention', parent=MMCV_ATTENTION) 10 | 11 | BACKBONES = MODELS 12 | NECKS = MODELS 13 | HEADS = MODELS 14 | LOSSES = MODELS 15 | SEGMENTORS = MODELS 16 | 17 | 18 | def build_backbone(cfg): 19 | """Build backbone.""" 20 | return BACKBONES.build(cfg) 21 | 22 | 23 | def build_neck(cfg): 24 | """Build neck.""" 25 | return NECKS.build(cfg) 26 | 27 | 28 | def build_head(cfg): 29 | """Build head.""" 30 | return HEADS.build(cfg) 31 | 32 | 33 | def build_loss(cfg): 34 | """Build loss.""" 35 | return LOSSES.build(cfg) 36 | 37 | 38 | def build_segmentor(cfg, train_cfg=None, test_cfg=None): 39 | """Build segmentor.""" 40 | if train_cfg is not None or test_cfg is not None: 41 | warnings.warn( 42 | 'train_cfg and test_cfg is deprecated, ' 43 | 'please specify them in model', UserWarning) 44 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 45 | 'train_cfg specified in both outer field and model field ' 46 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 47 | 'test_cfg specified in both outer field and model field ' 48 | return SEGMENTORS.build( 49 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 50 | -------------------------------------------------------------------------------- /util/crop.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | 9 | import PIL.Image 10 | import torch 11 | 12 | from torchvision import transforms 13 | from torchvision.transforms import functional as F 14 | 15 | 16 | class RandomResizedCrop(transforms.RandomResizedCrop): 17 | """ 18 | RandomResizedCrop for matching TF/TPU implementation: no for-loop is used. 19 | This may lead to results different with torchvision's version. 20 | Following BYOL's TF code: 21 | https://github.com/deepmind/deepmind-research/blob/master/byol/utils/dataset.py#L206 22 | """ 23 | @staticmethod 24 | def get_params(img, scale, ratio): 25 | assert isinstance(img, PIL.Image.Image) 26 | # width, height = F._get_image_size(img) 27 | width, height = img.width, img.height 28 | area = height * width 29 | 30 | target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item() 31 | log_ratio = torch.log(torch.tensor(ratio)) 32 | aspect_ratio = torch.exp( 33 | torch.empty(1).uniform_(log_ratio[0], log_ratio[1]) 34 | ).item() 35 | 36 | w = int(round(math.sqrt(target_area * aspect_ratio))) 37 | h = int(round(math.sqrt(target_area / aspect_ratio))) 38 | 39 | w = min(w, width) 40 | h = min(h, height) 41 | 42 | i = torch.randint(0, height - h + 1, size=(1,)).item() 43 | j = torch.randint(0, width - w + 1, size=(1,)).item() 44 | 45 | return i, j, h, w -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import collections 3 | 4 | from mmcv.utils import build_from_cfg 5 | 6 | from ..builder import PIPELINES 7 | 8 | 9 | @PIPELINES.register_module() 10 | class Compose(object): 11 | """Compose multiple transforms sequentially. 12 | 13 | Args: 14 | transforms (Sequence[dict | callable]): Sequence of transform object or 15 | config dict to be composed. 16 | """ 17 | 18 | def __init__(self, transforms): 19 | assert isinstance(transforms, collections.abc.Sequence) 20 | self.transforms = [] 21 | for transform in transforms: 22 | if isinstance(transform, dict): 23 | transform = build_from_cfg(transform, PIPELINES) 24 | self.transforms.append(transform) 25 | elif callable(transform): 26 | self.transforms.append(transform) 27 | else: 28 | raise TypeError('transform must be callable or a dict') 29 | 30 | def __call__(self, data): 31 | """Call function to apply transforms sequentially. 32 | 33 | Args: 34 | data (dict): A result dict contains the data to transform. 35 | 36 | Returns: 37 | dict: Transformed data. 38 | """ 39 | 40 | for t in self.transforms: 41 | data = t(data) 42 | if data is None: 43 | return None 44 | return data 45 | 46 | def __repr__(self): 47 | format_string = self.__class__.__name__ + '(' 48 | for t in self.transforms: 49 | format_string += '\n' 50 | format_string += f' {t}' 51 | format_string += '\n)' 52 | return format_string 53 | -------------------------------------------------------------------------------- /train_video.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ADAPTER_CHANNEL=$1 4 | GPUS=${GPUS:-8} 5 | PORT=$((12000 + $RANDOM % 20000)) 6 | MASTER_ADDR=${MASTER_ADDR:-127.0.0.1} 7 | 8 | 9 | 10 | DATASET=K400 11 | CLUSTER=True \ 12 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ 13 | python -m torch.distributed.launch \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | --use_env \ 18 | main_video.py \ 19 | --batch_size 16 \ 20 | --cls_token \ 21 | --finetune VIT_BASE_IN21K \ 22 | --dist_eval \ 23 | --output_dir "./video_output/IN21K/0.5/${DATASET}" \ 24 | --drop_path 0.0 \ 25 | --blr 1e-3 \ 26 | --epochs 12 \ 27 | --warmup_epochs 2 \ 28 | --weight_decay 0.01 \ 29 | --dataset "${DATASET}" \ 30 | --ffn_adapt \ 31 | --auto_remove \ 32 | --token_target_ratio 0.5 33 | 34 | 35 | DATASET=SSV2 36 | CLUSTER=True \ 37 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ 38 | python -m torch.distributed.launch \ 39 | --master_addr=$MASTER_ADDR \ 40 | --nproc_per_node=$GPUS \ 41 | --master_port=$PORT \ 42 | --use_env \ 43 | main_video.py \ 44 | --batch_size 16 \ 45 | --cls_token \ 46 | --finetune VIT_BASE_IN21K \ 47 | --dist_eval \ 48 | --output_dir "./video_output/IN21K/0.5/${DATASET}" \ 49 | --drop_path 0.0 \ 50 | --blr 1e-3 \ 51 | --epochs 50 \ 52 | --warmup_epochs 2 \ 53 | --eval_freq 5 \ 54 | --weight_decay 0.01 \ 55 | --dataset "${DATASET}" \ 56 | --ffn_adapt \ 57 | --auto_remove \ 58 | --token_target_ratio 0.5 -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/nl_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.cnn import NonLocal2d 4 | 5 | from ..builder import HEADS 6 | from .fcn_head import FCNHead 7 | 8 | 9 | @HEADS.register_module() 10 | class NLHead(FCNHead): 11 | """Non-local Neural Networks. 12 | 13 | This head is the implementation of `NLNet 14 | `_. 
15 | 16 | Args: 17 | reduction (int): Reduction factor of projection transform. Default: 2. 18 | use_scale (bool): Whether to scale pairwise_weight by 19 | sqrt(1/inter_channels). Default: True. 20 | mode (str): The nonlocal mode. Options are 'embedded_gaussian', 21 | 'dot_product'. Default: 'embedded_gaussian.'. 22 | """ 23 | 24 | def __init__(self, 25 | reduction=2, 26 | use_scale=True, 27 | mode='embedded_gaussian', 28 | **kwargs): 29 | super(NLHead, self).__init__(num_convs=2, **kwargs) 30 | self.reduction = reduction 31 | self.use_scale = use_scale 32 | self.mode = mode 33 | self.nl_block = NonLocal2d( 34 | in_channels=self.channels, 35 | reduction=self.reduction, 36 | use_scale=self.use_scale, 37 | conv_cfg=self.conv_cfg, 38 | norm_cfg=self.norm_cfg, 39 | mode=self.mode) 40 | 41 | def forward(self, inputs): 42 | """Forward function.""" 43 | x = self._transform_inputs(inputs) 44 | output = self.convs[0](x) 45 | output = self.nl_block(output) 46 | output = self.convs[1](output) 47 | if self.concat_input: 48 | output = self.conv_cat(torch.cat([x, output], dim=1)) 49 | output = self.cls_seg(output) 50 | return output 51 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/gc_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from mmcv.cnn import ContextBlock 4 | 5 | from ..builder import HEADS 6 | from .fcn_head import FCNHead 7 | 8 | 9 | @HEADS.register_module() 10 | class GCHead(FCNHead): 11 | """GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond. 12 | 13 | This head is the implementation of `GCNet 14 | `_. 15 | 16 | Args: 17 | ratio (float): Multiplier of channels ratio. Default: 1/4. 18 | pooling_type (str): The pooling type of context aggregation. 19 | Options are 'att', 'avg'. Default: 'avg'. 20 | fusion_types (tuple[str]): The fusion type for feature fusion. 21 | Options are 'channel_add', 'channel_mul'. Default: ('channel_add',) 22 | """ 23 | 24 | def __init__(self, 25 | ratio=1 / 4., 26 | pooling_type='att', 27 | fusion_types=('channel_add', ), 28 | **kwargs): 29 | super(GCHead, self).__init__(num_convs=2, **kwargs) 30 | self.ratio = ratio 31 | self.pooling_type = pooling_type 32 | self.fusion_types = fusion_types 33 | self.gc_block = ContextBlock( 34 | in_channels=self.channels, 35 | ratio=self.ratio, 36 | pooling_type=self.pooling_type, 37 | fusion_types=self.fusion_types) 38 | 39 | def forward(self, inputs): 40 | """Forward function.""" 41 | x = self._transform_inputs(inputs) 42 | output = self.convs[0](x) 43 | output = self.gc_block(output) 44 | output = self.convs[1](output) 45 | if self.concat_input: 46 | output = self.conv_cat(torch.cat([x, output], dim=1)) 47 | output = self.cls_seg(output) 48 | return output 49 | -------------------------------------------------------------------------------- /util/lars.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | # -------------------------------------------------------- 7 | # LARS optimizer, implementation from MoCo v3: 8 | # https://github.com/facebookresearch/moco-v3 9 | # -------------------------------------------------------- 10 | 11 | import torch 12 | 13 | 14 | class LARS(torch.optim.Optimizer): 15 | """ 16 | LARS optimizer, no rate scaling or weight decay for parameters <= 1D. 17 | """ 18 | def __init__(self, params, lr=0, weight_decay=0, momentum=0.9, trust_coefficient=0.001): 19 | defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum, trust_coefficient=trust_coefficient) 20 | super().__init__(params, defaults) 21 | 22 | @torch.no_grad() 23 | def step(self): 24 | for g in self.param_groups: 25 | for p in g['params']: 26 | dp = p.grad 27 | 28 | if dp is None: 29 | continue 30 | 31 | if p.ndim > 1: # if not normalization gamma/beta or bias 32 | dp = dp.add(p, alpha=g['weight_decay']) 33 | param_norm = torch.norm(p) 34 | update_norm = torch.norm(dp) 35 | one = torch.ones_like(param_norm) 36 | q = torch.where(param_norm > 0., 37 | torch.where(update_norm > 0, 38 | (g['trust_coefficient'] * param_norm / update_norm), one), 39 | one) 40 | dp = dp.mul(q) 41 | 42 | param_state = self.state[p] 43 | if 'mu' not in param_state: 44 | param_state['mu'] = torch.zeros_like(p) 45 | mu = param_state['mu'] 46 | mu.mul_(g['momentum']).add_(dp) 47 | p.add_(mu, alpha=-g['lr']) -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/ops/wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings 3 | 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | def resize(input, 9 | size=None, 10 | scale_factor=None, 11 | mode='nearest', 12 | align_corners=None, 13 | warning=True): 14 | if warning: 15 | if size is not None and align_corners: 16 | input_h, input_w = tuple(int(x) for x in input.shape[2:]) 17 | output_h, output_w = tuple(int(x) for x in size) 18 | if output_h > input_h or output_w > output_h: 19 | if ((output_h > 1 and output_w > 1 and input_h > 1 20 | and input_w > 1) and (output_h - 1) % (input_h - 1) 21 | and (output_w - 1) % (input_w - 1)): 22 | warnings.warn( 23 | f'When align_corners={align_corners}, ' 24 | 'the output would more aligned if ' 25 | f'input size {(input_h, input_w)} is `x+1` and ' 26 | f'out size {(output_h, output_w)} is `nx+1`') 27 | return F.interpolate(input, size, scale_factor, mode, align_corners) 28 | 29 | 30 | class Upsample(nn.Module): 31 | 32 | def __init__(self, 33 | size=None, 34 | scale_factor=None, 35 | mode='nearest', 36 | align_corners=None): 37 | super(Upsample, self).__init__() 38 | self.size = size 39 | if isinstance(scale_factor, tuple): 40 | self.scale_factor = tuple(float(factor) for factor in scale_factor) 41 | else: 42 | self.scale_factor = float(scale_factor) if scale_factor else None 43 | self.mode = mode 44 | self.align_corners = align_corners 45 | 46 | def forward(self, x): 47 | if not self.size: 48 | size = [int(t * self.scale_factor) for t in x.shape[-2:]] 49 | else: 50 | size = self.size 51 | return resize(x, size, None, self.mode, self.align_corners) 52 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/configs/_base_/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = 
'/home/zhaowangbo.zwb/dataset/ADEChallengeData2016/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type=dataset_type, 39 | data_root=data_root, 40 | img_dir='images/training', 41 | ann_dir='annotations/training', 42 | pipeline=train_pipeline), 43 | val=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='images/validation', 47 | ann_dir='annotations/validation', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='images/validation', 53 | ann_dir='annotations/validation', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/configs/_base_/models/upernet_beit.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # BEIT: BERT Pre-Training of Image Transformers (https://arxiv.org/abs/2106.08254) 3 | # Github source: https://github.com/microsoft/unilm/tree/master/beit 4 | # Copyright (c) 2021 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # By Hangbo Bao 7 | # Based on timm, mmseg, setr, xcit and swin code bases 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm 9 | # https://github.com/fudan-zvg/SETR 10 | # https://github.com/facebookresearch/xcit/ 11 | # https://github.com/microsoft/Swin-Transformer 12 | # --------------------------------------------------------' 13 | # norm_cfg = dict(type='SyncBN', requires_grad=True) 14 | norm_cfg = dict(type='BN', requires_grad=True) 15 | model = dict( 16 | type='EncoderDecoder', 17 | pretrained=None, 18 | backbone=dict( 19 | type='XCiT', 20 | patch_size=16, 21 | embed_dim=384, 22 | depth=12, 23 | num_heads=8, 24 | mlp_ratio=4, 25 | qkv_bias=True, 26 | use_abs_pos_emb=True, 27 | use_rel_pos_bias=False, 28 | ), 29 | decode_head=dict( 30 | type='UPerHead', 31 | in_channels=[384, 384, 384, 384], 32 | in_index=[0, 1, 2, 3], 33 | pool_scales=(1, 2, 3, 6), 34 | channels=512, 35 | dropout_ratio=0.1, 36 | num_classes=19, 37 | norm_cfg=norm_cfg, 38 | align_corners=False, 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 41 | auxiliary_head=dict( 42 | type='FCNHead', 43 | in_channels=384, 44 | in_index=2, 45 | channels=256, 46 | num_convs=1, 47 | concat_input=False, 48 
| dropout_ratio=0.1, 49 | num_classes=19, 50 | norm_cfg=norm_cfg, 51 | align_corners=False, 52 | loss_decode=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 54 | # model training and testing settings 55 | train_cfg=dict(), 56 | test_cfg=dict(mode='whole')) 57 | -------------------------------------------------------------------------------- /datasets/vtab.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | 3 | from PIL import Image 4 | import os 5 | import os.path 6 | from torchvision import transforms 7 | 8 | _DATASET_NAME = ( 9 | 'cifar', 10 | 'caltech101', 11 | 'dtd', 12 | 'oxford_flowers102', 13 | 'oxford_iiit_pet', 14 | 'svhn', 15 | 'sun397', 16 | 'patch_camelyon', 17 | 'eurosat', 18 | 'resisc45', 19 | 'diabetic_retinopathy', 20 | 'clevr_count', 21 | 'clevr_dist', 22 | 'dmlab', 23 | 'kitti', 24 | 'dsprites_loc', 25 | 'dsprites_ori', 26 | 'smallnorb_azi', 27 | 'smallnorb_ele', 28 | ) 29 | 30 | _CLASSES_NUM = (100, 102, 47, 102, 37, 10, 397, 2, 10, 45, 5, 8, 6, 6, 4, 16, 16, 18, 9) 31 | 32 | def get_classes_num(dataset_name): 33 | dict_ = {name: num for name, num in zip(_DATASET_NAME, _CLASSES_NUM)} 34 | return dict_[dataset_name] 35 | 36 | 37 | def get_classes_name(idx): 38 | return _DATASET_NAME[idx] 39 | 40 | 41 | def default_loader(path): 42 | return Image.open(path).convert('RGB') 43 | 44 | 45 | def default_flist_reader(flist): 46 | imlist = [] 47 | with open(flist, 'r') as rf: 48 | for line in rf.readlines(): 49 | impath, imlabel = line.strip().split() 50 | imlist.append((impath, int(imlabel))) 51 | 52 | return imlist 53 | 54 | class ImageFilelist(data.Dataset): 55 | def __init__(self, root, flist, transform=None, target_transform=None, 56 | flist_reader=default_flist_reader, loader=default_loader): 57 | self.root = root 58 | self.imlist = flist_reader(flist) 59 | self.transform = transform 60 | self.target_transform = target_transform 61 | self.loader = loader 62 | 63 | def __getitem__(self, index): 64 | impath, target = self.imlist[index] 65 | img = self.loader(os.path.join(self.root, impath)) 66 | if self.transform is not None: 67 | img = self.transform(img) 68 | if self.target_transform is not None: 69 | target = self.target_transform(target) 70 | 71 | return img, target 72 | 73 | def __len__(self): 74 | return len(self.imlist) 75 | -------------------------------------------------------------------------------- /util/datasets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
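# Usage note: the helpers below read a handful of fields from an `args`
# namespace; the values shown here are illustrative assumptions, not defaults
# taken from this repository:
#
#     from argparse import Namespace
#     args = Namespace(data_path='/data/imagenet', input_size=224,
#                      color_jitter=0.4, aa='rand-m9-mstd0.5-inc1',
#                      reprob=0.25, remode='pixel', recount=1)
#     train_set = build_dataset(is_train=True, args=args)
#     val_set = build_dataset(is_train=False, args=args)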
6 | # -------------------------------------------------------- 7 | # References: 8 | # DeiT: https://github.com/facebookresearch/deit 9 | # -------------------------------------------------------- 10 | 11 | import os 12 | import PIL 13 | 14 | from torchvision import datasets, transforms 15 | 16 | from timm.data import create_transform 17 | from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD 18 | 19 | 20 | def build_dataset(is_train, args): 21 | transform = build_transform(is_train, args) 22 | 23 | root = os.path.join(args.data_path, 'train' if is_train else 'val') 24 | dataset = datasets.ImageFolder(root, transform=transform) 25 | 26 | print(dataset) 27 | 28 | return dataset 29 | 30 | 31 | def build_transform(is_train, args): 32 | mean = IMAGENET_DEFAULT_MEAN 33 | std = IMAGENET_DEFAULT_STD 34 | # train transform 35 | if is_train: 36 | # this should always dispatch to transforms_imagenet_train 37 | transform = create_transform( 38 | input_size=args.input_size, 39 | is_training=True, 40 | color_jitter=args.color_jitter, 41 | auto_augment=args.aa, 42 | interpolation='bicubic', 43 | re_prob=args.reprob, 44 | re_mode=args.remode, 45 | re_count=args.recount, 46 | mean=mean, 47 | std=std, 48 | ) 49 | return transform 50 | 51 | # eval transform 52 | t = [] 53 | if args.input_size <= 224: 54 | crop_pct = 224 / 256 55 | else: 56 | crop_pct = 1.0 57 | size = int(args.input_size / crop_pct) 58 | t.append( 59 | transforms.Resize(size, interpolation=PIL.Image.BICUBIC), # to maintain same ratio w.r.t. 224 images 60 | ) 61 | t.append(transforms.CenterCrop(args.input_size)) 62 | 63 | t.append(transforms.ToTensor()) 64 | t.append(transforms.Normalize(mean, std)) 65 | return transforms.Compose(t) 66 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import warnings 3 | 4 | import mmcv 5 | from packaging.version import parse 6 | 7 | from .version import __version__, version_info 8 | 9 | MMCV_MIN = '1.3.13' 10 | MMCV_MAX = '1.5.0' 11 | 12 | 13 | def digit_version(version_str: str, length: int = 4): 14 | """Convert a version string into a tuple of integers. 15 | 16 | This method is usually used for comparing two versions. For pre-release 17 | versions: alpha < beta < rc. 18 | 19 | Args: 20 | version_str (str): The version string. 21 | length (int): The maximum number of version levels. Default: 4. 22 | 23 | Returns: 24 | tuple[int]: The version info in digits (integers). 
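        Examples (illustrative; the outputs follow from the parsing rules below):
            >>> digit_version('1.3.13')
            (1, 3, 13, 0, 0, 0)
            >>> digit_version('1.5.0rc1')
            (1, 5, 0, 0, -1, 1)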
25 | """ 26 | version = parse(version_str) 27 | assert version.release, f'failed to parse version {version_str}' 28 | release = list(version.release) 29 | release = release[:length] 30 | if len(release) < length: 31 | release = release + [0] * (length - len(release)) 32 | if version.is_prerelease: 33 | mapping = {'a': -3, 'b': -2, 'rc': -1} 34 | val = -4 35 | # version.pre can be None 36 | if version.pre: 37 | if version.pre[0] not in mapping: 38 | warnings.warn(f'unknown prerelease version {version.pre[0]}, ' 39 | 'version checking may go wrong') 40 | else: 41 | val = mapping[version.pre[0]] 42 | release.extend([val, version.pre[-1]]) 43 | else: 44 | release.extend([val, 0]) 45 | 46 | elif version.is_postrelease: 47 | release.extend([1, version.post]) 48 | else: 49 | release.extend([0, 0]) 50 | return tuple(release) 51 | 52 | 53 | mmcv_min_version = digit_version(MMCV_MIN) 54 | mmcv_max_version = digit_version(MMCV_MAX) 55 | mmcv_version = digit_version(mmcv.__version__) 56 | 57 | 58 | assert (mmcv_min_version <= mmcv_version <= mmcv_max_version), \ 59 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 60 | f'Please install mmcv>={mmcv_min_version}, <={mmcv_max_version}.' 61 | 62 | __all__ = ['__version__', 'version_info', 'digit_version'] 63 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/backbones/timm_backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | try: 3 | import timm 4 | except ImportError: 5 | timm = None 6 | 7 | from mmcv.cnn.bricks.registry import NORM_LAYERS 8 | from mmcv.runner import BaseModule 9 | 10 | from ..builder import BACKBONES 11 | 12 | 13 | @BACKBONES.register_module() 14 | class TIMMBackbone(BaseModule): 15 | """Wrapper to use backbones from timm library. More details can be found in 16 | `timm `_ . 17 | 18 | Args: 19 | model_name (str): Name of timm model to instantiate. 20 | pretrained (bool): Load pretrained weights if True. 21 | checkpoint_path (str): Path of checkpoint to load after 22 | model is initialized. 23 | in_channels (int): Number of input image channels. Default: 3. 24 | init_cfg (dict, optional): Initialization config dict 25 | **kwargs: Other timm & model specific arguments. 
26 | """ 27 | 28 | def __init__( 29 | self, 30 | model_name, 31 | features_only=True, 32 | pretrained=True, 33 | checkpoint_path='', 34 | in_channels=3, 35 | init_cfg=None, 36 | **kwargs, 37 | ): 38 | if timm is None: 39 | raise RuntimeError('timm is not installed') 40 | super(TIMMBackbone, self).__init__(init_cfg) 41 | if 'norm_layer' in kwargs: 42 | kwargs['norm_layer'] = NORM_LAYERS.get(kwargs['norm_layer']) 43 | self.timm_model = timm.create_model( 44 | model_name=model_name, 45 | features_only=features_only, 46 | pretrained=pretrained, 47 | in_chans=in_channels, 48 | checkpoint_path=checkpoint_path, 49 | **kwargs, 50 | ) 51 | 52 | # Make unused parameters None 53 | self.timm_model.global_pool = None 54 | self.timm_model.fc = None 55 | self.timm_model.classifier = None 56 | 57 | # Hack to use pretrained weights from timm 58 | if pretrained or checkpoint_path: 59 | self._is_init = True 60 | 61 | def forward(self, x): 62 | features = self.timm_model(x) 63 | return features 64 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/segformer_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from mmseg.models.builder import HEADS 7 | from mmseg.models.decode_heads.decode_head import BaseDecodeHead 8 | from mmseg.ops import resize 9 | 10 | 11 | @HEADS.register_module() 12 | class SegformerHead(BaseDecodeHead): 13 | """The all mlp Head of segformer. 14 | 15 | This head is the implementation of 16 | `Segformer ` _. 17 | 18 | Args: 19 | interpolate_mode: The interpolate mode of MLP head upsample operation. 20 | Default: 'bilinear'. 
21 | """ 22 | 23 | def __init__(self, interpolate_mode='bilinear', **kwargs): 24 | super().__init__(input_transform='multiple_select', **kwargs) 25 | 26 | self.interpolate_mode = interpolate_mode 27 | num_inputs = len(self.in_channels) 28 | 29 | assert num_inputs == len(self.in_index) 30 | 31 | self.convs = nn.ModuleList() 32 | for i in range(num_inputs): 33 | self.convs.append( 34 | ConvModule( 35 | in_channels=self.in_channels[i], 36 | out_channels=self.channels, 37 | kernel_size=1, 38 | stride=1, 39 | norm_cfg=self.norm_cfg, 40 | act_cfg=self.act_cfg)) 41 | 42 | self.fusion_conv = ConvModule( 43 | in_channels=self.channels * num_inputs, 44 | out_channels=self.channels, 45 | kernel_size=1, 46 | norm_cfg=self.norm_cfg) 47 | 48 | def forward(self, inputs): 49 | # Receive 4 stage backbone feature map: 1/4, 1/8, 1/16, 1/32 50 | inputs = self._transform_inputs(inputs) 51 | outs = [] 52 | for idx in range(len(inputs)): 53 | x = inputs[idx] 54 | conv = self.convs[idx] 55 | outs.append( 56 | resize( 57 | input=conv(x), 58 | size=inputs[0].shape[2:], 59 | mode=self.interpolate_mode, 60 | align_corners=self.align_corners)) 61 | 62 | out = self.fusion_conv(torch.cat(outs, dim=1)) 63 | 64 | out = self.cls_seg(out) 65 | 66 | return out 67 | -------------------------------------------------------------------------------- /train_IN21K.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ADAPTER_CHANNEL=$1 4 | GPUS=${GPUS:-8} 5 | PORT=$((12000 + $RANDOM % 20000)) 6 | MASTER_ADDR=${MASTER_ADDR:-127.0.0.1} 7 | 8 | 9 | DATASET=cifar100_full 10 | CLUSTER=True \ 11 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ 12 | python -m torch.distributed.launch \ 13 | --master_addr=$MASTER_ADDR \ 14 | --nproc_per_node=$GPUS \ 15 | --master_port=$PORT \ 16 | --use_env \ 17 | main_image.py \ 18 | --batch_size 128 \ 19 | --cls_token \ 20 | --finetune VIT_BASE_IN21K \ 21 | --dist_eval \ 22 | --output_dir "./output/IN21K/0.5/${DATASET}" \ 23 | --drop_path 0.0 \ 24 | --blr 1e-3 \ 25 | --weight_decay 0.01 \ 26 | --dataset "${DATASET}" \ 27 | --ffn_adapt \ 28 | --auto_remove \ 29 | --token_target_ratio 0.5 30 | 31 | 32 | DATASET=svhn_full 33 | CLUSTER=True \ 34 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ 35 | python -m torch.distributed.launch \ 36 | --master_addr=$MASTER_ADDR \ 37 | --nproc_per_node=$GPUS \ 38 | --master_port=$PORT \ 39 | --use_env \ 40 | main_image.py \ 41 | --batch_size 128 \ 42 | --cls_token \ 43 | --finetune VIT_BASE_IN21K \ 44 | --dist_eval \ 45 | --output_dir "./output/IN21K/0.5/${DATASET}" \ 46 | --drop_path 0.0 \ 47 | --blr 1e-3 \ 48 | --weight_decay 0.01 \ 49 | --dataset "${DATASET}" \ 50 | --ffn_adapt \ 51 | --auto_remove \ 52 | --token_target_ratio 0.5 53 | 54 | 55 | 56 | 57 | DATASET=food101_full 58 | CLUSTER=True \ 59 | CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ 60 | python -m torch.distributed.launch \ 61 | --master_addr=$MASTER_ADDR \ 62 | --nproc_per_node=$GPUS \ 63 | --master_port=$PORT \ 64 | --use_env \ 65 | main_image.py \ 66 | --batch_size 128 \ 67 | --cls_token \ 68 | --finetune VIT_BASE_IN21K \ 69 | --dist_eval \ 70 | --output_dir "./output/IN21K/0.5/${DATASET}" \ 71 | --drop_path 0.0 \ 72 | --blr 1e-3 \ 73 | --weight_decay 0.01 \ 74 | --dataset "${DATASET}" \ 75 | --ffn_adapt \ 76 | --auto_remove \ 77 | --token_target_ratio 0.5 78 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/utils/se_layer.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from .make_divisible import make_divisible 7 | 8 | 9 | class SELayer(nn.Module): 10 | """Squeeze-and-Excitation Module. 11 | 12 | Args: 13 | channels (int): The input (and output) channels of the SE layer. 14 | ratio (int): Squeeze ratio in SELayer, the intermediate channel will be 15 | ``int(channels/ratio)``. Default: 16. 16 | conv_cfg (None or dict): Config dict for convolution layer. 17 | Default: None, which means using conv2d. 18 | act_cfg (dict or Sequence[dict]): Config dict for activation layer. 19 | If act_cfg is a dict, two activation layers will be configured 20 | by this dict. If act_cfg is a sequence of dicts, the first 21 | activation layer will be configured by the first dict and the 22 | second activation layer will be configured by the second dict. 23 | Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0, 24 | divisor=6.0)). 25 | """ 26 | 27 | def __init__(self, 28 | channels, 29 | ratio=16, 30 | conv_cfg=None, 31 | act_cfg=(dict(type='ReLU'), 32 | dict(type='HSigmoid', bias=3.0, divisor=6.0))): 33 | super(SELayer, self).__init__() 34 | if isinstance(act_cfg, dict): 35 | act_cfg = (act_cfg, act_cfg) 36 | assert len(act_cfg) == 2 37 | assert mmcv.is_tuple_of(act_cfg, dict) 38 | self.global_avgpool = nn.AdaptiveAvgPool2d(1) 39 | self.conv1 = ConvModule( 40 | in_channels=channels, 41 | out_channels=make_divisible(channels // ratio, 8), 42 | kernel_size=1, 43 | stride=1, 44 | conv_cfg=conv_cfg, 45 | act_cfg=act_cfg[0]) 46 | self.conv2 = ConvModule( 47 | in_channels=make_divisible(channels // ratio, 8), 48 | out_channels=channels, 49 | kernel_size=1, 50 | stride=1, 51 | conv_cfg=conv_cfg, 52 | act_cfg=act_cfg[1]) 53 | 54 | def forward(self, x): 55 | out = self.global_avgpool(x) 56 | out = self.conv1(out) 57 | out = self.conv2(out) 58 | return x * out 59 | -------------------------------------------------------------------------------- /configs.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | 4 | CLUSTER = os.environ.get('CLUSTER') 5 | 6 | if not CLUSTER: 7 | CHECKPOINTS = { 8 | 'VIT_BASE_IN21K': 'jx_vit_base_patch16_224_in21k-e5005f0a.pth', 9 | } 10 | DATASETS = { 11 | 'cifar10': 'path/small_datasets', 12 | 'cifar100': 'path/small_datasets', 13 | 'food101': 'path/small_datasets', 14 | 'svhn': 'path/small_datasets', 15 | 'flowers102': 'path/small_datasets', 16 | 'fgvc_aircraft': 'path/small_datasets', 17 | 'stanford_cars': 'path/small_datasets', 18 | 'dtd': 'path/small_datasets', 19 | 'oxford_iiit_pet': 'path/small_datasets', 20 | 'vtab': 'path/vtab-1k', 21 | 'K400': dict( 22 | TRAIN_ROOT='path/K400', 23 | VAL_ROOT='path/K400', 24 | TRAIN_LIST='path/K400/k400_train.txt', 25 | VAL_LIST='path/K400/k400_val.txt', 26 | NUM_CLASSES=400), 27 | 'HMDB51': dict( 28 | TRAIN_ROOT='path/HMDB51', 29 | VAL_ROOT='path/HMDB51', 30 | TRAIN_LIST='path/HMDB51/hmdb51_split1_train.txt' , 31 | VAL_LIST='path/HMDB51/hmdb51_split1_test.txt', 32 | NUM_CLASSES=51, 33 | ), 34 | } 35 | 36 | 37 | else: # for debug 38 | CHECKPOINTS = { 39 | 'VIT_BASE_IN21K': '/mnt/workspace/workgroup/zhaowangbo.zwb/research/dynamic_PEFT_ECCV2024/jx_vit_base_patch16_224_in21k-e5005f0a.pth', 40 | } 41 | DATASETS = { 42 | 'cifar10': 'path/small_datasets', 43 | 'cifar100': 'path/small_datasets', 44 | 'food101': 'path/small_datasets', 45 | 'svhn': 
'path/small_datasets', 46 | 'flowers102': 'path/small_datasets', 47 | 'fgvc_aircraft': 'path/small_datasets', 48 | 'stanford_cars': 'path/small_datasets', 49 | 'dtd': 'path/small_datasets', 50 | 'oxford_iiit_pet': 'path/small_datasets', 51 | 'vtab': 'path/vtab-1k', 52 | 'K400': dict( 53 | TRAIN_ROOT='path/K400', 54 | VAL_ROOT='path/K400', 55 | TRAIN_LIST='path/K400/k400_train.txt', 56 | VAL_LIST='path/K400/k400_val.txt', 57 | NUM_CLASSES=400), 58 | 'HMDB51': dict( 59 | TRAIN_ROOT='path/HMDB51', 60 | VAL_ROOT='path/HMDB51', 61 | TRAIN_LIST='path/HMDB51/hmdb51_split1_train.txt' , 62 | VAL_LIST='path/HMDB51/hmdb51_split1_test.txt', 63 | NUM_CLASSES=51, 64 | ), 65 | } 66 | 67 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/setr_mla_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from mmseg.ops import Upsample 7 | from ..builder import HEADS 8 | from .decode_head import BaseDecodeHead 9 | 10 | 11 | @HEADS.register_module() 12 | class SETRMLAHead(BaseDecodeHead): 13 | """Multi level feature aggretation head of SETR. 14 | 15 | MLA head of `SETR `_. 16 | 17 | Args: 18 | mlahead_channels (int): Channels of conv-conv-4x of multi-level feature 19 | aggregation. Default: 128. 20 | up_scale (int): The scale factor of interpolate. Default:4. 21 | """ 22 | 23 | def __init__(self, mla_channels=128, up_scale=4, **kwargs): 24 | super(SETRMLAHead, self).__init__( 25 | input_transform='multiple_select', **kwargs) 26 | self.mla_channels = mla_channels 27 | 28 | num_inputs = len(self.in_channels) 29 | 30 | # Refer to self.cls_seg settings of BaseDecodeHead 31 | assert self.channels == num_inputs * mla_channels 32 | 33 | self.up_convs = nn.ModuleList() 34 | for i in range(num_inputs): 35 | self.up_convs.append( 36 | nn.Sequential( 37 | ConvModule( 38 | in_channels=self.in_channels[i], 39 | out_channels=mla_channels, 40 | kernel_size=3, 41 | padding=1, 42 | norm_cfg=self.norm_cfg, 43 | act_cfg=self.act_cfg), 44 | ConvModule( 45 | in_channels=mla_channels, 46 | out_channels=mla_channels, 47 | kernel_size=3, 48 | padding=1, 49 | norm_cfg=self.norm_cfg, 50 | act_cfg=self.act_cfg), 51 | Upsample( 52 | scale_factor=up_scale, 53 | mode='bilinear', 54 | align_corners=self.align_corners))) 55 | 56 | def forward(self, inputs): 57 | inputs = self._transform_inputs(inputs) 58 | outs = [] 59 | for x, up_conv in zip(inputs, self.up_convs): 60 | outs.append(up_conv(x)) 61 | out = torch.cat(outs, dim=1) 62 | out = self.cls_seg(out) 63 | return out 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | test_case.py 2 | summary.csv 3 | output/ 4 | output_ada/ 5 | work_dirs/* 6 | __MACOSX 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | pip-wheel-metadata/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller 
builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/cascade_decode_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from abc import ABCMeta, abstractmethod 3 | 4 | from .decode_head import BaseDecodeHead 5 | 6 | 7 | class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta): 8 | """Base class for cascade decode head used in 9 | :class:`CascadeEncoderDecoder.""" 10 | 11 | def __init__(self, *args, **kwargs): 12 | super(BaseCascadeDecodeHead, self).__init__(*args, **kwargs) 13 | 14 | @abstractmethod 15 | def forward(self, inputs, prev_output): 16 | """Placeholder of forward function.""" 17 | pass 18 | 19 | def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, 20 | train_cfg): 21 | """Forward function for training. 22 | Args: 23 | inputs (list[Tensor]): List of multi-level img features. 24 | prev_output (Tensor): The output of previous decode head. 25 | img_metas (list[dict]): List of image info dict where each dict 26 | has: 'img_shape', 'scale_factor', 'flip', and may also contain 27 | 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. 28 | For details on the values of these keys see 29 | `mmseg/datasets/pipelines/formatting.py:Collect`. 30 | gt_semantic_seg (Tensor): Semantic segmentation masks 31 | used if the architecture supports semantic segmentation task. 32 | train_cfg (dict): The training config. 
33 | 34 | Returns: 35 | dict[str, Tensor]: a dictionary of loss components 36 | """ 37 | seg_logits = self.forward(inputs, prev_output) 38 | losses = self.losses(seg_logits, gt_semantic_seg) 39 | 40 | return losses 41 | 42 | def forward_test(self, inputs, prev_output, img_metas, test_cfg): 43 | """Forward function for testing. 44 | 45 | Args: 46 | inputs (list[Tensor]): List of multi-level img features. 47 | prev_output (Tensor): The output of previous decode head. 48 | img_metas (list[dict]): List of image info dict where each dict 49 | has: 'img_shape', 'scale_factor', 'flip', and may also contain 50 | 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. 51 | For details on the values of these keys see 52 | `mmseg/datasets/pipelines/formatting.py:Collect`. 53 | test_cfg (dict): The testing config. 54 | 55 | Returns: 56 | Tensor: Output segmentation map. 57 | """ 58 | return self.forward(inputs, prev_output) 59 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/sep_fcn_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn import DepthwiseSeparableConvModule 3 | 4 | from ..builder import HEADS 5 | from .fcn_head import FCNHead 6 | 7 | 8 | @HEADS.register_module() 9 | class DepthwiseSeparableFCNHead(FCNHead): 10 | """Depthwise-Separable Fully Convolutional Network for Semantic 11 | Segmentation. 12 | 13 | This head is implemented according to `Fast-SCNN: Fast Semantic 14 | Segmentation Network `_. 15 | 16 | Args: 17 | in_channels(int): Number of output channels of FFM. 18 | channels(int): Number of middle-stage channels in the decode head. 19 | concat_input(bool): Whether to concatenate original decode input into 20 | the result of several consecutive convolution layers. 21 | Default: True. 22 | num_classes(int): Used to determine the dimension of 23 | final prediction tensor. 24 | in_index(int): Correspond with 'out_indices' in FastSCNN backbone. 25 | norm_cfg (dict | None): Config of norm layers. 26 | align_corners (bool): align_corners argument of F.interpolate. 27 | Default: False. 28 | loss_decode(dict): Config of loss type and some 29 | relevant additional options. 30 | dw_act_cfg (dict):Activation config of depthwise ConvModule. If it is 31 | 'default', it will be the same as `act_cfg`. Default: None. 
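        Example (illustrative; the channel numbers are assumptions in the style
        of a Fast-SCNN setup, not values from this repository):
            >>> head = DepthwiseSeparableFCNHead(
            ...     in_channels=128,
            ...     channels=128,
            ...     concat_input=False,
            ...     num_classes=19,
            ...     in_index=-1,
            ...     norm_cfg=dict(type='BN', requires_grad=True),
            ...     align_corners=False)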
32 | """ 33 | 34 | def __init__(self, dw_act_cfg=None, **kwargs): 35 | super(DepthwiseSeparableFCNHead, self).__init__(**kwargs) 36 | self.convs[0] = DepthwiseSeparableConvModule( 37 | self.in_channels, 38 | self.channels, 39 | kernel_size=self.kernel_size, 40 | padding=self.kernel_size // 2, 41 | norm_cfg=self.norm_cfg, 42 | dw_act_cfg=dw_act_cfg) 43 | 44 | for i in range(1, self.num_convs): 45 | self.convs[i] = DepthwiseSeparableConvModule( 46 | self.channels, 47 | self.channels, 48 | kernel_size=self.kernel_size, 49 | padding=self.kernel_size // 2, 50 | norm_cfg=self.norm_cfg, 51 | dw_act_cfg=dw_act_cfg) 52 | 53 | if self.concat_input: 54 | self.conv_cat = DepthwiseSeparableConvModule( 55 | self.in_channels + self.channels, 56 | self.channels, 57 | kernel_size=self.kernel_size, 58 | padding=self.kernel_size // 2, 59 | norm_cfg=self.norm_cfg, 60 | dw_act_cfg=dw_act_cfg) 61 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/fpn_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import numpy as np 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from mmseg.ops import Upsample, resize 7 | from ..builder import HEADS 8 | from .decode_head import BaseDecodeHead 9 | 10 | 11 | @HEADS.register_module() 12 | class FPNHead(BaseDecodeHead): 13 | """Panoptic Feature Pyramid Networks. 14 | 15 | This head is the implementation of `Semantic FPN 16 | `_. 17 | 18 | Args: 19 | feature_strides (tuple[int]): The strides for input feature maps. 20 | stack_lateral. All strides suppose to be power of 2. The first 21 | one is of largest resolution. 22 | """ 23 | 24 | def __init__(self, feature_strides, **kwargs): 25 | super(FPNHead, self).__init__( 26 | input_transform='multiple_select', **kwargs) 27 | assert len(feature_strides) == len(self.in_channels) 28 | assert min(feature_strides) == feature_strides[0] 29 | self.feature_strides = feature_strides 30 | 31 | self.scale_heads = nn.ModuleList() 32 | for i in range(len(feature_strides)): 33 | head_length = max( 34 | 1, 35 | int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) 36 | scale_head = [] 37 | for k in range(head_length): 38 | scale_head.append( 39 | ConvModule( 40 | self.in_channels[i] if k == 0 else self.channels, 41 | self.channels, 42 | 3, 43 | padding=1, 44 | conv_cfg=self.conv_cfg, 45 | norm_cfg=self.norm_cfg, 46 | act_cfg=self.act_cfg)) 47 | if feature_strides[i] != feature_strides[0]: 48 | scale_head.append( 49 | Upsample( 50 | scale_factor=2, 51 | mode='bilinear', 52 | align_corners=self.align_corners)) 53 | self.scale_heads.append(nn.Sequential(*scale_head)) 54 | 55 | def forward(self, inputs): 56 | 57 | x = self._transform_inputs(inputs) 58 | 59 | output = self.scale_heads[0](x[0]) 60 | for i in range(1, len(self.feature_strides)): 61 | # non inplace 62 | output = output + resize( 63 | self.scale_heads[i](x[i]), 64 | size=output.shape[2:], 65 | mode='bilinear', 66 | align_corners=self.align_corners) 67 | 68 | output = self.cls_seg(output) 69 | return output 70 | -------------------------------------------------------------------------------- /util/lr_decay.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 
3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # -------------------------------------------------------- 7 | # References: 8 | # ELECTRA https://github.com/google-research/electra 9 | # BEiT: https://github.com/microsoft/unilm/tree/master/beit 10 | # -------------------------------------------------------- 11 | 12 | import json 13 | 14 | 15 | def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=[], layer_decay=.75): 16 | """ 17 | Parameter groups for layer-wise lr decay 18 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58 19 | """ 20 | param_group_names = {} 21 | param_groups = {} 22 | 23 | num_layers = len(model.blocks) + 1 24 | 25 | layer_scales = list(layer_decay ** (num_layers - i) for i in range(num_layers + 1)) 26 | 27 | for n, p in model.named_parameters(): 28 | if not p.requires_grad: 29 | continue 30 | 31 | # no decay: all 1D parameters and model specific ones 32 | if p.ndim == 1 or n in no_weight_decay_list: 33 | g_decay = "no_decay" 34 | this_decay = 0. 35 | else: 36 | g_decay = "decay" 37 | this_decay = weight_decay 38 | 39 | layer_id = get_layer_id_for_vit(n, num_layers) 40 | group_name = "layer_%d_%s" % (layer_id, g_decay) 41 | 42 | if group_name not in param_group_names: 43 | this_scale = layer_scales[layer_id] 44 | 45 | param_group_names[group_name] = { 46 | "lr_scale": this_scale, 47 | "weight_decay": this_decay, 48 | "params": [], 49 | } 50 | param_groups[group_name] = { 51 | "lr_scale": this_scale, 52 | "weight_decay": this_decay, 53 | "params": [], 54 | } 55 | 56 | param_group_names[group_name]["params"].append(n) 57 | param_groups[group_name]["params"].append(p) 58 | 59 | # print("parameter groups: \n%s" % json.dumps(param_group_names, indent=2)) 60 | 61 | return list(param_groups.values()) 62 | 63 | 64 | def get_layer_id_for_vit(name, num_layers): 65 | """ 66 | Assign a parameter with its layer id 67 | Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33 68 | """ 69 | if name in ['cls_token', 'pos_embed']: 70 | return 0 71 | elif name.startswith('patch_embed'): 72 | return 0 73 | elif name.startswith('blocks'): 74 | return int(name.split('.')[1]) + 1 75 | else: 76 | return num_layers -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/necks/multilevel_neck.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | from mmcv.cnn import ConvModule, xavier_init 4 | 5 | from mmseg.ops import resize 6 | from ..builder import NECKS 7 | 8 | 9 | @NECKS.register_module() 10 | class MultiLevelNeck(nn.Module): 11 | """MultiLevelNeck. 12 | 13 | A neck structure connect vit backbone and decoder_heads. 14 | 15 | Args: 16 | in_channels (List[int]): Number of input channels per scale. 17 | out_channels (int): Number of output channels (used at each scale). 18 | scales (List[float]): Scale factors for each input feature map. 19 | Default: [0.5, 1, 2, 4] 20 | norm_cfg (dict): Config dict for normalization layer. Default: None. 21 | act_cfg (dict): Config dict for activation layer in ConvModule. 22 | Default: None. 
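        Example (illustrative; the ViT-style channel width is an assumption):
            >>> neck = MultiLevelNeck(in_channels=[768, 768, 768, 768],
            ...                       out_channels=768,
            ...                       scales=[4, 2, 1, 0.5])
            >>> # turns four same-resolution ViT feature maps into a
            >>> # 4x / 2x / 1x / 0.5x feature pyramid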
23 | """ 24 | 25 | def __init__(self, 26 | in_channels, 27 | out_channels, 28 | scales=[0.5, 1, 2, 4], 29 | norm_cfg=None, 30 | act_cfg=None): 31 | super(MultiLevelNeck, self).__init__() 32 | assert isinstance(in_channels, list) 33 | self.in_channels = in_channels 34 | self.out_channels = out_channels 35 | self.scales = scales 36 | self.num_outs = len(scales) 37 | self.lateral_convs = nn.ModuleList() 38 | self.convs = nn.ModuleList() 39 | for in_channel in in_channels: 40 | self.lateral_convs.append( 41 | ConvModule( 42 | in_channel, 43 | out_channels, 44 | kernel_size=1, 45 | norm_cfg=norm_cfg, 46 | act_cfg=act_cfg)) 47 | for _ in range(self.num_outs): 48 | self.convs.append( 49 | ConvModule( 50 | out_channels, 51 | out_channels, 52 | kernel_size=3, 53 | padding=1, 54 | stride=1, 55 | norm_cfg=norm_cfg, 56 | act_cfg=act_cfg)) 57 | 58 | # default init_weights for conv(msra) and norm in ConvModule 59 | def init_weights(self): 60 | for m in self.modules(): 61 | if isinstance(m, nn.Conv2d): 62 | xavier_init(m, distribution='uniform') 63 | 64 | def forward(self, inputs): 65 | assert len(inputs) == len(self.in_channels) 66 | inputs = [ 67 | lateral_conv(inputs[i]) 68 | for i, lateral_conv in enumerate(self.lateral_convs) 69 | ] 70 | # for len(inputs) not equal to self.num_outs 71 | if len(inputs) == 1: 72 | inputs = [inputs[0] for _ in range(self.num_outs)] 73 | outs = [] 74 | for i in range(self.num_outs): 75 | x_resize = resize( 76 | inputs[i], scale_factor=self.scales[i], mode='bilinear') 77 | outs.append(self.convs[i](x_resize)) 78 | return tuple(outs) 79 | -------------------------------------------------------------------------------- /video_datasets/video_datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | from util.crop import RandomResizedCrop 3 | from timm.data.constants import IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD 4 | import torchvision.transforms as transforms 5 | import torchvision.datasets as datasets 6 | from .k400 import VideoDataset 7 | from .sthv2_dataset import SthV2VideoDataset 8 | import torch 9 | 10 | def build_dataset(args): 11 | if os.path.basename(args.finetune).startswith('VIT_BASE_IN21K'): 12 | _mean = IMAGENET_INCEPTION_MEAN 13 | _std = IMAGENET_INCEPTION_STD 14 | 15 | else: 16 | raise ValueError(os.path.basename(args.finetune)) 17 | 18 | 19 | 20 | if args.dataset == 'K400': 21 | dataset_train = VideoDataset( 22 | list_path=args.data_path[args.dataset]['TRAIN_LIST'], 23 | data_root=args.data_path[args.dataset]['TRAIN_ROOT'], 24 | random_sample=True, 25 | mirror=True, 26 | spatial_size=224, 27 | auto_augment=None, 28 | num_frames=8, 29 | sampling_rate=16, 30 | resize_type='random_short_side_scale_jitter', 31 | scale_range=[1.0, 1.15], 32 | mean=torch.Tensor(_mean), 33 | std=torch.Tensor(_std) 34 | ) 35 | dataset_val = VideoDataset( 36 | list_path=args.data_path[args.dataset]['VAL_LIST'], 37 | data_root=args.data_path[args.dataset]['VAL_ROOT'], 38 | random_sample=False, 39 | spatial_size=224, 40 | num_frames=8, 41 | sampling_rate=16, 42 | num_spatial_views=1, 43 | num_temporal_views=3, 44 | mean=torch.Tensor(_mean), 45 | std=torch.Tensor(_std) 46 | ) 47 | metric = "accuracy" 48 | 49 | 50 | 51 | elif args.dataset == 'SSV2': 52 | dataset_train = SthV2VideoDataset( 53 | list_path=args.data_path[args.dataset]['TRAIN_LIST'], 54 | data_root=args.data_path[args.dataset]['TRAIN_ROOT'], 55 | random_sample=True, 56 | mirror=False, 57 | spatial_size=224, 58 | auto_augment='rand-m7-n4-mstd0.5-inc1', 59 | 
num_frames=8, 60 | sampling_rate=0, 61 | resize_type='random_resized_crop', 62 | scale_range=[0.08, 1.0], 63 | mean=torch.Tensor(_mean), 64 | std=torch.Tensor(_std) 65 | ) 66 | 67 | 68 | dataset_val = SthV2VideoDataset( 69 | list_path=args.data_path[args.dataset]['VAL_LIST'], 70 | data_root=args.data_path[args.dataset]['VAL_ROOT'], 71 | random_sample=False, 72 | spatial_size=224, 73 | num_frames=8, 74 | sampling_rate=0, 75 | num_spatial_views=3, 76 | num_temporal_views=1, 77 | mean=torch.Tensor(_mean), 78 | std=torch.Tensor(_std) 79 | ) 80 | 81 | metric = "accuracy" 82 | 83 | 84 | else: 85 | raise ValueError(args.dataset) 86 | 87 | return dataset_train, dataset_val, metric 88 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/ops/encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | 7 | class Encoding(nn.Module): 8 | """Encoding Layer: a learnable residual encoder. 9 | 10 | Input is of shape (batch_size, channels, height, width). 11 | Output is of shape (batch_size, num_codes, channels). 12 | 13 | Args: 14 | channels: dimension of the features or feature channels 15 | num_codes: number of code words 16 | """ 17 | 18 | def __init__(self, channels, num_codes): 19 | super(Encoding, self).__init__() 20 | # init codewords and smoothing factor 21 | self.channels, self.num_codes = channels, num_codes 22 | std = 1. / ((num_codes * channels)**0.5) 23 | # [num_codes, channels] 24 | self.codewords = nn.Parameter( 25 | torch.empty(num_codes, channels, 26 | dtype=torch.float).uniform_(-std, std), 27 | requires_grad=True) 28 | # [num_codes] 29 | self.scale = nn.Parameter( 30 | torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0), 31 | requires_grad=True) 32 | 33 | @staticmethod 34 | def scaled_l2(x, codewords, scale): 35 | num_codes, channels = codewords.size() 36 | batch_size = x.size(0) 37 | reshaped_scale = scale.view((1, 1, num_codes)) 38 | expanded_x = x.unsqueeze(2).expand( 39 | (batch_size, x.size(1), num_codes, channels)) 40 | reshaped_codewords = codewords.view((1, 1, num_codes, channels)) 41 | 42 | scaled_l2_norm = reshaped_scale * ( 43 | expanded_x - reshaped_codewords).pow(2).sum(dim=3) 44 | return scaled_l2_norm 45 | 46 | @staticmethod 47 | def aggregate(assignment_weights, x, codewords): 48 | num_codes, channels = codewords.size() 49 | reshaped_codewords = codewords.view((1, 1, num_codes, channels)) 50 | batch_size = x.size(0) 51 | 52 | expanded_x = x.unsqueeze(2).expand( 53 | (batch_size, x.size(1), num_codes, channels)) 54 | encoded_feat = (assignment_weights.unsqueeze(3) * 55 | (expanded_x - reshaped_codewords)).sum(dim=1) 56 | return encoded_feat 57 | 58 | def forward(self, x): 59 | assert x.dim() == 4 and x.size(1) == self.channels 60 | # [batch_size, channels, height, width] 61 | batch_size = x.size(0) 62 | # [batch_size, height x width, channels] 63 | x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous() 64 | # assignment_weights: [batch_size, channels, num_codes] 65 | assignment_weights = F.softmax( 66 | self.scaled_l2(x, self.codewords, self.scale), dim=2) 67 | # aggregate 68 | encoded_feat = self.aggregate(assignment_weights, x, self.codewords) 69 | return encoded_feat 70 | 71 | def __repr__(self): 72 | repr_str = self.__class__.__name__ 73 | repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' \ 74 | 
f'x{self.channels})' 75 | return repr_str 76 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/fcn_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from ..builder import HEADS 7 | from .decode_head import BaseDecodeHead 8 | 9 | 10 | @HEADS.register_module() 11 | class FCNHead(BaseDecodeHead): 12 | """Fully Convolution Networks for Semantic Segmentation. 13 | 14 | This head is implemented of `FCNNet `_. 15 | 16 | Args: 17 | num_convs (int): Number of convs in the head. Default: 2. 18 | kernel_size (int): The kernel size for convs in the head. Default: 3. 19 | concat_input (bool): Whether concat the input and output of convs 20 | before classification layer. 21 | dilation (int): The dilation rate for convs in the head. Default: 1. 22 | """ 23 | 24 | def __init__(self, 25 | num_convs=2, 26 | kernel_size=3, 27 | concat_input=True, 28 | dilation=1, 29 | **kwargs): 30 | assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int) 31 | self.num_convs = num_convs 32 | self.concat_input = concat_input 33 | self.kernel_size = kernel_size 34 | super(FCNHead, self).__init__(**kwargs) 35 | if num_convs == 0: 36 | assert self.in_channels == self.channels 37 | 38 | conv_padding = (kernel_size // 2) * dilation 39 | convs = [] 40 | convs.append( 41 | ConvModule( 42 | self.in_channels, 43 | self.channels, 44 | kernel_size=kernel_size, 45 | padding=conv_padding, 46 | dilation=dilation, 47 | conv_cfg=self.conv_cfg, 48 | norm_cfg=self.norm_cfg, 49 | act_cfg=self.act_cfg)) 50 | for i in range(num_convs - 1): 51 | convs.append( 52 | ConvModule( 53 | self.channels, 54 | self.channels, 55 | kernel_size=kernel_size, 56 | padding=conv_padding, 57 | dilation=dilation, 58 | conv_cfg=self.conv_cfg, 59 | norm_cfg=self.norm_cfg, 60 | act_cfg=self.act_cfg)) 61 | if num_convs == 0: 62 | self.convs = nn.Identity() 63 | else: 64 | self.convs = nn.Sequential(*convs) 65 | if self.concat_input: 66 | self.conv_cat = ConvModule( 67 | self.in_channels + self.channels, 68 | self.channels, 69 | kernel_size=kernel_size, 70 | padding=kernel_size // 2, 71 | conv_cfg=self.conv_cfg, 72 | norm_cfg=self.norm_cfg, 73 | act_cfg=self.act_cfg) 74 | 75 | def forward(self, inputs): 76 | """Forward function.""" 77 | x = self._transform_inputs(inputs) 78 | output = self.convs(x) 79 | if self.concat_input: 80 | output = self.conv_cat(torch.cat([x, output], dim=1)) 81 | output = self.cls_seg(output) 82 | return output 83 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmcv_custom/apex_runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except: 16 | print('apex is not installed') 17 | 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 21 | 22 | The checkpoint will have 4 fields: ``meta``, ``state_dict`` and 23 | ``optimizer``, ``amp``. 
By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint. 31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = { 46 | 'meta': meta, 47 | 'state_dict': weights_to_cpu(get_state_dict(model)) 48 | } 49 | # save optimizer state dict in the checkpoint 50 | if isinstance(optimizer, Optimizer): 51 | checkpoint['optimizer'] = optimizer.state_dict() 52 | elif isinstance(optimizer, dict): 53 | checkpoint['optimizer'] = {} 54 | for name, optim in optimizer.items(): 55 | checkpoint['optimizer'][name] = optim.state_dict() 56 | 57 | # # save amp state dict in the checkpoint 58 | # checkpoint['amp'] = apex.amp.state_dict() 59 | 60 | if filename.startswith('pavi://'): 61 | try: 62 | from pavi import modelcloud 63 | from pavi.exception import NodeNotFoundError 64 | except ImportError: 65 | raise ImportError( 66 | 'Please install pavi to load checkpoint from modelcloud.') 67 | model_path = filename[7:] 68 | root = modelcloud.Folder() 69 | model_dir, model_name = osp.split(model_path) 70 | try: 71 | model = modelcloud.get(model_dir) 72 | except NodeNotFoundError: 73 | model = root.create_training_model(model_dir) 74 | with TemporaryDirectory() as tmp_dir: 75 | checkpoint_file = osp.join(tmp_dir, model_name) 76 | with open(checkpoint_file, 'wb') as f: 77 | torch.save(checkpoint, f) 78 | f.flush() 79 | model.create_file(checkpoint_file, name=model_name) 80 | else: 81 | mmcv.mkdir_or_exist(osp.dirname(filename)) 82 | # immediately flush buffer 83 | with open(filename, 'wb') as f: 84 | torch.save(checkpoint, f) 85 | f.flush() 86 | -------------------------------------------------------------------------------- /datasets/functional.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import cv2 3 | import numpy as np 4 | import PIL 5 | import torch 6 | 7 | 8 | def _is_tensor_clip(clip): 9 | return torch.is_tensor(clip) and clip.ndimension() == 4 10 | 11 | 12 | def crop_clip(clip, min_h, min_w, h, w): 13 | if isinstance(clip[0], np.ndarray): 14 | cropped = [img[min_h:min_h + h, min_w:min_w + w, :] for img in clip] 15 | 16 | elif isinstance(clip[0], PIL.Image.Image): 17 | cropped = [ 18 | img.crop((min_w, min_h, min_w + w, min_h + h)) for img in clip 19 | ] 20 | else: 21 | raise TypeError('Expected numpy.ndarray or PIL.Image' + 22 | 'but got list of {0}'.format(type(clip[0]))) 23 | return cropped 24 | 25 | 26 | def resize_clip(clip, size, interpolation='bilinear'): 27 | if isinstance(clip[0], np.ndarray): 28 | if isinstance(size, numbers.Number): 29 | im_h, im_w, im_c = clip[0].shape 30 | # Min spatial dim already matches minimal size 31 | if (im_w <= im_h and im_w == size) or (im_h <= im_w 32 | and im_h == size): 33 | return clip 34 | new_h, new_w = get_resize_sizes(im_h, im_w, size) 35 | size = (new_w, new_h) 36 | else: 37 | size = size[0], size[1] 38 | if interpolation == 'bilinear': 39 | np_inter = 
cv2.INTER_LINEAR 40 | else: 41 | np_inter = cv2.INTER_NEAREST 42 | scaled = [ 43 | cv2.resize(img, size, interpolation=np_inter) for img in clip 44 | ] 45 | elif isinstance(clip[0], PIL.Image.Image): 46 | if isinstance(size, numbers.Number): 47 | im_w, im_h = clip[0].size 48 | # Min spatial dim already matches minimal size 49 | if (im_w <= im_h and im_w == size) or (im_h <= im_w 50 | and im_h == size): 51 | return clip 52 | new_h, new_w = get_resize_sizes(im_h, im_w, size) 53 | size = (new_w, new_h) 54 | else: 55 | size = size[1], size[0] 56 | if interpolation == 'bilinear': 57 | pil_inter = PIL.Image.BILINEAR 58 | else: 59 | pil_inter = PIL.Image.NEAREST 60 | scaled = [img.resize(size, pil_inter) for img in clip] 61 | else: 62 | raise TypeError('Expected numpy.ndarray or PIL.Image' + 63 | 'but got list of {0}'.format(type(clip[0]))) 64 | return scaled 65 | 66 | 67 | def get_resize_sizes(im_h, im_w, size): 68 | if im_w < im_h: 69 | ow = size 70 | oh = int(size * im_h / im_w) 71 | else: 72 | oh = size 73 | ow = int(size * im_w / im_h) 74 | return oh, ow 75 | 76 | 77 | def normalize(clip, mean, std, inplace=False): 78 | if not _is_tensor_clip(clip): 79 | raise TypeError('tensor is not a torch clip.') 80 | 81 | if not inplace: 82 | clip = clip.clone() 83 | 84 | dtype = clip.dtype 85 | mean = torch.as_tensor(mean, dtype=dtype, device=clip.device) 86 | std = torch.as_tensor(std, dtype=dtype, device=clip.device) 87 | clip.sub_(mean[:, None, None, None]).div_(std[:, None, None, None]) 88 | 89 | return clip 90 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/setr_up_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | from mmcv.cnn import ConvModule, build_norm_layer 4 | 5 | from mmseg.ops import Upsample 6 | from ..builder import HEADS 7 | from .decode_head import BaseDecodeHead 8 | 9 | 10 | @HEADS.register_module() 11 | class SETRUPHead(BaseDecodeHead): 12 | """Naive upsampling head and Progressive upsampling head of SETR. 13 | 14 | Naive or PUP head of `SETR `_. 15 | 16 | Args: 17 | norm_layer (dict): Config dict for input normalization. 18 | Default: norm_layer=dict(type='LN', eps=1e-6, requires_grad=True). 19 | num_convs (int): Number of decoder convolutions. Default: 1. 20 | up_scale (int): The scale factor of interpolate. Default:4. 21 | kernel_size (int): The kernel size of convolution when decoding 22 | feature information from backbone. Default: 3. 23 | init_cfg (dict | list[dict] | None): Initialization config dict. 24 | Default: dict( 25 | type='Constant', val=1.0, bias=0, layer='LayerNorm'). 26 | """ 27 | 28 | def __init__(self, 29 | norm_layer=dict(type='LN', eps=1e-6, requires_grad=True), 30 | num_convs=1, 31 | up_scale=4, 32 | kernel_size=3, 33 | init_cfg=[ 34 | dict(type='Constant', val=1.0, bias=0, layer='LayerNorm'), 35 | dict( 36 | type='Normal', 37 | std=0.01, 38 | override=dict(name='conv_seg')) 39 | ], 40 | **kwargs): 41 | 42 | assert kernel_size in [1, 3], 'kernel_size must be 1 or 3.' 
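        # Only 1x1 or 3x3 decoding convolutions are supported here, so the
        # padding computed as (kernel_size - 1) // 2 below always keeps the
        # spatial size unchanged before each `up_scale`-times upsampling step.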
43 | 44 | super(SETRUPHead, self).__init__(init_cfg=init_cfg, **kwargs) 45 | 46 | assert isinstance(self.in_channels, int) 47 | 48 | _, self.norm = build_norm_layer(norm_layer, self.in_channels) 49 | 50 | self.up_convs = nn.ModuleList() 51 | in_channels = self.in_channels 52 | out_channels = self.channels 53 | for _ in range(num_convs): 54 | self.up_convs.append( 55 | nn.Sequential( 56 | ConvModule( 57 | in_channels=in_channels, 58 | out_channels=out_channels, 59 | kernel_size=kernel_size, 60 | stride=1, 61 | padding=int(kernel_size - 1) // 2, 62 | norm_cfg=self.norm_cfg, 63 | act_cfg=self.act_cfg), 64 | Upsample( 65 | scale_factor=up_scale, 66 | mode='bilinear', 67 | align_corners=self.align_corners))) 68 | in_channels = out_channels 69 | 70 | def forward(self, x): 71 | x = self._transform_inputs(x) 72 | 73 | n, c, h, w = x.shape 74 | x = x.reshape(n, c, h * w).transpose(2, 1).contiguous() 75 | x = self.norm(x) 76 | x = x.transpose(1, 2).reshape(n, c, h, w).contiguous() 77 | 78 | for up_conv in self.up_convs: 79 | x = up_conv(x) 80 | out = self.cls_seg(x) 81 | return out 82 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | 4 | 5 | def accuracy(pred, target, topk=1, thresh=None): 6 | """Calculate accuracy according to the prediction and target. 7 | 8 | Args: 9 | pred (torch.Tensor): The model prediction, shape (N, num_class, ...) 10 | target (torch.Tensor): The target of each prediction, shape (N, , ...) 11 | topk (int | tuple[int], optional): If the predictions in ``topk`` 12 | matches the target, the predictions will be regarded as 13 | correct ones. Defaults to 1. 14 | thresh (float, optional): If not None, predictions with scores under 15 | this threshold are considered incorrect. Default to None. 16 | 17 | Returns: 18 | float | tuple[float]: If the input ``topk`` is a single integer, 19 | the function will return a single float as accuracy. If 20 | ``topk`` is a tuple containing multiple integers, the 21 | function will return a tuple containing accuracies of 22 | each ``topk`` number. 23 | """ 24 | assert isinstance(topk, (int, tuple)) 25 | if isinstance(topk, int): 26 | topk = (topk, ) 27 | return_single = True 28 | else: 29 | return_single = False 30 | 31 | maxk = max(topk) 32 | if pred.size(0) == 0: 33 | accu = [pred.new_tensor(0.) for i in range(len(topk))] 34 | return accu[0] if return_single else accu 35 | assert pred.ndim == target.ndim + 1 36 | assert pred.size(0) == target.size(0) 37 | assert maxk <= pred.size(1), \ 38 | f'maxk {maxk} exceeds pred dimension {pred.size(1)}' 39 | pred_value, pred_label = pred.topk(maxk, dim=1) 40 | # transpose to shape (maxk, N, ...) 41 | pred_label = pred_label.transpose(0, 1) 42 | correct = pred_label.eq(target.unsqueeze(0).expand_as(pred_label)) 43 | if thresh is not None: 44 | # Only prediction values larger than thresh are counted as correct 45 | correct = correct & (pred_value > thresh).t() 46 | res = [] 47 | for k in topk: 48 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 49 | res.append(correct_k.mul_(100.0 / target.numel())) 50 | return res[0] if return_single else res 51 | 52 | 53 | class Accuracy(nn.Module): 54 | """Accuracy calculation module.""" 55 | 56 | def __init__(self, topk=(1, ), thresh=None): 57 | """Module to calculate the accuracy. 
58 | 59 | Args: 60 | topk (tuple, optional): The criterion used to calculate the 61 | accuracy. Defaults to (1,). 62 | thresh (float, optional): If not None, predictions with scores 63 | under this threshold are considered incorrect. Default to None. 64 | """ 65 | super().__init__() 66 | self.topk = topk 67 | self.thresh = thresh 68 | 69 | def forward(self, pred, target): 70 | """Forward function to calculate accuracy. 71 | 72 | Args: 73 | pred (torch.Tensor): Prediction of models. 74 | target (torch.Tensor): Target for each prediction. 75 | 76 | Returns: 77 | tuple[float]: The accuracies under different topk criterions. 78 | """ 79 | return accuracy(pred, target, self.topk, self.thresh) 80 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/lraspp_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv import is_tuple_of 5 | from mmcv.cnn import ConvModule 6 | 7 | from mmseg.ops import resize 8 | from ..builder import HEADS 9 | from .decode_head import BaseDecodeHead 10 | 11 | 12 | @HEADS.register_module() 13 | class LRASPPHead(BaseDecodeHead): 14 | """Lite R-ASPP (LRASPP) head is proposed in Searching for MobileNetV3. 15 | 16 | This head is the improved implementation of `Searching for MobileNetV3 17 | `_. 18 | 19 | Args: 20 | branch_channels (tuple[int]): The number of output channels in every 21 | each branch. Default: (32, 64). 22 | """ 23 | 24 | def __init__(self, branch_channels=(32, 64), **kwargs): 25 | super(LRASPPHead, self).__init__(**kwargs) 26 | if self.input_transform != 'multiple_select': 27 | raise ValueError('in Lite R-ASPP (LRASPP) head, input_transform ' 28 | f'must be \'multiple_select\'. 
But received ' 29 | f'\'{self.input_transform}\'') 30 | assert is_tuple_of(branch_channels, int) 31 | assert len(branch_channels) == len(self.in_channels) - 1 32 | self.branch_channels = branch_channels 33 | 34 | self.convs = nn.Sequential() 35 | self.conv_ups = nn.Sequential() 36 | for i in range(len(branch_channels)): 37 | self.convs.add_module( 38 | f'conv{i}', 39 | nn.Conv2d( 40 | self.in_channels[i], branch_channels[i], 1, bias=False)) 41 | self.conv_ups.add_module( 42 | f'conv_up{i}', 43 | ConvModule( 44 | self.channels + branch_channels[i], 45 | self.channels, 46 | 1, 47 | norm_cfg=self.norm_cfg, 48 | act_cfg=self.act_cfg, 49 | bias=False)) 50 | 51 | self.conv_up_input = nn.Conv2d(self.channels, self.channels, 1) 52 | 53 | self.aspp_conv = ConvModule( 54 | self.in_channels[-1], 55 | self.channels, 56 | 1, 57 | norm_cfg=self.norm_cfg, 58 | act_cfg=self.act_cfg, 59 | bias=False) 60 | self.image_pool = nn.Sequential( 61 | nn.AvgPool2d(kernel_size=49, stride=(16, 20)), 62 | ConvModule( 63 | self.in_channels[2], 64 | self.channels, 65 | 1, 66 | act_cfg=dict(type='Sigmoid'), 67 | bias=False)) 68 | 69 | def forward(self, inputs): 70 | """Forward function.""" 71 | inputs = self._transform_inputs(inputs) 72 | 73 | x = inputs[-1] 74 | 75 | x = self.aspp_conv(x) * resize( 76 | self.image_pool(x), 77 | size=x.size()[2:], 78 | mode='bilinear', 79 | align_corners=self.align_corners) 80 | x = self.conv_up_input(x) 81 | 82 | for i in range(len(self.branch_channels) - 1, -1, -1): 83 | x = resize( 84 | x, 85 | size=inputs[i].size()[2:], 86 | mode='bilinear', 87 | align_corners=self.align_corners) 88 | x = torch.cat([x, self.convs[i](inputs[i])], 1) 89 | x = self.conv_ups[i](x) 90 | 91 | return self.cls_seg(x) 92 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/segmentors/cascade_encoder_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from torch import nn 3 | 4 | from mmseg.core import add_prefix 5 | from mmseg.ops import resize 6 | from .. import builder 7 | from ..builder import SEGMENTORS 8 | from .encoder_decoder import EncoderDecoder 9 | 10 | 11 | @SEGMENTORS.register_module() 12 | class CascadeEncoderDecoder(EncoderDecoder): 13 | """Cascade Encoder Decoder segmentors. 14 | 15 | CascadeEncoderDecoder almost the same as EncoderDecoder, while decoders of 16 | CascadeEncoderDecoder are cascaded. The output of previous decoder_head 17 | will be the input of next decoder_head. 
18 | """ 19 | 20 | def __init__(self, 21 | num_stages, 22 | backbone, 23 | decode_head, 24 | neck=None, 25 | auxiliary_head=None, 26 | train_cfg=None, 27 | test_cfg=None, 28 | pretrained=None, 29 | init_cfg=None): 30 | self.num_stages = num_stages 31 | super(CascadeEncoderDecoder, self).__init__( 32 | backbone=backbone, 33 | decode_head=decode_head, 34 | neck=neck, 35 | auxiliary_head=auxiliary_head, 36 | train_cfg=train_cfg, 37 | test_cfg=test_cfg, 38 | pretrained=pretrained, 39 | init_cfg=init_cfg) 40 | 41 | def _init_decode_head(self, decode_head): 42 | """Initialize ``decode_head``""" 43 | assert isinstance(decode_head, list) 44 | assert len(decode_head) == self.num_stages 45 | self.decode_head = nn.ModuleList() 46 | for i in range(self.num_stages): 47 | self.decode_head.append(builder.build_head(decode_head[i])) 48 | self.align_corners = self.decode_head[-1].align_corners 49 | self.num_classes = self.decode_head[-1].num_classes 50 | 51 | def encode_decode(self, img, img_metas): 52 | """Encode images with backbone and decode into a semantic segmentation 53 | map of the same size as input.""" 54 | x = self.extract_feat(img) 55 | out = self.decode_head[0].forward_test(x, img_metas, self.test_cfg) 56 | for i in range(1, self.num_stages): 57 | out = self.decode_head[i].forward_test(x, out, img_metas, 58 | self.test_cfg) 59 | out = resize( 60 | input=out, 61 | size=img.shape[2:], 62 | mode='bilinear', 63 | align_corners=self.align_corners) 64 | return out 65 | 66 | def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): 67 | """Run forward function and calculate loss for decode head in 68 | training.""" 69 | losses = dict() 70 | 71 | loss_decode = self.decode_head[0].forward_train( 72 | x, img_metas, gt_semantic_seg, self.train_cfg) 73 | 74 | losses.update(add_prefix(loss_decode, 'decode_0')) 75 | 76 | for i in range(1, self.num_stages): 77 | # forward test again, maybe unnecessary for most methods. 78 | prev_outputs = self.decode_head[i - 1].forward_test( 79 | x, img_metas, self.test_cfg) 80 | loss_decode = self.decode_head[i].forward_train( 81 | x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg) 82 | losses.update(add_prefix(loss_decode, f'decode_{i}')) 83 | 84 | return losses 85 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/README.md: -------------------------------------------------------------------------------- 1 | # ADE20k Semantic segmentation with BEiT 2 | 3 | ## Getting started 4 | 5 | 1. Install the [mmsegmentation](https://github.com/open-mmlab/mmsegmentation) library and some required packages. 6 | 7 | ```bash 8 | pip install mmcv-full==1.3.0 mmsegmentation==0.11.0 9 | pip install scipy timm==0.3.2 10 | ``` 11 | 12 | 2. Install [apex](https://github.com/NVIDIA/apex) for mixed-precision training 13 | 14 | ```bash 15 | git clone https://github.com/NVIDIA/apex 16 | cd apex 17 | pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./ 18 | ``` 19 | 20 | 3. Follow the guide in [mmseg](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#ade20k) to prepare the ADE20k dataset. 
21 | 22 | 23 | ## Fine-tuning 24 | 25 | Command format: 26 | ``` 27 | tools/dist_train.sh --work-dir --seed 0 --deterministic --options model.pretrained= 28 | ``` 29 | 30 | For example, using a BEiT-base backbone with UperNet: 31 | ```bash 32 | bash tools/dist_train.sh \ 33 | configs/beit/upernet/upernet_beit_base_12_640_slide_160k_ade20k_pt2ft.py 8 \ 34 | --work-dir /path/to/save --seed 0 --deterministic \ 35 | --options model.pretrained=https://conversationhub.blob.core.windows.net/beit-share-public/beit/beit_base_patch16_224_pt22k_ft22k.pth?sv=2021-10-04&st=2023-06-08T11%3A16%3A02Z&se=2033-06-09T11%3A16%3A00Z&sr=c&sp=r&sig=N4pfCVmSeq4L4tS8QbrFVsX6f6q844eft8xSuXdxU48%3D 36 | ``` 37 | 38 | More config files can be found at [`configs/beit/upernet`](configs/beit/upernet). 39 | 40 | 41 | ## Evaluation 42 | 43 | Command format: 44 | ``` 45 | tools/dist_test.sh --eval mIoU 46 | ``` 47 | 48 | For example, evaluate a BEiT-base backbone with UperNet: 49 | ```bash 50 | bash tools/dist_test.sh configs/beit/upernet/upernet_beit_base_12_640_slide_160k_ade20k_pt2ft.py \ 51 | https://conversationhub.blob.core.windows.net/beit-share-public/beit/beit_base_patch16_640_pt22k_ft22ktoade20k.pth?sv=2021-10-04&st=2023-06-08T11%3A16%3A02Z&se=2033-06-09T11%3A16%3A00Z&sr=c&sp=r&sig=N4pfCVmSeq4L4tS8QbrFVsX6f6q844eft8xSuXdxU48%3D 4 --eval mIoU 52 | ``` 53 | 54 | Expected results: 55 | ``` 56 | +--------+-------+-------+-------+ 57 | | Scope | mIoU | mAcc | aAcc | 58 | +--------+-------+-------+-------+ 59 | | global | 53.61 | 64.82 | 84.62 | 60 | +--------+-------+-------+-------+ 61 | ``` 62 | 63 | Multi-scale + flip (`\*_ms.py`) 64 | ``` 65 | bash tools/dist_test.sh configs/beit/upernet/upernet_beit_base_12_640_slide_160k_ade20k_ms.py \ 66 | https://conversationhub.blob.core.windows.net/beit-share-public/beit/beit_base_patch16_640_pt22k_ft22ktoade20k.pth?sv=2021-10-04&st=2023-06-08T11%3A16%3A02Z&se=2033-06-09T11%3A16%3A00Z&sr=c&sp=r&sig=N4pfCVmSeq4L4tS8QbrFVsX6f6q844eft8xSuXdxU48%3D 4 --eval mIoU 67 | ``` 68 | 69 | Expected results: 70 | ``` 71 | +--------+-------+-------+------+ 72 | | Scope | mIoU | mAcc | aAcc | 73 | +--------+-------+-------+------+ 74 | | global | 54.26 | 65.28 | 84.9 | 75 | +--------+-------+-------+------+ 76 | ``` 77 | 78 | --- 79 | 80 | ## Acknowledgment 81 | 82 | This code is built using the [mmsegmentation](https://github.com/open-mmlab/mmsegmentation) library, [Timm](https://github.com/rwightman/pytorch-image-models) library, the [Swin](https://github.com/microsoft/Swin-Transformer) repository, [XCiT](https://github.com/facebookresearch/xcit) and the [SETR](https://github.com/fudan-zvg/SETR) repository. 83 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/datasets/loveda.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import os.path as osp 3 | 4 | import mmcv 5 | import numpy as np 6 | from PIL import Image 7 | 8 | from .builder import DATASETS 9 | from .custom import CustomDataset 10 | 11 | 12 | @DATASETS.register_module() 13 | class LoveDADataset(CustomDataset): 14 | """LoveDA dataset. 15 | 16 | In segmentation map annotation for LoveDA, 0 is the ignore index. 17 | ``reduce_zero_label`` should be set to True. The ``img_suffix`` and 18 | ``seg_map_suffix`` are both fixed to '.png'. 
19 | """ 20 | CLASSES = ('background', 'building', 'road', 'water', 'barren', 'forest', 21 | 'agricultural') 22 | 23 | PALETTE = [[255, 255, 255], [255, 0, 0], [255, 255, 0], [0, 0, 255], 24 | [159, 129, 183], [0, 255, 0], [255, 195, 128]] 25 | 26 | def __init__(self, **kwargs): 27 | super(LoveDADataset, self).__init__( 28 | img_suffix='.png', 29 | seg_map_suffix='.png', 30 | reduce_zero_label=True, 31 | **kwargs) 32 | 33 | def results2img(self, results, imgfile_prefix, indices=None): 34 | """Write the segmentation results to images. 35 | 36 | Args: 37 | results (list[ndarray]): Testing results of the 38 | dataset. 39 | imgfile_prefix (str): The filename prefix of the png files. 40 | If the prefix is "somepath/xxx", 41 | the png files will be named "somepath/xxx.png". 42 | indices (list[int], optional): Indices of input results, if not 43 | set, all the indices of the dataset will be used. 44 | Default: None. 45 | 46 | Returns: 47 | list[str: str]: result txt files which contains corresponding 48 | semantic segmentation images. 49 | """ 50 | 51 | mmcv.mkdir_or_exist(imgfile_prefix) 52 | result_files = [] 53 | for result, idx in zip(results, indices): 54 | 55 | filename = self.img_infos[idx]['filename'] 56 | basename = osp.splitext(osp.basename(filename))[0] 57 | 58 | png_filename = osp.join(imgfile_prefix, f'{basename}.png') 59 | 60 | # The index range of official requirement is from 0 to 6. 61 | output = Image.fromarray(result.astype(np.uint8)) 62 | output.save(png_filename) 63 | result_files.append(png_filename) 64 | 65 | return result_files 66 | 67 | def format_results(self, results, imgfile_prefix, indices=None): 68 | """Format the results into dir (standard format for LoveDA evaluation). 69 | 70 | Args: 71 | results (list): Testing results of the dataset. 72 | imgfile_prefix (str): The prefix of images files. It 73 | includes the file path and the prefix of filename, e.g., 74 | "a/b/prefix". 75 | indices (list[int], optional): Indices of input results, 76 | if not set, all the indices of the dataset will be used. 77 | Default: None. 78 | 79 | Returns: 80 | tuple: (result_files, tmp_dir), result_files is a list containing 81 | the image paths, tmp_dir is the temporal directory created 82 | for saving json/png files when img_prefix is not specified. 83 | """ 84 | if indices is None: 85 | indices = list(range(len(self))) 86 | 87 | assert isinstance(results, list), 'results must be a list.' 88 | assert isinstance(indices, list), 'indices must be a list.' 89 | 90 | result_files = self.results2img(results, imgfile_prefix, indices) 91 | 92 | return result_files 93 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/utils/res_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn import build_conv_layer, build_norm_layer 3 | from mmcv.runner import Sequential 4 | from torch import nn as nn 5 | 6 | 7 | class ResLayer(Sequential): 8 | """ResLayer to build ResNet style backbone. 9 | 10 | Args: 11 | block (nn.Module): block used to build ResLayer. 12 | inplanes (int): inplanes of block. 13 | planes (int): planes of block. 14 | num_blocks (int): number of blocks. 15 | stride (int): stride of the first block. Default: 1 16 | avg_down (bool): Use AvgPool instead of stride conv when 17 | downsampling in the bottleneck. Default: False 18 | conv_cfg (dict): dictionary to construct and config conv layer. 
19 | Default: None 20 | norm_cfg (dict): dictionary to construct and config norm layer. 21 | Default: dict(type='BN') 22 | multi_grid (int | None): Multi grid dilation rates of last 23 | stage. Default: None 24 | contract_dilation (bool): Whether contract first dilation of each layer 25 | Default: False 26 | """ 27 | 28 | def __init__(self, 29 | block, 30 | inplanes, 31 | planes, 32 | num_blocks, 33 | stride=1, 34 | dilation=1, 35 | avg_down=False, 36 | conv_cfg=None, 37 | norm_cfg=dict(type='BN'), 38 | multi_grid=None, 39 | contract_dilation=False, 40 | **kwargs): 41 | self.block = block 42 | 43 | downsample = None 44 | if stride != 1 or inplanes != planes * block.expansion: 45 | downsample = [] 46 | conv_stride = stride 47 | if avg_down: 48 | conv_stride = 1 49 | downsample.append( 50 | nn.AvgPool2d( 51 | kernel_size=stride, 52 | stride=stride, 53 | ceil_mode=True, 54 | count_include_pad=False)) 55 | downsample.extend([ 56 | build_conv_layer( 57 | conv_cfg, 58 | inplanes, 59 | planes * block.expansion, 60 | kernel_size=1, 61 | stride=conv_stride, 62 | bias=False), 63 | build_norm_layer(norm_cfg, planes * block.expansion)[1] 64 | ]) 65 | downsample = nn.Sequential(*downsample) 66 | 67 | layers = [] 68 | if multi_grid is None: 69 | if dilation > 1 and contract_dilation: 70 | first_dilation = dilation // 2 71 | else: 72 | first_dilation = dilation 73 | else: 74 | first_dilation = multi_grid[0] 75 | layers.append( 76 | block( 77 | inplanes=inplanes, 78 | planes=planes, 79 | stride=stride, 80 | dilation=first_dilation, 81 | downsample=downsample, 82 | conv_cfg=conv_cfg, 83 | norm_cfg=norm_cfg, 84 | **kwargs)) 85 | inplanes = planes * block.expansion 86 | for i in range(1, num_blocks): 87 | layers.append( 88 | block( 89 | inplanes=inplanes, 90 | planes=planes, 91 | stride=1, 92 | dilation=dilation if multi_grid is None else multi_grid[i], 93 | conv_cfg=conv_cfg, 94 | norm_cfg=norm_cfg, 95 | **kwargs)) 96 | super(ResLayer, self).__init__(*layers) 97 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/core/seg/sampler/ohem_pixel_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from ..builder import PIXEL_SAMPLERS 7 | from .base_pixel_sampler import BasePixelSampler 8 | 9 | 10 | @PIXEL_SAMPLERS.register_module() 11 | class OHEMPixelSampler(BasePixelSampler): 12 | """Online Hard Example Mining Sampler for segmentation. 13 | 14 | Args: 15 | context (nn.Module): The context of sampler, subclass of 16 | :obj:`BaseDecodeHead`. 17 | thresh (float, optional): The threshold for hard example selection. 18 | Below which, are prediction with low confidence. If not 19 | specified, the hard examples will be pixels of top ``min_kept`` 20 | loss. Default: None. 21 | min_kept (int, optional): The minimum number of predictions to keep. 22 | Default: 100000. 23 | """ 24 | 25 | def __init__(self, context, thresh=None, min_kept=100000): 26 | super(OHEMPixelSampler, self).__init__() 27 | self.context = context 28 | assert min_kept > 1 29 | self.thresh = thresh 30 | self.min_kept = min_kept 31 | 32 | def sample(self, seg_logit, seg_label): 33 | """Sample pixels that have high loss or with low prediction confidence. 
34 | 35 | Args: 36 | seg_logit (torch.Tensor): segmentation logits, shape (N, C, H, W) 37 | seg_label (torch.Tensor): segmentation label, shape (N, 1, H, W) 38 | 39 | Returns: 40 | torch.Tensor: segmentation weight, shape (N, H, W) 41 | """ 42 | with torch.no_grad(): 43 | assert seg_logit.shape[2:] == seg_label.shape[2:] 44 | assert seg_label.shape[1] == 1 45 | seg_label = seg_label.squeeze(1).long() 46 | batch_kept = self.min_kept * seg_label.size(0) 47 | valid_mask = seg_label != self.context.ignore_index 48 | seg_weight = seg_logit.new_zeros(size=seg_label.size()) 49 | valid_seg_weight = seg_weight[valid_mask] 50 | if self.thresh is not None: 51 | seg_prob = F.softmax(seg_logit, dim=1) 52 | 53 | tmp_seg_label = seg_label.clone().unsqueeze(1) 54 | tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0 55 | seg_prob = seg_prob.gather(1, tmp_seg_label).squeeze(1) 56 | sort_prob, sort_indices = seg_prob[valid_mask].sort() 57 | 58 | if sort_prob.numel() > 0: 59 | min_threshold = sort_prob[min(batch_kept, 60 | sort_prob.numel() - 1)] 61 | else: 62 | min_threshold = 0.0 63 | threshold = max(min_threshold, self.thresh) 64 | valid_seg_weight[seg_prob[valid_mask] < threshold] = 1. 65 | else: 66 | if not isinstance(self.context.loss_decode, nn.ModuleList): 67 | losses_decode = [self.context.loss_decode] 68 | else: 69 | losses_decode = self.context.loss_decode 70 | losses = 0.0 71 | for loss_module in losses_decode: 72 | losses += loss_module( 73 | seg_logit, 74 | seg_label, 75 | weight=None, 76 | ignore_index=self.context.ignore_index, 77 | reduction_override='none') 78 | 79 | # faster than topk according to https://github.com/pytorch/pytorch/issues/22812 # noqa 80 | _, sort_indices = losses[valid_mask].sort(descending=True) 81 | valid_seg_weight[sort_indices[:batch_kept]] = 1. 82 | 83 | seg_weight[valid_mask] = valid_seg_weight 84 | 85 | return seg_weight 86 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/psp_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from mmseg.ops import resize 7 | from ..builder import HEADS 8 | from .decode_head import BaseDecodeHead 9 | 10 | 11 | class PPM(nn.ModuleList): 12 | """Pooling Pyramid Module used in PSPNet. 13 | 14 | Args: 15 | pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid 16 | Module. 17 | in_channels (int): Input channels. 18 | channels (int): Channels after modules, before conv_seg. 19 | conv_cfg (dict|None): Config of conv layers. 20 | norm_cfg (dict|None): Config of norm layers. 21 | act_cfg (dict): Config of activation layers. 22 | align_corners (bool): align_corners argument of F.interpolate. 
23 | """ 24 | 25 | def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, 26 | act_cfg, align_corners, **kwargs): 27 | super(PPM, self).__init__() 28 | self.pool_scales = pool_scales 29 | self.align_corners = align_corners 30 | self.in_channels = in_channels 31 | self.channels = channels 32 | self.conv_cfg = conv_cfg 33 | self.norm_cfg = norm_cfg 34 | self.act_cfg = act_cfg 35 | for pool_scale in pool_scales: 36 | self.append( 37 | nn.Sequential( 38 | nn.AdaptiveAvgPool2d(pool_scale), 39 | ConvModule( 40 | self.in_channels, 41 | self.channels, 42 | 1, 43 | conv_cfg=self.conv_cfg, 44 | norm_cfg=self.norm_cfg, 45 | act_cfg=self.act_cfg, 46 | **kwargs))) 47 | 48 | def forward(self, x): 49 | """Forward function.""" 50 | ppm_outs = [] 51 | for ppm in self: 52 | ppm_out = ppm(x) 53 | upsampled_ppm_out = resize( 54 | ppm_out, 55 | size=x.size()[2:], 56 | mode='bilinear', 57 | align_corners=self.align_corners) 58 | ppm_outs.append(upsampled_ppm_out) 59 | return ppm_outs 60 | 61 | 62 | @HEADS.register_module() 63 | class PSPHead(BaseDecodeHead): 64 | """Pyramid Scene Parsing Network. 65 | 66 | This head is the implementation of 67 | `PSPNet `_. 68 | 69 | Args: 70 | pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid 71 | Module. Default: (1, 2, 3, 6). 72 | """ 73 | 74 | def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): 75 | super(PSPHead, self).__init__(**kwargs) 76 | assert isinstance(pool_scales, (list, tuple)) 77 | self.pool_scales = pool_scales 78 | self.psp_modules = PPM( 79 | self.pool_scales, 80 | self.in_channels, 81 | self.channels, 82 | conv_cfg=self.conv_cfg, 83 | norm_cfg=self.norm_cfg, 84 | act_cfg=self.act_cfg, 85 | align_corners=self.align_corners) 86 | self.bottleneck = ConvModule( 87 | self.in_channels + len(pool_scales) * self.channels, 88 | self.channels, 89 | 3, 90 | padding=1, 91 | conv_cfg=self.conv_cfg, 92 | norm_cfg=self.norm_cfg, 93 | act_cfg=self.act_cfg) 94 | 95 | def forward(self, inputs): 96 | """Forward function.""" 97 | x = self._transform_inputs(inputs) 98 | psp_outs = [x] 99 | psp_outs.extend(self.psp_modules(x)) 100 | psp_outs = torch.cat(psp_outs, dim=1) 101 | output = self.bottleneck(psp_outs) 102 | output = self.cls_seg(output) 103 | return output 104 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmcv_custom/layer_decay_optimizer_constructor.py: -------------------------------------------------------------------------------- 1 | import json 2 | from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor 3 | from mmcv.runner import get_dist_info 4 | 5 | 6 | def get_num_layer_for_vit(var_name, num_max_layer): 7 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 8 | return 0 9 | elif var_name.startswith("backbone.patch_embed"): 10 | return 0 11 | elif var_name.startswith("backbone.blocks"): 12 | layer_id = int(var_name.split('.')[2]) 13 | return layer_id + 1 14 | else: 15 | return num_max_layer - 1 16 | 17 | 18 | @OPTIMIZER_BUILDERS.register_module() 19 | class LayerDecayOptimizerConstructor(DefaultOptimizerConstructor): 20 | def add_params(self, params, module, prefix='', is_dcn_module=None): 21 | """Add all parameters of module to the params list. 22 | The parameters of the given module will be added to the list of param 23 | groups, with specific rules defined by paramwise_cfg. 24 | Args: 25 | params (list[dict]): A list of param groups, it will be modified 26 | in place. 
27 | module (nn.Module): The module to be added. 28 | prefix (str): The prefix of the module 29 | is_dcn_module (int|float|None): If the current module is a 30 | submodule of DCN, `is_dcn_module` will be passed to 31 | control conv_offset layer's learning rate. Defaults to None. 32 | """ 33 | parameter_groups = {} 34 | print(self.paramwise_cfg) 35 | num_layers = self.paramwise_cfg.get('num_layers') + 2 36 | layer_decay_rate = self.paramwise_cfg.get('layer_decay_rate') 37 | print("Build LayerDecayOptimizerConstructor %f - %d" % (layer_decay_rate, num_layers)) 38 | weight_decay = self.base_wd 39 | 40 | for name, param in module.named_parameters(): 41 | if not param.requires_grad: 42 | continue # frozen weights 43 | if len(param.shape) == 1 or name.endswith(".bias") or name in ('pos_embed', 'cls_token'): 44 | group_name = "no_decay" 45 | this_weight_decay = 0. 46 | else: 47 | group_name = "decay" 48 | this_weight_decay = weight_decay 49 | 50 | layer_id = get_num_layer_for_vit(name, num_layers) 51 | group_name = "layer_%d_%s" % (layer_id, group_name) 52 | 53 | if group_name not in parameter_groups: 54 | scale = layer_decay_rate ** (num_layers - layer_id - 1) 55 | 56 | parameter_groups[group_name] = { 57 | "weight_decay": this_weight_decay, 58 | "params": [], 59 | "param_names": [], 60 | "lr_scale": scale, 61 | "group_name": group_name, 62 | "lr": scale * self.base_lr, 63 | } 64 | 65 | parameter_groups[group_name]["params"].append(param) 66 | parameter_groups[group_name]["param_names"].append(name) 67 | rank, _ = get_dist_info() 68 | if rank == 0: 69 | to_display = {} 70 | for key in parameter_groups: 71 | to_display[key] = { 72 | "param_names": parameter_groups[key]["param_names"], 73 | "lr_scale": parameter_groups[key]["lr_scale"], 74 | "lr": parameter_groups[key]["lr"], 75 | "weight_decay": parameter_groups[key]["weight_decay"], 76 | } 77 | print("Param groups = %s" % json.dumps(to_display, indent=2)) 78 | 79 | # state_dict = module.state_dict() 80 | # for group_name in parameter_groups: 81 | # group = parameter_groups[group_name] 82 | # for name in group["param_names"]: 83 | # group["params"].append(state_dict[name]) 84 | params.extend(parameter_groups.values()) 85 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/aspp_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from mmseg.ops import resize 7 | from ..builder import HEADS 8 | from .decode_head import BaseDecodeHead 9 | 10 | 11 | class ASPPModule(nn.ModuleList): 12 | """Atrous Spatial Pyramid Pooling (ASPP) Module. 13 | 14 | Args: 15 | dilations (tuple[int]): Dilation rate of each layer. 16 | in_channels (int): Input channels. 17 | channels (int): Channels after modules, before conv_seg. 18 | conv_cfg (dict|None): Config of conv layers. 19 | norm_cfg (dict|None): Config of norm layers. 20 | act_cfg (dict): Config of activation layers. 
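
    Example (illustrative sketch; the input shape, channel counts and configs
    are arbitrary choices):

        >>> import torch
        >>> aspp = ASPPModule(dilations=(1, 6, 12, 18), in_channels=2048,
        ...                   channels=512, conv_cfg=None, norm_cfg=None,
        ...                   act_cfg=dict(type='ReLU'))
        >>> outs = aspp(torch.randn(1, 2048, 32, 32))
        >>> [tuple(o.shape) for o in outs]
        [(1, 512, 32, 32), (1, 512, 32, 32), (1, 512, 32, 32), (1, 512, 32, 32)]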
21 | """ 22 | 23 | def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg, 24 | act_cfg): 25 | super(ASPPModule, self).__init__() 26 | self.dilations = dilations 27 | self.in_channels = in_channels 28 | self.channels = channels 29 | self.conv_cfg = conv_cfg 30 | self.norm_cfg = norm_cfg 31 | self.act_cfg = act_cfg 32 | for dilation in dilations: 33 | self.append( 34 | ConvModule( 35 | self.in_channels, 36 | self.channels, 37 | 1 if dilation == 1 else 3, 38 | dilation=dilation, 39 | padding=0 if dilation == 1 else dilation, 40 | conv_cfg=self.conv_cfg, 41 | norm_cfg=self.norm_cfg, 42 | act_cfg=self.act_cfg)) 43 | 44 | def forward(self, x): 45 | """Forward function.""" 46 | aspp_outs = [] 47 | for aspp_module in self: 48 | aspp_outs.append(aspp_module(x)) 49 | 50 | return aspp_outs 51 | 52 | 53 | @HEADS.register_module() 54 | class ASPPHead(BaseDecodeHead): 55 | """Rethinking Atrous Convolution for Semantic Image Segmentation. 56 | 57 | This head is the implementation of `DeepLabV3 58 | `_. 59 | 60 | Args: 61 | dilations (tuple[int]): Dilation rates for ASPP module. 62 | Default: (1, 6, 12, 18). 63 | """ 64 | 65 | def __init__(self, dilations=(1, 6, 12, 18), **kwargs): 66 | super(ASPPHead, self).__init__(**kwargs) 67 | assert isinstance(dilations, (list, tuple)) 68 | self.dilations = dilations 69 | self.image_pool = nn.Sequential( 70 | nn.AdaptiveAvgPool2d(1), 71 | ConvModule( 72 | self.in_channels, 73 | self.channels, 74 | 1, 75 | conv_cfg=self.conv_cfg, 76 | norm_cfg=self.norm_cfg, 77 | act_cfg=self.act_cfg)) 78 | self.aspp_modules = ASPPModule( 79 | dilations, 80 | self.in_channels, 81 | self.channels, 82 | conv_cfg=self.conv_cfg, 83 | norm_cfg=self.norm_cfg, 84 | act_cfg=self.act_cfg) 85 | self.bottleneck = ConvModule( 86 | (len(dilations) + 1) * self.channels, 87 | self.channels, 88 | 3, 89 | padding=1, 90 | conv_cfg=self.conv_cfg, 91 | norm_cfg=self.norm_cfg, 92 | act_cfg=self.act_cfg) 93 | 94 | def forward(self, inputs): 95 | """Forward function.""" 96 | x = self._transform_inputs(inputs) 97 | aspp_outs = [ 98 | resize( 99 | self.image_pool(x), 100 | size=x.size()[2:], 101 | mode='bilinear', 102 | align_corners=self.align_corners) 103 | ] 104 | aspp_outs.extend(self.aspp_modules(x)) 105 | aspp_outs = torch.cat(aspp_outs, dim=1) 106 | output = self.bottleneck(aspp_outs) 107 | output = self.cls_seg(output) 108 | return output 109 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/sep_aspp_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule 5 | 6 | from mmseg.ops import resize 7 | from ..builder import HEADS 8 | from .aspp_head import ASPPHead, ASPPModule 9 | 10 | 11 | class DepthwiseSeparableASPPModule(ASPPModule): 12 | """Atrous Spatial Pyramid Pooling (ASPP) Module with depthwise separable 13 | conv.""" 14 | 15 | def __init__(self, **kwargs): 16 | super(DepthwiseSeparableASPPModule, self).__init__(**kwargs) 17 | for i, dilation in enumerate(self.dilations): 18 | if dilation > 1: 19 | self[i] = DepthwiseSeparableConvModule( 20 | self.in_channels, 21 | self.channels, 22 | 3, 23 | dilation=dilation, 24 | padding=dilation, 25 | norm_cfg=self.norm_cfg, 26 | act_cfg=self.act_cfg) 27 | 28 | 29 | @HEADS.register_module() 30 | class DepthwiseSeparableASPPHead(ASPPHead): 31 | """Encoder-Decoder with Atrous Separable Convolution for Semantic Image 32 | Segmentation. 33 | 34 | This head is the implementation of `DeepLabV3+ 35 | `_. 36 | 37 | Args: 38 | c1_in_channels (int): The input channels of c1 decoder. If is 0, 39 | the no decoder will be used. 40 | c1_channels (int): The intermediate channels of c1 decoder. 41 | """ 42 | 43 | def __init__(self, c1_in_channels, c1_channels, **kwargs): 44 | super(DepthwiseSeparableASPPHead, self).__init__(**kwargs) 45 | assert c1_in_channels >= 0 46 | self.aspp_modules = DepthwiseSeparableASPPModule( 47 | dilations=self.dilations, 48 | in_channels=self.in_channels, 49 | channels=self.channels, 50 | conv_cfg=self.conv_cfg, 51 | norm_cfg=self.norm_cfg, 52 | act_cfg=self.act_cfg) 53 | if c1_in_channels > 0: 54 | self.c1_bottleneck = ConvModule( 55 | c1_in_channels, 56 | c1_channels, 57 | 1, 58 | conv_cfg=self.conv_cfg, 59 | norm_cfg=self.norm_cfg, 60 | act_cfg=self.act_cfg) 61 | else: 62 | self.c1_bottleneck = None 63 | self.sep_bottleneck = nn.Sequential( 64 | DepthwiseSeparableConvModule( 65 | self.channels + c1_channels, 66 | self.channels, 67 | 3, 68 | padding=1, 69 | norm_cfg=self.norm_cfg, 70 | act_cfg=self.act_cfg), 71 | DepthwiseSeparableConvModule( 72 | self.channels, 73 | self.channels, 74 | 3, 75 | padding=1, 76 | norm_cfg=self.norm_cfg, 77 | act_cfg=self.act_cfg)) 78 | 79 | def forward(self, inputs): 80 | """Forward function.""" 81 | x = self._transform_inputs(inputs) 82 | aspp_outs = [ 83 | resize( 84 | self.image_pool(x), 85 | size=x.size()[2:], 86 | mode='bilinear', 87 | align_corners=self.align_corners) 88 | ] 89 | aspp_outs.extend(self.aspp_modules(x)) 90 | aspp_outs = torch.cat(aspp_outs, dim=1) 91 | output = self.bottleneck(aspp_outs) 92 | if self.c1_bottleneck is not None: 93 | c1_output = self.c1_bottleneck(inputs[0]) 94 | output = resize( 95 | input=output, 96 | size=c1_output.shape[2:], 97 | mode='bilinear', 98 | align_corners=self.align_corners) 99 | output = torch.cat([output, c1_output], dim=1) 100 | output = self.sep_bottleneck(output) 101 | output = self.cls_seg(output) 102 | return output 103 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/stdc_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | from ..builder import HEADS 6 | from .fcn_head import FCNHead 7 | 8 | 9 | @HEADS.register_module() 10 | class STDCHead(FCNHead): 11 | """This head is the implementation of `Rethinking BiSeNet For Real-time 12 | Semantic Segmentation `_. 
13 | 14 | Args: 15 | boundary_threshold (float): The threshold of calculating boundary. 16 | Default: 0.1. 17 | """ 18 | 19 | def __init__(self, boundary_threshold=0.1, **kwargs): 20 | super(STDCHead, self).__init__(**kwargs) 21 | self.boundary_threshold = boundary_threshold 22 | # Using register buffer to make laplacian kernel on the same 23 | # device of `seg_label`. 24 | self.register_buffer( 25 | 'laplacian_kernel', 26 | torch.tensor([-1, -1, -1, -1, 8, -1, -1, -1, -1], 27 | dtype=torch.float32, 28 | requires_grad=False).reshape((1, 1, 3, 3))) 29 | self.fusion_kernel = torch.nn.Parameter( 30 | torch.tensor([[6. / 10], [3. / 10], [1. / 10]], 31 | dtype=torch.float32).reshape(1, 3, 1, 1), 32 | requires_grad=False) 33 | 34 | def losses(self, seg_logit, seg_label): 35 | """Compute Detail Aggregation Loss.""" 36 | # Note: The paper claims `fusion_kernel` is a trainable 1x1 conv 37 | # parameters. However, it is a constant in original repo and other 38 | # codebase because it would not be added into computation graph 39 | # after threshold operation. 40 | seg_label = seg_label.float() 41 | boundary_targets = F.conv2d( 42 | seg_label, self.laplacian_kernel, padding=1) 43 | boundary_targets = boundary_targets.clamp(min=0) 44 | boundary_targets[boundary_targets > self.boundary_threshold] = 1 45 | boundary_targets[boundary_targets <= self.boundary_threshold] = 0 46 | 47 | boundary_targets_x2 = F.conv2d( 48 | seg_label, self.laplacian_kernel, stride=2, padding=1) 49 | boundary_targets_x2 = boundary_targets_x2.clamp(min=0) 50 | 51 | boundary_targets_x4 = F.conv2d( 52 | seg_label, self.laplacian_kernel, stride=4, padding=1) 53 | boundary_targets_x4 = boundary_targets_x4.clamp(min=0) 54 | 55 | boundary_targets_x4_up = F.interpolate( 56 | boundary_targets_x4, boundary_targets.shape[2:], mode='nearest') 57 | boundary_targets_x2_up = F.interpolate( 58 | boundary_targets_x2, boundary_targets.shape[2:], mode='nearest') 59 | 60 | boundary_targets_x2_up[ 61 | boundary_targets_x2_up > self.boundary_threshold] = 1 62 | boundary_targets_x2_up[ 63 | boundary_targets_x2_up <= self.boundary_threshold] = 0 64 | 65 | boundary_targets_x4_up[ 66 | boundary_targets_x4_up > self.boundary_threshold] = 1 67 | boundary_targets_x4_up[ 68 | boundary_targets_x4_up <= self.boundary_threshold] = 0 69 | 70 | boudary_targets_pyramids = torch.stack( 71 | (boundary_targets, boundary_targets_x2_up, boundary_targets_x4_up), 72 | dim=1) 73 | 74 | boudary_targets_pyramids = boudary_targets_pyramids.squeeze(2) 75 | boudary_targets_pyramid = F.conv2d(boudary_targets_pyramids, 76 | self.fusion_kernel) 77 | 78 | boudary_targets_pyramid[ 79 | boudary_targets_pyramid > self.boundary_threshold] = 1 80 | boudary_targets_pyramid[ 81 | boudary_targets_pyramid <= self.boundary_threshold] = 0 82 | 83 | seg_logit = F.interpolate( 84 | seg_logit, 85 | boundary_targets.shape[2:], 86 | mode='bilinear', 87 | align_corners=True) 88 | loss = super(STDCHead, self).losses(seg_logit, 89 | boudary_targets_pyramid.long()) 90 | return loss 91 | -------------------------------------------------------------------------------- /datasets/food101.py: -------------------------------------------------------------------------------- 1 | # copy-paste from https://github.com/pytorch/vision/blob/main/torchvision/datasets/food101.py 2 | import json 3 | from pathlib import Path 4 | from typing import Any, Tuple, Callable, Optional 5 | 6 | import PIL.Image 7 | 8 | from torchvision.datasets.utils import verify_str_arg, download_and_extract_archive 9 | from 
torchvision.datasets.vision import VisionDataset 10 | 11 | 12 | class Food101(VisionDataset): 13 | """`The Food-101 Data Set `_. 14 | 15 | The Food-101 is a challenging data set of 101 food categories, with 101'000 images. 16 | For each class, 250 manually reviewed test images are provided as well as 750 training images. 17 | On purpose, the training images were not cleaned, and thus still contain some amount of noise. 18 | This comes mostly in the form of intense colors and sometimes wrong labels. All images were 19 | rescaled to have a maximum side length of 512 pixels. 20 | 21 | 22 | Args: 23 | root (string): Root directory of the dataset. 24 | split (string, optional): The dataset split, supports ``"train"`` (default) and ``"test"``. 25 | transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed 26 | version. E.g, ``transforms.RandomCrop``. 27 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 28 | download (bool, optional): If True, downloads the dataset from the internet and 29 | puts it in root directory. If dataset is already downloaded, it is not 30 | downloaded again. Default is False. 31 | """ 32 | 33 | _URL = "http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz" 34 | _MD5 = "85eeb15f3717b99a5da872d97d918f87" 35 | 36 | def __init__( 37 | self, 38 | root: str, 39 | split: str = "train", 40 | transform: Optional[Callable] = None, 41 | target_transform: Optional[Callable] = None, 42 | download: bool = False, 43 | ) -> None: 44 | super().__init__(root, transform=transform, target_transform=target_transform) 45 | self._split = verify_str_arg(split, "split", ("train", "test")) 46 | self._base_folder = Path(self.root) / "food-101" 47 | self._meta_folder = self._base_folder / "meta" 48 | self._images_folder = self._base_folder / "images" 49 | 50 | if download: 51 | self._download() 52 | 53 | if not self._check_exists(): 54 | raise RuntimeError("Dataset not found. 
You can use download=True to download it") 55 | 56 | self._labels = [] 57 | self._image_files = [] 58 | with open(self._meta_folder / f"{split}.json") as f: 59 | metadata = json.loads(f.read()) 60 | 61 | self.classes = sorted(metadata.keys()) 62 | self.class_to_idx = dict(zip(self.classes, range(len(self.classes)))) 63 | 64 | for class_label, im_rel_paths in metadata.items(): 65 | self._labels += [self.class_to_idx[class_label]] * len(im_rel_paths) 66 | self._image_files += [ 67 | self._images_folder.joinpath(*f"{im_rel_path}.jpg".split("/")) for im_rel_path in im_rel_paths 68 | ] 69 | 70 | def __len__(self) -> int: 71 | return len(self._image_files) 72 | 73 | def __getitem__(self, idx) -> Tuple[Any, Any]: 74 | image_file, label = self._image_files[idx], self._labels[idx] 75 | image = PIL.Image.open(image_file).convert("RGB") 76 | 77 | if self.transform: 78 | image = self.transform(image) 79 | 80 | if self.target_transform: 81 | label = self.target_transform(label) 82 | 83 | return image, label 84 | 85 | def extra_repr(self) -> str: 86 | return f"split={self._split}" 87 | 88 | def _check_exists(self) -> bool: 89 | return all(folder.exists() and folder.is_dir() for folder in (self._meta_folder, self._images_folder)) 90 | 91 | def _download(self) -> None: 92 | if self._check_exists(): 93 | return 94 | download_and_extract_archive(self._URL, download_root=self.root, md5=self._MD5) 95 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import functools 3 | 4 | import mmcv 5 | import numpy as np 6 | import torch.nn.functional as F 7 | 8 | 9 | def get_class_weight(class_weight): 10 | """Get class weight for loss function. 11 | 12 | Args: 13 | class_weight (list[float] | str | None): If class_weight is a str, 14 | take it as a file name and read from it. 15 | """ 16 | if isinstance(class_weight, str): 17 | # take it as a file path 18 | if class_weight.endswith('.npy'): 19 | class_weight = np.load(class_weight) 20 | else: 21 | # pkl, json or yaml 22 | class_weight = mmcv.load(class_weight) 23 | 24 | return class_weight 25 | 26 | 27 | def reduce_loss(loss, reduction): 28 | """Reduce loss as specified. 29 | 30 | Args: 31 | loss (Tensor): Elementwise loss tensor. 32 | reduction (str): Options are "none", "mean" and "sum". 33 | 34 | Return: 35 | Tensor: Reduced loss tensor. 36 | """ 37 | reduction_enum = F._Reduction.get_enum(reduction) 38 | # none: 0, elementwise_mean:1, sum: 2 39 | if reduction_enum == 0: 40 | return loss 41 | elif reduction_enum == 1: 42 | return loss.mean() 43 | elif reduction_enum == 2: 44 | return loss.sum() 45 | 46 | 47 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 48 | """Apply element-wise weight and reduce loss. 49 | 50 | Args: 51 | loss (Tensor): Element-wise loss. 52 | weight (Tensor): Element-wise weights. 53 | reduction (str): Same as built-in losses of PyTorch. 54 | avg_factor (float): Average factor when computing the mean of losses. 55 | 56 | Returns: 57 | Tensor: Processed loss values. 
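
    Example (illustrative values that follow from the definitions above):

        >>> import torch
        >>> loss = torch.tensor([1., 2., 3.])
        >>> weight_reduce_loss(loss, reduction='sum')
        tensor(6.)
        >>> weight_reduce_loss(loss, avg_factor=2.0)
        tensor(3.)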
58 | """ 59 | # if weight is specified, apply element-wise weight 60 | if weight is not None: 61 | assert weight.dim() == loss.dim() 62 | if weight.dim() > 1: 63 | assert weight.size(1) == 1 or weight.size(1) == loss.size(1) 64 | loss = loss * weight 65 | 66 | # if avg_factor is not specified, just reduce the loss 67 | if avg_factor is None: 68 | loss = reduce_loss(loss, reduction) 69 | else: 70 | # if reduction is mean, then average the loss by avg_factor 71 | if reduction == 'mean': 72 | loss = loss.sum() / avg_factor 73 | # if reduction is 'none', then do nothing, otherwise raise an error 74 | elif reduction != 'none': 75 | raise ValueError('avg_factor can not be used with reduction="sum"') 76 | return loss 77 | 78 | 79 | def weighted_loss(loss_func): 80 | """Create a weighted version of a given loss function. 81 | 82 | To use this decorator, the loss function must have the signature like 83 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 84 | element-wise loss without any reduction. This decorator will add weight 85 | and reduction arguments to the function. The decorated function will have 86 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 87 | avg_factor=None, **kwargs)`. 88 | 89 | :Example: 90 | 91 | >>> import torch 92 | >>> @weighted_loss 93 | >>> def l1_loss(pred, target): 94 | >>> return (pred - target).abs() 95 | 96 | >>> pred = torch.Tensor([0, 2, 3]) 97 | >>> target = torch.Tensor([1, 1, 1]) 98 | >>> weight = torch.Tensor([1, 0, 1]) 99 | 100 | >>> l1_loss(pred, target) 101 | tensor(1.3333) 102 | >>> l1_loss(pred, target, weight) 103 | tensor(1.) 104 | >>> l1_loss(pred, target, reduction='none') 105 | tensor([1., 1., 2.]) 106 | >>> l1_loss(pred, target, weight, avg_factor=2) 107 | tensor(1.5000) 108 | """ 109 | 110 | @functools.wraps(loss_func) 111 | def wrapper(pred, 112 | target, 113 | weight=None, 114 | reduction='mean', 115 | avg_factor=None, 116 | **kwargs): 117 | # get element-wise loss 118 | loss = loss_func(pred, target, **kwargs) 119 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 120 | return loss 121 | 122 | return wrapper 123 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmcv_custom/apex_runner/apex_iter_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import platform 4 | import shutil 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.runner import RUNNERS, IterBasedRunner 11 | from .checkpoint import save_checkpoint 12 | 13 | try: 14 | import apex 15 | except: 16 | print('apex is not installed') 17 | 18 | 19 | @RUNNERS.register_module() 20 | class IterBasedRunnerAmp(IterBasedRunner): 21 | """Iteration-based Runner with AMP support. 22 | 23 | This runner train models iteration by iteration. 24 | """ 25 | 26 | def save_checkpoint(self, 27 | out_dir, 28 | filename_tmpl='iter_{}.pth', 29 | meta=None, 30 | save_optimizer=True, 31 | create_symlink=False): 32 | """Save checkpoint to file. 33 | 34 | Args: 35 | out_dir (str): Directory to save checkpoint files. 36 | filename_tmpl (str, optional): Checkpoint file template. 37 | Defaults to 'iter_{}.pth'. 38 | meta (dict, optional): Metadata to be saved in checkpoint. 39 | Defaults to None. 40 | save_optimizer (bool, optional): Whether save optimizer. 41 | Defaults to True. 
42 | create_symlink (bool, optional): Whether create symlink to the 43 | latest checkpoint file. Defaults to True. 44 | """ 45 | if meta is None: 46 | meta = dict(iter=self.iter + 1, epoch=self.epoch + 1) 47 | elif isinstance(meta, dict): 48 | meta.update(iter=self.iter + 1, epoch=self.epoch + 1) 49 | else: 50 | raise TypeError( 51 | f'meta should be a dict or None, but got {type(meta)}') 52 | if self.meta is not None: 53 | meta.update(self.meta) 54 | 55 | filename = filename_tmpl.format(self.iter + 1) 56 | filepath = osp.join(out_dir, filename) 57 | optimizer = self.optimizer if save_optimizer else None 58 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) 59 | # in some environments, `os.symlink` is not supported, you may need to 60 | # set `create_symlink` to False 61 | # if create_symlink: 62 | # dst_file = osp.join(out_dir, 'latest.pth') 63 | # if platform.system() != 'Windows': 64 | # mmcv.symlink(filename, dst_file) 65 | # else: 66 | # shutil.copy(filepath, dst_file) 67 | 68 | def resume(self, 69 | checkpoint, 70 | resume_optimizer=True, 71 | map_location='default'): 72 | if map_location == 'default': 73 | if torch.cuda.is_available(): 74 | device_id = torch.cuda.current_device() 75 | checkpoint = self.load_checkpoint( 76 | checkpoint, 77 | map_location=lambda storage, loc: storage.cuda(device_id)) 78 | else: 79 | checkpoint = self.load_checkpoint(checkpoint) 80 | else: 81 | checkpoint = self.load_checkpoint( 82 | checkpoint, map_location=map_location) 83 | 84 | self._epoch = checkpoint['meta']['epoch'] 85 | self._iter = checkpoint['meta']['iter'] 86 | self._inner_iter = checkpoint['meta']['iter'] 87 | if 'optimizer' in checkpoint and resume_optimizer: 88 | if isinstance(self.optimizer, Optimizer): 89 | self.optimizer.load_state_dict(checkpoint['optimizer']) 90 | elif isinstance(self.optimizer, dict): 91 | for k in self.optimizer.keys(): 92 | self.optimizer[k].load_state_dict( 93 | checkpoint['optimizer'][k]) 94 | else: 95 | raise TypeError( 96 | 'Optimizer should be dict or torch.optim.Optimizer ' 97 | f'but got {type(self.optimizer)}') 98 | 99 | if 'amp' in checkpoint: 100 | apex.amp.load_state_dict(checkpoint['amp']) 101 | self.logger.info('load amp state dict') 102 | 103 | self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}') 104 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.15.0 2 | accelerate==0.27.2 3 | addict==2.4.0 4 | aliyun-python-sdk-core==2.13.36 5 | aliyun-python-sdk-kms==2.16.2 6 | antlr4-python3-runtime==4.9.3 7 | array-record==0.5.0 8 | astunparse==1.6.3 9 | av==10.0.0 10 | black==24.2.0 11 | brotlipy==0.7.0 12 | cachetools==5.3.1 13 | certifi @ file:///croot/certifi_1690232220950/work/certifi 14 | cffi @ file:///croot/cffi_1670423208954/work 15 | charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work 16 | click==8.1.7 17 | cloudpickle==3.0.0 18 | cmake==3.28.3 19 | colorama==0.4.6 20 | contourpy==1.2.0 21 | crcmod==1.7 22 | cryptography @ file:///croot/cryptography_1694444244250/work 23 | cycler==0.12.1 24 | decord==0.6.0 25 | diffusers==0.26.3 26 | dm-tree==0.1.8 27 | easydict==1.11 28 | einops==0.6.1 29 | etils==1.5.2 30 | filelock==3.12.4 31 | flatbuffers==1.12 32 | fonttools==4.44.0 33 | fsspec==2023.9.1 34 | ftfy==6.1.1 35 | fvcore==0.1.5.post20221221 36 | gast==0.4.0 37 | google-auth==2.23.0 38 | google-auth-oauthlib==0.4.6 39 | 
google-pasta==0.2.0 40 | googleapis-common-protos==1.62.0 41 | grpcio==1.34.1 42 | h5py==3.1.0 43 | huggingface-hub==0.20.3 44 | hydra-core==1.3.2 45 | idna @ file:///croot/idna_1666125576474/work 46 | importlib-metadata==6.8.0 47 | importlib-resources==6.1.1 48 | iopath==0.1.9 49 | Jinja2==3.1.3 50 | jmespath==0.10.0 51 | joblib==1.3.2 52 | keras==2.5.0rc0 53 | keras-nightly==2.5.0.dev2021032900 54 | Keras-Preprocessing==1.1.2 55 | kiwisolver==1.4.5 56 | libclang==16.0.6 57 | lit==17.0.6 58 | Markdown==3.4.4 59 | markdown-it-py==3.0.0 60 | MarkupSafe==2.1.3 61 | matplotlib==3.8.1 62 | mdurl==0.1.2 63 | mkl-fft @ file:///croot/mkl_fft_1695058164594/work 64 | mkl-random @ file:///croot/mkl_random_1695059800811/work 65 | mkl-service==2.4.0 66 | ml-dtypes==0.2.0 67 | mmcv-full==1.4.2 68 | model-index==0.1.11 69 | mpmath==1.3.0 70 | mypy-extensions==1.0.0 71 | namex==0.0.7 72 | networkx==3.2.1 73 | numpy==1.22.4 74 | nvidia-cublas-cu11==11.10.3.66 75 | nvidia-cuda-cupti-cu11==11.7.101 76 | nvidia-cuda-nvrtc-cu11==11.7.99 77 | nvidia-cuda-runtime-cu11==11.7.99 78 | nvidia-cudnn-cu11==8.5.0.96 79 | nvidia-cufft-cu11==10.9.0.58 80 | nvidia-curand-cu11==10.2.10.91 81 | nvidia-cusolver-cu11==11.4.0.1 82 | nvidia-cusparse-cu11==11.7.4.91 83 | nvidia-nccl-cu11==2.14.3 84 | nvidia-nvtx-cu11==11.7.91 85 | oauthlib==3.2.2 86 | omegaconf==2.3.0 87 | opencv-python==4.8.0.76 88 | opendatalab==0.0.10 89 | openmim==0.3.9 90 | openxlab==0.0.25 91 | opt-einsum==3.3.0 92 | ordered-set==4.1.0 93 | oss2==2.17.0 94 | packaging==23.1 95 | pandas==2.1.1 96 | pathspec==0.12.1 97 | Pillow @ file:///croot/pillow_1695134008276/work 98 | platformdirs==4.2.0 99 | portalocker==2.8.2 100 | prettytable==3.9.0 101 | promise==2.3 102 | protobuf==3.20.3 103 | psutil==5.9.6 104 | pyasn1==0.5.0 105 | pyasn1-modules==0.3.0 106 | pycocotools==2.0.7 107 | pycparser @ file:///tmp/build/80754af9/pycparser_1636541352034/work 108 | pycryptodome==3.19.0 109 | Pygments==2.16.1 110 | pyOpenSSL @ file:///croot/pyopenssl_1690223430423/work 111 | pyparsing==3.1.1 112 | PySocks @ file:///tmp/build/80754af9/pysocks_1605305812635/work 113 | python-dateutil==2.8.2 114 | pytz==2023.3.post1 115 | PyYAML @ file:///croot/pyyaml_1670514731622/work 116 | regex @ file:///tmp/abs_41f5bce5-0a2e-45aa-b231-1fd2fbd57753gfpe6sjm/croots/recipe/regex_1658257178822/work 117 | requests==2.28.2 118 | requests-oauthlib==1.3.1 119 | rich==13.4.2 120 | rsa==4.9 121 | safetensors==0.3.3 122 | scikit-learn==1.3.2 123 | scipy==1.11.3 124 | seaborn==0.13.0 125 | shapely==2.0.2 126 | simplejson==3.19.2 127 | six==1.15.0 128 | sympy==1.12 129 | tabulate==0.9.0 130 | tensorboard==2.11.2 131 | tensorboard-data-server==0.6.1 132 | tensorboard-plugin-wit==1.8.1 133 | tensorflow==2.5.0 134 | tensorflow-addons==0.23.0 135 | tensorflow-datasets==4.9.3 136 | tensorflow-estimator==2.5.0 137 | tensorflow-io-gcs-filesystem==0.34.0 138 | tensorflow-metadata==1.14.0 139 | termcolor==1.1.0 140 | terminaltables==3.1.10 141 | threadpoolctl==3.2.0 142 | timm==0.9.12 143 | toml==0.10.2 144 | tomli==2.0.1 145 | torch==1.8.0 146 | torchaudio==0.8.0 147 | torchvision==0.9.0 148 | tqdm==4.65.0 149 | triton==2.0.0 150 | typeguard==2.13.3 151 | typing_extensions==4.9.0 152 | tzdata==2023.3 153 | urllib3 @ file:///croot/urllib3_1686163155763/work 154 | wcwidth==0.2.6 155 | Werkzeug==2.3.7 156 | wrapt==1.12.1 157 | yacs==0.1.8 158 | yapf==0.33.0 159 | zipp==3.15.0 160 | -------------------------------------------------------------------------------- /datasets/dtd.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | from typing import Any, Callable, Optional, Tuple 4 | 5 | import PIL.Image 6 | 7 | from torchvision.datasets.utils import download_and_extract_archive, verify_str_arg 8 | from torchvision.datasets.vision import VisionDataset 9 | 10 | 11 | class DTD(VisionDataset): 12 | """`Describable Textures Dataset (DTD) `_. 13 | 14 | Args: 15 | root (string): Root directory of the dataset. 16 | split (string, optional): The dataset split, supports ``"train"`` (default), ``"val"``, or ``"test"``. 17 | partition (int, optional): The dataset partition. Should be ``1 <= partition <= 10``. Defaults to ``1``. 18 | 19 | .. note:: 20 | 21 | The partition only changes which split each image belongs to. Thus, regardless of the selected 22 | partition, combining all splits will result in all images. 23 | 24 | transform (callable, optional): A function/transform that takes in a PIL image and returns a transformed 25 | version. E.g, ``transforms.RandomCrop``. 26 | target_transform (callable, optional): A function/transform that takes in the target and transforms it. 27 | download (bool, optional): If True, downloads the dataset from the internet and 28 | puts it in root directory. If dataset is already downloaded, it is not 29 | downloaded again. Default is False. 30 | """ 31 | 32 | _URL = "https://www.robots.ox.ac.uk/~vgg/data/dtd/download/dtd-r1.0.1.tar.gz" 33 | _MD5 = "fff73e5086ae6bdbea199a49dfb8a4c1" 34 | 35 | def __init__( 36 | self, 37 | root: str, 38 | split: str = "train", 39 | partition: int = 1, 40 | transform: Optional[Callable] = None, 41 | target_transform: Optional[Callable] = None, 42 | download: bool = False, 43 | ) -> None: 44 | self._split = verify_str_arg(split, "split", ("train", "val", "test")) 45 | if not isinstance(partition, int) and not (1 <= partition <= 10): 46 | raise ValueError( 47 | f"Parameter 'partition' should be an integer with `1 <= partition <= 10`, " 48 | f"but got {partition} instead" 49 | ) 50 | self._partition = partition 51 | 52 | super().__init__(root, transform=transform, target_transform=target_transform) 53 | self._base_folder = pathlib.Path(self.root) / type(self).__name__.lower() 54 | self._data_folder = self._base_folder / "dtd" 55 | self._meta_folder = self._data_folder / "labels" 56 | self._images_folder = self._data_folder / "images" 57 | 58 | if download: 59 | self._download() 60 | 61 | if not self._check_exists(): 62 | raise RuntimeError("Dataset not found. 
You can use download=True to download it") 63 | 64 | self._image_files = [] 65 | classes = [] 66 | with open(self._meta_folder / f"{self._split}{self._partition}.txt") as file: 67 | for line in file: 68 | cls, name = line.strip().split("/") 69 | self._image_files.append(self._images_folder.joinpath(cls, name)) 70 | classes.append(cls) 71 | 72 | self.classes = sorted(set(classes)) 73 | self.class_to_idx = dict(zip(self.classes, range(len(self.classes)))) 74 | self._labels = [self.class_to_idx[cls] for cls in classes] 75 | 76 | def __len__(self) -> int: 77 | return len(self._image_files) 78 | 79 | def __getitem__(self, idx: int) -> Tuple[Any, Any]: 80 | image_file, label = self._image_files[idx], self._labels[idx] 81 | image = PIL.Image.open(image_file).convert("RGB") 82 | 83 | if self.transform: 84 | image = self.transform(image) 85 | 86 | if self.target_transform: 87 | label = self.target_transform(label) 88 | 89 | return image, label 90 | 91 | def extra_repr(self) -> str: 92 | return f"split={self._split}, partition={self._partition}" 93 | 94 | def _check_exists(self) -> bool: 95 | return os.path.exists(self._data_folder) and os.path.isdir(self._data_folder) 96 | 97 | def _download(self) -> None: 98 | if self._check_exists(): 99 | return 100 | download_and_extract_archive(self._URL, download_root=str(self._base_folder), md5=self._MD5) 101 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/necks/mla_neck.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch.nn as nn 3 | from mmcv.cnn import ConvModule, build_norm_layer 4 | 5 | from ..builder import NECKS 6 | 7 | 8 | class MLAModule(nn.Module): 9 | 10 | def __init__(self, 11 | in_channels=[1024, 1024, 1024, 1024], 12 | out_channels=256, 13 | norm_cfg=None, 14 | act_cfg=None): 15 | super(MLAModule, self).__init__() 16 | self.channel_proj = nn.ModuleList() 17 | for i in range(len(in_channels)): 18 | self.channel_proj.append( 19 | ConvModule( 20 | in_channels=in_channels[i], 21 | out_channels=out_channels, 22 | kernel_size=1, 23 | norm_cfg=norm_cfg, 24 | act_cfg=act_cfg)) 25 | self.feat_extract = nn.ModuleList() 26 | for i in range(len(in_channels)): 27 | self.feat_extract.append( 28 | ConvModule( 29 | in_channels=out_channels, 30 | out_channels=out_channels, 31 | kernel_size=3, 32 | padding=1, 33 | norm_cfg=norm_cfg, 34 | act_cfg=act_cfg)) 35 | 36 | def forward(self, inputs): 37 | 38 | # feat_list -> [p2, p3, p4, p5] 39 | feat_list = [] 40 | for x, conv in zip(inputs, self.channel_proj): 41 | feat_list.append(conv(x)) 42 | 43 | # feat_list -> [p5, p4, p3, p2] 44 | # mid_list -> [m5, m4, m3, m2] 45 | feat_list = feat_list[::-1] 46 | mid_list = [] 47 | for feat in feat_list: 48 | if len(mid_list) == 0: 49 | mid_list.append(feat) 50 | else: 51 | mid_list.append(mid_list[-1] + feat) 52 | 53 | # mid_list -> [m5, m4, m3, m2] 54 | # out_list -> [o2, o3, o4, o5] 55 | out_list = [] 56 | for mid, conv in zip(mid_list, self.feat_extract): 57 | out_list.append(conv(mid)) 58 | 59 | return tuple(out_list) 60 | 61 | 62 | @NECKS.register_module() 63 | class MLANeck(nn.Module): 64 | """Multi-level Feature Aggregation. 65 | 66 | This neck is `The Multi-level Feature Aggregation construction of 67 | SETR `_. 68 | 69 | 70 | Args: 71 | in_channels (List[int]): Number of input channels per scale. 72 | out_channels (int): Number of output channels (used at each scale). 
73 | norm_layer (dict): Config dict for input normalization. 74 | Default: norm_layer=dict(type='LN', eps=1e-6, requires_grad=True). 75 | norm_cfg (dict): Config dict for normalization layer. Default: None. 76 | act_cfg (dict): Config dict for activation layer in ConvModule. 77 | Default: None. 78 | """ 79 | 80 | def __init__(self, 81 | in_channels, 82 | out_channels, 83 | norm_layer=dict(type='LN', eps=1e-6, requires_grad=True), 84 | norm_cfg=None, 85 | act_cfg=None): 86 | super(MLANeck, self).__init__() 87 | assert isinstance(in_channels, list) 88 | self.in_channels = in_channels 89 | self.out_channels = out_channels 90 | 91 | # In order to build general vision transformer backbone, we have to 92 | # move MLA to neck. 93 | self.norm = nn.ModuleList([ 94 | build_norm_layer(norm_layer, in_channels[i])[1] 95 | for i in range(len(in_channels)) 96 | ]) 97 | 98 | self.mla = MLAModule( 99 | in_channels=in_channels, 100 | out_channels=out_channels, 101 | norm_cfg=norm_cfg, 102 | act_cfg=act_cfg) 103 | 104 | def forward(self, inputs): 105 | assert len(inputs) == len(self.in_channels) 106 | 107 | # Convert from nchw to nlc 108 | outs = [] 109 | for i in range(len(inputs)): 110 | x = inputs[i] 111 | n, c, h, w = x.shape 112 | x = x.reshape(n, c, h * w).transpose(2, 1).contiguous() 113 | x = self.norm[i](x) 114 | x = x.transpose(1, 2).reshape(n, c, h, w).contiguous() 115 | outs.append(x) 116 | 117 | outs = self.mla(outs) 118 | return tuple(outs) 119 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/utils/up_conv_block.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule, build_upsample_layer 5 | 6 | 7 | class UpConvBlock(nn.Module): 8 | """Upsample convolution block in decoder for UNet. 9 | 10 | This upsample convolution block consists of one upsample module 11 | followed by one convolution block. The upsample module expands the 12 | high-level low-resolution feature map and the convolution block fuses 13 | the upsampled high-level low-resolution feature map and the low-level 14 | high-resolution feature map from encoder. 15 | 16 | Args: 17 | conv_block (nn.Sequential): Sequential of convolutional layers. 18 | in_channels (int): Number of input channels of the high-level 19 | skip_channels (int): Number of input channels of the low-level 20 | high-resolution feature map from encoder. 21 | out_channels (int): Number of output channels. 22 | num_convs (int): Number of convolutional layers in the conv_block. 23 | Default: 2. 24 | stride (int): Stride of convolutional layer in conv_block. Default: 1. 25 | dilation (int): Dilation rate of convolutional layer in conv_block. 26 | Default: 1. 27 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some 28 | memory while slowing down the training speed. Default: False. 29 | conv_cfg (dict | None): Config dict for convolution layer. 30 | Default: None. 31 | norm_cfg (dict | None): Config dict for normalization layer. 32 | Default: dict(type='BN'). 33 | act_cfg (dict | None): Config dict for activation layer in ConvModule. 34 | Default: dict(type='ReLU'). 35 | upsample_cfg (dict): The upsample config of the upsample module in 36 | decoder. Default: dict(type='InterpConv'). 
If the size of 37 | high-level feature map is the same as that of skip feature map 38 | (low-level feature map from encoder), it does not need upsample the 39 | high-level feature map and the upsample_cfg is None. 40 | dcn (bool): Use deformable convolution in convolutional layer or not. 41 | Default: None. 42 | plugins (dict): plugins for convolutional layers. Default: None. 43 | """ 44 | 45 | def __init__(self, 46 | conv_block, 47 | in_channels, 48 | skip_channels, 49 | out_channels, 50 | num_convs=2, 51 | stride=1, 52 | dilation=1, 53 | with_cp=False, 54 | conv_cfg=None, 55 | norm_cfg=dict(type='BN'), 56 | act_cfg=dict(type='ReLU'), 57 | upsample_cfg=dict(type='InterpConv'), 58 | dcn=None, 59 | plugins=None): 60 | super(UpConvBlock, self).__init__() 61 | assert dcn is None, 'Not implemented yet.' 62 | assert plugins is None, 'Not implemented yet.' 63 | 64 | self.conv_block = conv_block( 65 | in_channels=2 * skip_channels, 66 | out_channels=out_channels, 67 | num_convs=num_convs, 68 | stride=stride, 69 | dilation=dilation, 70 | with_cp=with_cp, 71 | conv_cfg=conv_cfg, 72 | norm_cfg=norm_cfg, 73 | act_cfg=act_cfg, 74 | dcn=None, 75 | plugins=None) 76 | if upsample_cfg is not None: 77 | self.upsample = build_upsample_layer( 78 | cfg=upsample_cfg, 79 | in_channels=in_channels, 80 | out_channels=skip_channels, 81 | with_cp=with_cp, 82 | norm_cfg=norm_cfg, 83 | act_cfg=act_cfg) 84 | else: 85 | self.upsample = ConvModule( 86 | in_channels, 87 | skip_channels, 88 | kernel_size=1, 89 | stride=1, 90 | padding=0, 91 | conv_cfg=conv_cfg, 92 | norm_cfg=norm_cfg, 93 | act_cfg=act_cfg) 94 | 95 | def forward(self, skip, x): 96 | """Forward function.""" 97 | 98 | x = self.upsample(x) 99 | out = torch.cat([skip, x], dim=1) 100 | out = self.conv_block(out) 101 | 102 | return out 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Dynamic-Tuning


10 | 11 | 12 | The official implementation of "Dynamic Tuning Towards Parameter and Inference Efficiency for ViT Adaptation" (NeurIPS 2024). 13 | 14 | > Wangbo Zhao1, Jiasheng Tang2,3, Yizeng Han4, Yibing Song2,3, Kai Wang1, Gao Huang4, Fan Wang2, Yang You1 15 | > 16 | > 1[National University of Singapore](https://www.nus.edu.sg/), 2[DAMO Academy, Alibaba Group](https://damo.alibaba.com/?language=zh), 3Hupan Lab, 4[Tsinghua University](https://www.tsinghua.edu.cn/) 17 | > 18 | > [Paper](https://arxiv.org/abs/2403.11808) 19 | 20 | 21 | ## News 🚀🚀🚀 22 | - `2024.10.16`: We have updated the code: it adds a distillation technique (introduced in the NeurIPS 2024 version of our paper), supports actually efficient inference, and supports semantic segmentation. The NeurIPS 2024 version of the paper will be released soon. 23 | - `2024.09.26`: DyT has been accepted by NeurIPS 2024. We will update the code and paper soon. 24 | - `2024.03.23`: The code is released. 25 | 26 | ## Abstract 27 | Existing parameter-efficient fine-tuning (PEFT) methods have achieved significant success in vision transformer (ViT) adaptation by improving parameter efficiency. However, enhancing inference efficiency during adaptation remains underexplored, which limits the broader application of pre-trained ViT models, especially when the models are computationally expensive. In this paper, we propose Dynamic Tuning (DyT), a novel approach that improves both parameter and inference efficiency for ViT adaptation. Specifically, besides using lightweight adapter modules, we propose a token dispatcher to distinguish informative tokens from less important ones, allowing the latter to dynamically skip the original block and thereby reducing redundant computation during inference. Additionally, we explore multiple design variants to find the best practice of DyT. Finally, inspired by the mixture-of-experts (MoE) mechanism, we introduce an enhanced adapter to further boost adaptation performance. We validate DyT across various tasks, including image/video recognition and semantic segmentation. For instance, DyT achieves comparable or even superior performance to existing PEFT methods while using only 71%-85% of their FLOPs on the VTAB-1K benchmark. 28 |
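To make the token-skipping idea from the abstract concrete, below is a minimal, editor-added sketch of a token dispatcher. It is not the implementation in this repo: the module and argument names (`TokenDispatcher`, `threshold`) are illustrative, and a real implementation would use a differentiable relaxation (e.g. Gumbel-Sigmoid) of the hard mask during training rather than a fixed threshold.

```python
import torch
import torch.nn as nn


class TokenDispatcher(nn.Module):
    """Illustrative gate that lets uninformative tokens skip a transformer block."""

    def __init__(self, dim: int):
        super().__init__()
        self.gate = nn.Linear(dim, 1)  # per-token score

    def forward(self, x: torch.Tensor, block: nn.Module, threshold: float = 0.5):
        # x: (B, N, C) tokens; block: any module mapping (B, N, C) -> (B, N, C)
        keep = self.gate(x).squeeze(-1).sigmoid() > threshold  # (B, N) boolean mask
        out = x.clone()  # unselected tokens bypass the block unchanged
        for b in range(x.size(0)):
            idx = keep[b].nonzero(as_tuple=True)[0]
            if idx.numel() > 0:
                out[b, idx] = block(x[b : b + 1, idx])[0]  # only selected tokens are computed
        return out, keep
```

The per-sample loop is for readability only; an efficient implementation would batch the selected tokens before running the block.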

29 | 31 | 32 | ## 🛠 Dataset Preparation 33 | - For VTAB-1K, we recommend adopting the split provided by [SSF](https://github.com/dongzelian/SSF). You can download VTAB-1K directly from their repo. 34 | - Other image datasets will be downloaded automatically the first time you run our code. 35 | - For video datasets (K400 and SSv2), you can download them from [OpenDataLab](https://opendatalab.org.cn/OpenMMLab/Kinetics-400) or their official websites. 36 | 37 | ## 🛠 Installation 38 | ``` 39 | pip install -r requirements.txt # install torch, timm, torchvision, etc. 40 | wget https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_patch16_224_in21k-e5005f0a.pth # download the ckpt from timm 41 | ``` 42 | 43 | ## ⚙️ Fine-tuning 44 | ``` 45 | bash ./train_IN21K.sh # training on complete datasets 46 | bash ./train_vtab.sh # training on the VTAB benchmark 47 | bash ./train_video.sh # training on video datasets 48 | ``` 49 | 50 | ## ⚙️ Measure Inference Speed 51 | ``` 52 | bash ./measure_speed.sh 53 | ``` 54 | 55 | ## Citation 56 | If you find our work useful, please consider citing us. 57 | ``` 58 | @article{zhao2024dynamic, 59 | title={Dynamic tuning towards parameter and inference efficiency for vit adaptation}, 60 | author={Zhao, Wangbo and Tang, Jiasheng and Han, Yizeng and Song, Yibing and Wang, Kai and Huang, Gao and Wang, Fan and You, Yang}, 61 | journal={arXiv preprint arXiv:2403.11808}, 62 | year={2024} 63 | } 64 | ``` 65 | 66 | 67 | ## Acknowledgement 68 | The repo is partly built on [AdaptFormer](https://github.com/ShoufaChen/AdaptFormer), [AdViT](https://github.com/MengLcool/AdaViT), and [PETL-ViT](https://github.com/JieShibo/PETL-ViT). We are grateful for their generous contributions to open source. 69 | 70 | 71 | ## Contact 72 | 🔥🔥🔥 If you are interested in this work and would like to cooperate with us, please send an email to wangbo.zhao96@gmail.com 🔥🔥🔥 73 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/uper_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | 6 | from mmseg.ops import resize 7 | from ..builder import HEADS 8 | from .decode_head import BaseDecodeHead 9 | from .psp_head import PPM 10 | 11 | 12 | @HEADS.register_module() 13 | class UPerHead(BaseDecodeHead): 14 | """Unified Perceptual Parsing for Scene Understanding. 15 | 16 | This head is the implementation of `UPerNet 17 | <https://arxiv.org/abs/1807.10221>`_. 18 | 19 | Args: 20 | pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid 21 | Module applied on the last feature. Default: (1, 2, 3, 6).
22 | """ 23 | 24 | def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): 25 | super(UPerHead, self).__init__( 26 | input_transform='multiple_select', **kwargs) 27 | # PSP Module 28 | self.psp_modules = PPM( 29 | pool_scales, 30 | self.in_channels[-1], 31 | self.channels, 32 | conv_cfg=self.conv_cfg, 33 | norm_cfg=self.norm_cfg, 34 | act_cfg=self.act_cfg, 35 | align_corners=self.align_corners) 36 | self.bottleneck = ConvModule( 37 | self.in_channels[-1] + len(pool_scales) * self.channels, 38 | self.channels, 39 | 3, 40 | padding=1, 41 | conv_cfg=self.conv_cfg, 42 | norm_cfg=self.norm_cfg, 43 | act_cfg=self.act_cfg) 44 | # FPN Module 45 | self.lateral_convs = nn.ModuleList() 46 | self.fpn_convs = nn.ModuleList() 47 | for in_channels in self.in_channels[:-1]: # skip the top layer 48 | l_conv = ConvModule( 49 | in_channels, 50 | self.channels, 51 | 1, 52 | conv_cfg=self.conv_cfg, 53 | norm_cfg=self.norm_cfg, 54 | act_cfg=self.act_cfg, 55 | inplace=False) 56 | fpn_conv = ConvModule( 57 | self.channels, 58 | self.channels, 59 | 3, 60 | padding=1, 61 | conv_cfg=self.conv_cfg, 62 | norm_cfg=self.norm_cfg, 63 | act_cfg=self.act_cfg, 64 | inplace=False) 65 | self.lateral_convs.append(l_conv) 66 | self.fpn_convs.append(fpn_conv) 67 | 68 | self.fpn_bottleneck = ConvModule( 69 | len(self.in_channels) * self.channels, 70 | self.channels, 71 | 3, 72 | padding=1, 73 | conv_cfg=self.conv_cfg, 74 | norm_cfg=self.norm_cfg, 75 | act_cfg=self.act_cfg) 76 | 77 | def psp_forward(self, inputs): 78 | """Forward function of PSP module.""" 79 | x = inputs[-1] 80 | psp_outs = [x] 81 | psp_outs.extend(self.psp_modules(x)) 82 | psp_outs = torch.cat(psp_outs, dim=1) 83 | output = self.bottleneck(psp_outs) 84 | 85 | return output 86 | 87 | def forward(self, inputs): 88 | """Forward function.""" 89 | 90 | inputs = self._transform_inputs(inputs) 91 | 92 | # build laterals 93 | laterals = [ 94 | lateral_conv(inputs[i]) 95 | for i, lateral_conv in enumerate(self.lateral_convs) 96 | ] 97 | 98 | laterals.append(self.psp_forward(inputs)) 99 | 100 | # build top-down path 101 | used_backbone_levels = len(laterals) 102 | for i in range(used_backbone_levels - 1, 0, -1): 103 | prev_shape = laterals[i - 1].shape[2:] 104 | laterals[i - 1] = laterals[i - 1] + resize( 105 | laterals[i], 106 | size=prev_shape, 107 | mode='bilinear', 108 | align_corners=self.align_corners) 109 | 110 | # build outputs 111 | fpn_outs = [ 112 | self.fpn_convs[i](laterals[i]) 113 | for i in range(used_backbone_levels - 1) 114 | ] 115 | # append psp feature 116 | fpn_outs.append(laterals[-1]) 117 | 118 | for i in range(used_backbone_levels - 1, 0, -1): 119 | fpn_outs[i] = resize( 120 | fpn_outs[i], 121 | size=fpn_outs[0].shape[2:], 122 | mode='bilinear', 123 | align_corners=self.align_corners) 124 | fpn_outs = torch.cat(fpn_outs, dim=1) 125 | output = self.fpn_bottleneck(fpn_outs) 126 | output = self.cls_seg(output) 127 | return output 128 | -------------------------------------------------------------------------------- /models/losses.py: -------------------------------------------------------------------------------- 1 | from numpy.lib.arraysetops import isin 2 | from timm import loss 3 | from timm.data.transforms_factory import transforms_imagenet_train 4 | import torch 5 | from torch.functional import Tensor 6 | import torch.nn as nn 7 | 8 | def binaray_entropy(prob, eps=1e-7): 9 | neg_entro = prob * prob.clamp(min=eps).log() + (1-prob) * (1-prob).clamp(min=eps).log() 10 | return - neg_entro 11 | 12 | 13 | 14 | 15 | class AdaLoss(nn.Module): 16 | def 
__init__(self, base_criterion, 17 | 18 | layer_target_ratio=0.5, 19 | layer_loss_ratio=2., 20 | layer_diverse_ratio=0.1, 21 | layer_entropy_weight=0.1, 22 | layer_minimal_weight=0., 23 | layer_minimal=0., 24 | 25 | token_target_ratio=0.5, 26 | token_loss_ratio=2., 27 | token_minimal=0.1, 28 | token_minimal_weight=1. 29 | ): 30 | super().__init__() 31 | self.base_criterion = base_criterion 32 | 33 | # self.layer_target_ratio = layer_target_ratio 34 | # self.layer_loss_ratio = layer_loss_ratio 35 | # self.layer_diverse_ratio = layer_diverse_ratio 36 | # self.layer_entropy_weight = layer_entropy_weight 37 | # self.layer_minimal_weight = layer_minimal_weight 38 | # self.layer_minimal = layer_minimal 39 | 40 | self.token_target_ratio = token_target_ratio 41 | self.token_loss_ratio = token_loss_ratio 42 | self.token_minimal = token_minimal 43 | self.token_minimal_weight = token_minimal_weight 44 | 45 | 46 | 47 | 48 | def forward(self, outputs, y): 49 | ''' 50 | head_select: (b, num_layers, num_head) 51 | ''' 52 | 53 | x, token_select, _ = outputs["prediction"], outputs["token_select"], outputs["token_logits"] 54 | 55 | base_loss = self.base_criterion(x, y) 56 | # layer_loss = self._get_layer_loss(x, layer_select, layer_logits) 57 | token_loss = self._get_token_loss(x, token_select) 58 | 59 | loss = base_loss + self.token_loss_ratio * token_loss 60 | 61 | return loss, dict(base_loss=base_loss, token_loss=self.token_loss_ratio * token_loss) 62 | 63 | def _get_token_loss(self, x, token_select): 64 | """ 65 | token_select : tensor (b, num_layer, l) 66 | 67 | """ 68 | if token_select is not None : 69 | token_mean = token_select.mean() 70 | # token_flops_loss = (token_mean - self.token_target_ratio).abs().mean() 71 | # token_flops_loss = (token_mean - self.token_target_ratio).clamp(min=0.).mean() 72 | token_flops_loss = ((token_mean - self.token_target_ratio)**2).mean() 73 | 74 | if self.token_minimal_weight > 0 : 75 | token_mean = token_select.mean(-1) 76 | token_minimal_loss = (self.token_minimal - token_mean).clamp(min=0.).sum() 77 | else : 78 | token_minimal_loss = 0 79 | 80 | token_loss = token_flops_loss + self.token_minimal_weight * token_minimal_loss 81 | else : 82 | token_loss = x.new_zeros(1).mean() 83 | 84 | return token_loss 85 | 86 | 87 | def _get_layer_loss(self, x, layer_select, logits_set): 88 | if layer_select is not None : 89 | layer_mean = layer_select.mean() 90 | layer_flops_loss = (layer_mean - self.layer_target_ratio).abs().mean() 91 | 92 | if self.layer_diverse_ratio > 0 : 93 | layer_mean = layer_select.mean((0,-1)) 94 | layer_diverse_loss = (layer_mean - self.layer_target_ratio).abs().mean() 95 | else : 96 | layer_diverse_loss = 0 97 | 98 | if self.layer_entropy_weight > 0 : 99 | layer_select_logits = logits_set['layer_select_logits'] 100 | layer_entropy = binaray_entropy(layer_select_logits.sigmoid()).mean() 101 | else : 102 | layer_entropy = 0 103 | 104 | if self.layer_minimal_weight > 0 : 105 | layer_mean = layer_select.mean(0) #(num_layers, 2) 106 | layer_minimal_loss = (self.layer_minimal - layer_mean).clamp(min=0.).sum() 107 | else : 108 | layer_minimal_loss = 0 109 | 110 | layer_loss = layer_flops_loss + self.layer_diverse_ratio * layer_diverse_loss - self.layer_entropy_weight * layer_entropy \ 111 | + self.layer_minimal_weight * layer_minimal_loss 112 | else : 113 | layer_loss = x.new_zeros(1).mean() 114 | 115 | return layer_loss 116 | 117 | 118 | -------------------------------------------------------------------------------- 
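A brief, editor-added usage sketch for the `AdaLoss` module above. It relies only on the interface visible in the file (a dict with `prediction`, `token_select`, and `token_logits`); the tensor shapes, batch size, and the import path `models.losses` are illustrative placeholders, not the repo's actual training loop.

```python
import torch
import torch.nn as nn

from models.losses import AdaLoss  # the module defined above

criterion = AdaLoss(
    base_criterion=nn.CrossEntropyLoss(),
    token_target_ratio=0.5,  # target fraction of tokens kept per block
    token_loss_ratio=2.0,    # weight of the token-usage penalty
)

# outputs in the dict format AdaLoss expects (random placeholders here)
outputs = {
    "prediction": torch.randn(8, 1000, requires_grad=True),  # (batch, num_classes) logits
    "token_select": torch.rand(8, 12, 197),                  # (batch, num_layers, num_tokens) in [0, 1]
    "token_logits": torch.randn(8, 12, 197),                 # not used by the token loss itself
}
targets = torch.randint(0, 1000, (8,))

loss, loss_terms = criterion(outputs, targets)  # total loss plus per-term breakdown
loss.backward()
print(loss_terms["base_loss"].item(), loss_terms["token_loss"].item())
```

The squared penalty on the mean token-selection ratio pushes the average kept-token fraction toward `token_target_ratio`, while the minimal-ratio term discourages any layer from dropping nearly all tokens.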
/dense_tasks/Segmentation/configs/beit/upernet/our_vit_coco-stuff164k.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # BEIT: BERT Pre-Training of Image Transformers (https://arxiv.org/abs/2106.08254) 3 | # Github source: https://github.com/microsoft/unilm/tree/master/beit 4 | # Copyright (c) 2021 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # By Hangbo Bao 7 | # Based on timm, mmseg, setr, xcit and swin code bases 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm 9 | # https://github.com/fudan-zvg/SETR 10 | # https://github.com/facebookresearch/xcit/ 11 | # https://github.com/microsoft/Swin-Transformer 12 | # --------------------------------------------------------' 13 | # recommand use this config for BEiT models which are self-supervised pretrained on imagenet 14 | _base_ = [ 15 | '../../_base_/models/upernet_beit.py', 16 | '../../_base_/default_runtime.py', '../../_base_/schedules/schedule_80k.py' 17 | ] 18 | 19 | crop_size = (512, 512) 20 | 21 | model = dict( 22 | backbone=dict( 23 | _delete_=True, 24 | type='VisionTransformer21K', 25 | img_size=512, 26 | patch_size=16, 27 | embed_dim=768, 28 | depth=12, 29 | num_heads=12, 30 | mlp_ratio=4.0, 31 | qkv_bias=True, 32 | drop_path_rate=0.1, 33 | out_indices=[3, 5, 7, 11], 34 | use_rel_pos_bias=True 35 | ), 36 | decode_head=dict( 37 | in_channels=[768, 768, 768, 768], 38 | num_classes=171, 39 | channels=768, 40 | ), 41 | auxiliary_head=dict( 42 | in_channels=768, 43 | num_classes=171 44 | ), 45 | test_cfg = dict(mode='slide', crop_size=crop_size, stride=(341, 341)) 46 | ) 47 | 48 | 49 | 50 | optimizer = dict(_delete_=True, type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.05, 51 | # constructor='LayerDecayOptimizerConstructor', 52 | # paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.65) 53 | ) 54 | 55 | 56 | lr_config = dict(_delete_=True, policy='poly', 57 | warmup='linear', 58 | warmup_iters=1500, 59 | warmup_ratio=1e-6, 60 | power=1.0, min_lr=0.0, by_epoch=False) 61 | 62 | # By default, models are trained on 8 GPUs with 2 images per GPU 63 | # data=dict(samples_per_gpu=2) 64 | 65 | runner = dict(type='IterBasedRunnerAmp') 66 | 67 | # do not use mmdet version fp16 68 | fp16 = None 69 | optimizer_config = dict( 70 | type="DistOptimizerHook", 71 | update_interval=1, 72 | grad_clip=None, 73 | coalesce=True, 74 | bucket_size_mb=-1, 75 | use_fp16=False, 76 | ) 77 | 78 | 79 | 80 | # dataset settings 81 | dataset_type = 'COCOStuffDataset' 82 | data_root = '/home/zhaowangbo.zwb/dataset/coco_stuff164k/' 83 | img_norm_cfg = dict( 84 | mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) 85 | crop_size = (512, 512) 86 | train_pipeline = [ 87 | dict(type='LoadImageFromFile'), 88 | dict(type='LoadAnnotations'), 89 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 90 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 91 | dict(type='RandomFlip', prob=0.5), 92 | dict(type='PhotoMetricDistortion'), 93 | dict(type='Normalize', **img_norm_cfg), 94 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 95 | dict(type='DefaultFormatBundle'), 96 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 97 | ] 98 | test_pipeline = [ 99 | dict(type='LoadImageFromFile'), 100 | dict( 101 | type='MultiScaleFlipAug', 102 | img_scale=(2048, 512), 103 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 104 | flip=False, 105 | transforms=[ 
106 | dict(type='Resize', keep_ratio=True), 107 | dict(type='RandomFlip'), 108 | dict(type='Normalize', **img_norm_cfg), 109 | dict(type='ImageToTensor', keys=['img']), 110 | dict(type='Collect', keys=['img']), 111 | ]) 112 | ] 113 | data = dict( 114 | samples_per_gpu=2, 115 | workers_per_gpu=8, 116 | train=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | img_dir='images/train2017', 120 | ann_dir='annotations/train2017', 121 | pipeline=train_pipeline), 122 | val=dict( 123 | type=dataset_type, 124 | data_root=data_root, 125 | img_dir='images/val2017', 126 | ann_dir='annotations/val2017', 127 | pipeline=test_pipeline), 128 | test=dict( 129 | type=dataset_type, 130 | data_root=data_root, 131 | img_dir='images/val2017', 132 | ann_dir='annotations/val2017', 133 | pipeline=test_pipeline)) 134 | 135 | evaluation = dict(interval=10000, metric='mIoU') -------------------------------------------------------------------------------- /datasets/volume_transforms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import torch 4 | 5 | 6 | def convert_img(img): 7 | """Converts (H, W, C) numpy.ndarray to (C, W, H) format 8 | """ 9 | if len(img.shape) == 3: 10 | img = img.transpose(2, 0, 1) 11 | if len(img.shape) == 2: 12 | img = np.expand_dims(img, 0) 13 | return img 14 | 15 | 16 | class ClipToTensor(object): 17 | """Convert a list of m (H x W x C) numpy.ndarrays in the range [0, 255] 18 | to a torch.FloatTensor of shape (C x m x H x W) in the range [0, 1.0] 19 | """ 20 | 21 | def __init__(self, channel_nb=3, div_255=True, numpy=False): 22 | self.channel_nb = channel_nb 23 | self.div_255 = div_255 24 | self.numpy = numpy 25 | 26 | def __call__(self, clip): 27 | """ 28 | Args: clip (list of numpy.ndarray): clip (list of images) 29 | to be converted to tensor. 
30 | """ 31 | # Retrieve shape 32 | if isinstance(clip[0], np.ndarray): 33 | h, w, ch = clip[0].shape 34 | assert ch == self.channel_nb, 'Got {0} instead of 3 channels'.format( 35 | ch) 36 | elif isinstance(clip[0], Image.Image): 37 | w, h = clip[0].size 38 | else: 39 | raise TypeError('Expected numpy.ndarray or PIL.Image\ 40 | but got list of {0}'.format(type(clip[0]))) 41 | 42 | np_clip = np.zeros([self.channel_nb, len(clip), int(h), int(w)]) 43 | 44 | # Convert 45 | for img_idx, img in enumerate(clip): 46 | if isinstance(img, np.ndarray): 47 | pass 48 | elif isinstance(img, Image.Image): 49 | img = np.array(img, copy=False) 50 | else: 51 | raise TypeError('Expected numpy.ndarray or PIL.Image\ 52 | but got list of {0}'.format(type(clip[0]))) 53 | img = convert_img(img) 54 | np_clip[:, img_idx, :, :] = img 55 | if self.numpy: 56 | if self.div_255: 57 | np_clip = np_clip / 255.0 58 | return np_clip 59 | 60 | else: 61 | tensor_clip = torch.from_numpy(np_clip) 62 | 63 | if not isinstance(tensor_clip, torch.FloatTensor): 64 | tensor_clip = tensor_clip.float() 65 | if self.div_255: 66 | tensor_clip = torch.div(tensor_clip, 255) 67 | return tensor_clip 68 | 69 | 70 | # Note this norms data to -1/1 71 | class ClipToTensor_K(object): 72 | """Convert a list of m (H x W x C) numpy.ndarrays in the range [0, 255] 73 | to a torch.FloatTensor of shape (C x m x H x W) in the range [0, 1.0] 74 | """ 75 | 76 | def __init__(self, channel_nb=3, div_255=True, numpy=False): 77 | self.channel_nb = channel_nb 78 | self.div_255 = div_255 79 | self.numpy = numpy 80 | 81 | def __call__(self, clip): 82 | """ 83 | Args: clip (list of numpy.ndarray): clip (list of images) 84 | to be converted to tensor. 85 | """ 86 | # Retrieve shape 87 | if isinstance(clip[0], np.ndarray): 88 | h, w, ch = clip[0].shape 89 | assert ch == self.channel_nb, 'Got {0} instead of 3 channels'.format( 90 | ch) 91 | elif isinstance(clip[0], Image.Image): 92 | w, h = clip[0].size 93 | else: 94 | raise TypeError('Expected numpy.ndarray or PIL.Image\ 95 | but got list of {0}'.format(type(clip[0]))) 96 | 97 | np_clip = np.zeros([self.channel_nb, len(clip), int(h), int(w)]) 98 | 99 | # Convert 100 | for img_idx, img in enumerate(clip): 101 | if isinstance(img, np.ndarray): 102 | pass 103 | elif isinstance(img, Image.Image): 104 | img = np.array(img, copy=False) 105 | else: 106 | raise TypeError('Expected numpy.ndarray or PIL.Image\ 107 | but got list of {0}'.format(type(clip[0]))) 108 | img = convert_img(img) 109 | np_clip[:, img_idx, :, :] = img 110 | if self.numpy: 111 | if self.div_255: 112 | np_clip = (np_clip - 127.5) / 127.5 113 | return np_clip 114 | 115 | else: 116 | tensor_clip = torch.from_numpy(np_clip) 117 | 118 | if not isinstance(tensor_clip, torch.FloatTensor): 119 | tensor_clip = tensor_clip.float() 120 | if self.div_255: 121 | tensor_clip = torch.div(torch.sub(tensor_clip, 127.5), 127.5) 122 | return tensor_clip 123 | 124 | 125 | class ToTensor(object): 126 | """Converts numpy array to tensor 127 | """ 128 | 129 | def __call__(self, array): 130 | tensor = torch.from_numpy(array) 131 | return tensor 132 | -------------------------------------------------------------------------------- /dense_tasks/Segmentation/configs/beit/upernet/our_vit.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # BEIT: BERT Pre-Training of Image Transformers (https://arxiv.org/abs/2106.08254) 3 | # Github source: 
https://github.com/microsoft/unilm/tree/master/beit 4 | # Copyright (c) 2021 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # By Hangbo Bao 7 | # Based on timm, mmseg, setr, xcit and swin code bases 8 | # https://github.com/rwightman/pytorch-image-models/tree/master/timm 9 | # https://github.com/fudan-zvg/SETR 10 | # https://github.com/facebookresearch/xcit/ 11 | # https://github.com/microsoft/Swin-Transformer 12 | # --------------------------------------------------------' 13 | # recommand use this config for BEiT models which are self-supervised pretrained on imagenet 14 | _base_ = [ 15 | '../../_base_/models/upernet_beit.py', 16 | '../../_base_/default_runtime.py', '../../_base_/schedules/schedule_160k.py' 17 | ] 18 | 19 | crop_size = (512, 512) 20 | 21 | model = dict( 22 | backbone=dict( 23 | _delete_=True, 24 | type='VisionTransformer21K', 25 | img_size=512, 26 | patch_size=16, 27 | embed_dim=768, 28 | depth=12, 29 | num_heads=12, 30 | mlp_ratio=4.0, 31 | qkv_bias=True, 32 | drop_path_rate=0.1, 33 | out_indices=[3, 5, 7, 11], 34 | use_rel_pos_bias=True 35 | ), 36 | decode_head=dict( 37 | in_channels=[768, 768, 768, 768], 38 | num_classes=150, 39 | channels=768, 40 | ), 41 | auxiliary_head=dict( 42 | in_channels=768, 43 | num_classes=150 44 | ), 45 | test_cfg = dict(mode='slide', crop_size=crop_size, stride=(341, 341)) 46 | ) 47 | 48 | 49 | 50 | optimizer = dict(_delete_=True, type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.05, 51 | # constructor='LayerDecayOptimizerConstructor', 52 | # paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.65) 53 | ) 54 | 55 | 56 | lr_config = dict(_delete_=True, policy='poly', 57 | warmup='linear', 58 | warmup_iters=1500, 59 | warmup_ratio=1e-6, 60 | power=1.0, min_lr=0.0, by_epoch=False) 61 | 62 | # By default, models are trained on 8 GPUs with 2 images per GPU 63 | # data=dict(samples_per_gpu=2) 64 | 65 | runner = dict(type='IterBasedRunnerAmp') 66 | 67 | # do not use mmdet version fp16 68 | fp16 = None 69 | optimizer_config = dict( 70 | type="DistOptimizerHook", 71 | update_interval=1, 72 | grad_clip=None, 73 | coalesce=True, 74 | bucket_size_mb=-1, 75 | use_fp16=False, 76 | ) 77 | 78 | 79 | 80 | # dataset settings 81 | dataset_type = 'ADE20KDataset' 82 | data_root = '/home/zhaowangbo.zwb/dataset/ADEChallengeData2016/' 83 | img_norm_cfg = dict( 84 | mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) 85 | crop_size = (512, 512) 86 | train_pipeline = [ 87 | dict(type='LoadImageFromFile'), 88 | dict(type='LoadAnnotations', reduce_zero_label=True), 89 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 90 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 91 | dict(type='RandomFlip', prob=0.5), 92 | dict(type='PhotoMetricDistortion'), 93 | dict(type='Normalize', **img_norm_cfg), 94 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 95 | dict(type='DefaultFormatBundle'), 96 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 97 | ] 98 | test_pipeline = [ 99 | dict(type='LoadImageFromFile'), 100 | dict( 101 | type='MultiScaleFlipAug', 102 | img_scale=(2048, 512), 103 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 104 | flip=False, 105 | transforms=[ 106 | dict(type='Resize', keep_ratio=True), 107 | dict(type='RandomFlip'), 108 | dict(type='Normalize', **img_norm_cfg), 109 | dict(type='ImageToTensor', keys=['img']), 110 | dict(type='Collect', keys=['img']), 111 | ]) 112 | ] 113 | data = dict( 114 | samples_per_gpu=2, 115 | workers_per_gpu=4, 116 
| train=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | img_dir='images/training', 120 | ann_dir='annotations/training', 121 | pipeline=train_pipeline), 122 | val=dict( 123 | type=dataset_type, 124 | data_root=data_root, 125 | img_dir='images/validation', 126 | ann_dir='annotations/validation', 127 | pipeline=test_pipeline), 128 | test=dict( 129 | type=dataset_type, 130 | data_root=data_root, 131 | img_dir='images/validation', 132 | ann_dir='annotations/validation', 133 | pipeline=test_pipeline)) 134 | 135 | evaluation = dict(interval=16000, metric='mIoU') -------------------------------------------------------------------------------- /dense_tasks/Segmentation/mmseg/models/decode_heads/ocr_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from mmcv.cnn import ConvModule 6 | 7 | from mmseg.ops import resize 8 | from ..builder import HEADS 9 | from ..utils import SelfAttentionBlock as _SelfAttentionBlock 10 | from .cascade_decode_head import BaseCascadeDecodeHead 11 | 12 | 13 | class SpatialGatherModule(nn.Module): 14 | """Aggregate the context features according to the initial predicted 15 | probability distribution. 16 | 17 | Employ the soft-weighted method to aggregate the context. 18 | """ 19 | 20 | def __init__(self, scale): 21 | super(SpatialGatherModule, self).__init__() 22 | self.scale = scale 23 | 24 | def forward(self, feats, probs): 25 | """Forward function.""" 26 | batch_size, num_classes, height, width = probs.size() 27 | channels = feats.size(1) 28 | probs = probs.view(batch_size, num_classes, -1) 29 | feats = feats.view(batch_size, channels, -1) 30 | # [batch_size, height*width, num_classes] 31 | feats = feats.permute(0, 2, 1) 32 | # [batch_size, channels, height*width] 33 | probs = F.softmax(self.scale * probs, dim=2) 34 | # [batch_size, channels, num_classes] 35 | ocr_context = torch.matmul(probs, feats) 36 | ocr_context = ocr_context.permute(0, 2, 1).contiguous().unsqueeze(3) 37 | return ocr_context 38 | 39 | 40 | class ObjectAttentionBlock(_SelfAttentionBlock): 41 | """Make a OCR used SelfAttentionBlock.""" 42 | 43 | def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg, 44 | act_cfg): 45 | if scale > 1: 46 | query_downsample = nn.MaxPool2d(kernel_size=scale) 47 | else: 48 | query_downsample = None 49 | super(ObjectAttentionBlock, self).__init__( 50 | key_in_channels=in_channels, 51 | query_in_channels=in_channels, 52 | channels=channels, 53 | out_channels=in_channels, 54 | share_key_query=False, 55 | query_downsample=query_downsample, 56 | key_downsample=None, 57 | key_query_num_convs=2, 58 | key_query_norm=True, 59 | value_out_num_convs=1, 60 | value_out_norm=True, 61 | matmul_norm=True, 62 | with_out=True, 63 | conv_cfg=conv_cfg, 64 | norm_cfg=norm_cfg, 65 | act_cfg=act_cfg) 66 | self.bottleneck = ConvModule( 67 | in_channels * 2, 68 | in_channels, 69 | 1, 70 | conv_cfg=self.conv_cfg, 71 | norm_cfg=self.norm_cfg, 72 | act_cfg=self.act_cfg) 73 | 74 | def forward(self, query_feats, key_feats): 75 | """Forward function.""" 76 | context = super(ObjectAttentionBlock, 77 | self).forward(query_feats, key_feats) 78 | output = self.bottleneck(torch.cat([context, query_feats], dim=1)) 79 | if self.query_downsample is not None: 80 | output = resize(query_feats) 81 | 82 | return output 83 | 84 | 85 | @HEADS.register_module() 86 | class OCRHead(BaseCascadeDecodeHead): 87 | 
"""Object-Contextual Representations for Semantic Segmentation. 88 | 89 | This head is the implementation of `OCRNet 90 | `_. 91 | 92 | Args: 93 | ocr_channels (int): The intermediate channels of OCR block. 94 | scale (int): The scale of probability map in SpatialGatherModule in 95 | Default: 1. 96 | """ 97 | 98 | def __init__(self, ocr_channels, scale=1, **kwargs): 99 | super(OCRHead, self).__init__(**kwargs) 100 | self.ocr_channels = ocr_channels 101 | self.scale = scale 102 | self.object_context_block = ObjectAttentionBlock( 103 | self.channels, 104 | self.ocr_channels, 105 | self.scale, 106 | conv_cfg=self.conv_cfg, 107 | norm_cfg=self.norm_cfg, 108 | act_cfg=self.act_cfg) 109 | self.spatial_gather_module = SpatialGatherModule(self.scale) 110 | 111 | self.bottleneck = ConvModule( 112 | self.in_channels, 113 | self.channels, 114 | 3, 115 | padding=1, 116 | conv_cfg=self.conv_cfg, 117 | norm_cfg=self.norm_cfg, 118 | act_cfg=self.act_cfg) 119 | 120 | def forward(self, inputs, prev_output): 121 | """Forward function.""" 122 | x = self._transform_inputs(inputs) 123 | feats = self.bottleneck(x) 124 | context = self.spatial_gather_module(feats, prev_output) 125 | object_context = self.object_context_block(feats, context) 126 | output = self.cls_seg(object_context) 127 | 128 | return output 129 | --------------------------------------------------------------------------------