├── .gitignore ├── LICENSE.txt ├── README.md ├── docs ├── config.md └── dataset_prepare.md ├── project ├── _base_ │ ├── datasets │ │ ├── ade20k.py │ │ ├── ade20k_640x640.py │ │ ├── cityscapes_1024x1024.py │ │ ├── cityscapes_512x1024.py │ │ ├── cityscapes_769x769.py │ │ ├── isaid_869x869.py │ │ └── loveda.py │ ├── default_runtime.py │ └── schedules │ │ ├── schedule_160k.py │ │ ├── schedule_40k.py │ │ └── schedule_80k.py ├── ann │ ├── ann_r50-d8_512x512_ade20k_80k.py │ └── readme.md ├── beit │ ├── readme.md │ └── upernet_beit-base_8x2_640x640_160k_ade20k.py ├── ccnet │ ├── ccnet_r50-d8_512x512_ade20k_80k.py │ └── readme.md ├── clip_rc │ ├── bpe_simple_vocab_16e6.txt.gz │ ├── coco │ │ ├── clip_rc_fully_vit-b_512x512_80k_coco_stuff164k_100_16.py │ │ ├── clip_rc_zero_vit-b_512x512_40k_coco_stuff164k_100_16_st.py │ │ └── clip_rc_zero_vit-b_512x512_80k_coco_stuff164k_100_16.py │ ├── datasets │ │ ├── zero_cocostuff_512x512.py │ │ ├── zero_voc12_20_512x512.py │ │ └── zero_voc12_20_aug_512x512.py │ ├── readme.md │ ├── text_embedding │ │ ├── coco_multi.npy │ │ └── voc12_single.npy │ └── voc12 │ │ ├── clip_rc_fully_vit-b_512x512_40k_voc_10_16.py │ │ ├── clip_rc_zero_vit-b_512x512_20k_voc_10_16_st.py │ │ └── clip_rc_zero_vit-b_512x512_40k_voc_10_16.py ├── convnext │ ├── readme.md │ └── upernet_convnext_base_512x512_ade20k_160k.py ├── danet │ ├── danet_r50-d8_512x512_ade20k_80k.py │ └── readme.md ├── deeplabv3 │ ├── deeplabv3_r50-d8_512x512_ade20k_80k.py │ └── readme.md ├── deeplabv3plus │ ├── deeplabv3plus_r50-d8_512x512_ade20k_80k.py │ └── readme.md ├── eanet │ ├── eanet_r50-d8_769x769_cityscapes_40k.py │ └── readme.md ├── emanet │ ├── emanet_r50-d8_512x1024_cityscapes_80k.py │ └── readme.md ├── fcn │ ├── fcn_r50-d8_512x1024_cityscapes_80k.py │ ├── fcn_r50-d8_512x512_ade20k_80k.py │ ├── fcn_r50-d8_896x896_isaid_80k.py │ └── readme.md ├── gcnet │ ├── gcnet_r50-d8_512x512_ade20k_160k.py │ └── readme.md ├── mae │ ├── readme.md │ └── upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py ├── mobilenet_v2 │ ├── fcn_m-v2-d8_512x512_ade20k_160k.py │ └── readme.md ├── nonlocal_net │ ├── nonlocal_r50-d8_512x512_ade20k_80k.py │ └── readme.md ├── point_rend │ ├── pointrend_r50_512x512_ade20k_160k.py │ └── readme.md ├── pspnet │ ├── pspnet_r50-d8_512x1024_cityscapes_80k.py │ ├── pspnet_r50-d8_512x512_ade20k_80k.py │ ├── pspnet_r50-d8_512x512_loveda_80k.py │ └── readme.md ├── resnest │ ├── pspnet_s101-d8_512x512_ade20k_80k.py │ └── readme.md ├── segformer │ ├── b0 │ │ └── segformer_b0_512x512_ade_160k.py │ └── readme.md ├── segnext │ ├── base │ │ ├── segnext_base_1024x1024_cityscapes_160k.py │ │ ├── segnext_base_512x512_ade_160k.py │ │ └── segnext_base_896x896_isaid_160k.py │ ├── large │ │ ├── segnext_large_1024x1024_cityscapes_160k.py │ │ ├── segnext_large_512x512_ade_160k.py │ │ └── segnext_large_896x896_isaid_160k.py │ ├── readme.md │ ├── resources │ │ └── flops.png │ ├── small │ │ ├── segnext_small_1024x1024_cityscapes_160k.py │ │ ├── segnext_small_512x512_ade_160k.py │ │ └── segnext_small_896x896_isaid_160k.py │ └── tiny │ │ ├── segnext_tiny_1024x1024_cityscapes_160k.py │ │ ├── segnext_tiny_512x512_ade_160k.py │ │ └── segnext_tiny_896x896_isaid_160k.py ├── swin │ ├── readme.md │ └── tiny │ │ └── upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py ├── upernet │ ├── readme.md │ └── upernet_r50_512x512_ade20k_160k.py └── vit │ ├── readme.md │ └── upernet_vit-b16_ln_mln_512x512_ade20k_160k.py ├── python └── jseg │ ├── __init__.py │ ├── bricks │ ├── __init__.py │ ├── activation.py │ ├── conv.py │ 
├── conv_module.py │ ├── depthwise_separable_conv_module.py │ ├── drop.py │ ├── norm.py │ └── padding.py │ ├── config │ ├── __init__.py │ └── config.py │ ├── datasets │ ├── __init__.py │ ├── ade.py │ ├── cityscapes.py │ ├── custom.py │ ├── isaid.py │ ├── isprs.py │ ├── loveda.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── loading.py │ │ ├── test_time_aug.py │ │ ├── transforms.py │ │ └── utils.py │ ├── potsdam.py │ ├── voc.py │ ├── zero_coco_stuff.py │ └── zero_voc12.py │ ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── beit.py │ │ ├── clip_encoder_rlb.py │ │ ├── clip_text_encoder.py │ │ ├── convnext.py │ │ ├── mae.py │ │ ├── mix_transformer.py │ │ ├── mobilenet_v2.py │ │ ├── mscan.py │ │ ├── resnest.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ ├── swin.py │ │ └── vit.py │ ├── decode_heads │ │ ├── __init__.py │ │ ├── ann_head.py │ │ ├── aspp_head.py │ │ ├── cascade_decode_head.py │ │ ├── cc_head.py │ │ ├── cliprc_head.py │ │ ├── da_head.py │ │ ├── decode_head.py │ │ ├── ea_head.py │ │ ├── ema_head.py │ │ ├── fcn_head.py │ │ ├── fpn_head.py │ │ ├── gc_head.py │ │ ├── ham_head.py │ │ ├── nl_head.py │ │ ├── point_head.py │ │ ├── psp_head.py │ │ ├── segformer_head.py │ │ ├── sep_aspp_head.py │ │ └── uper_head.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── cross_entropy_loss.py │ │ └── utils.py │ ├── necks │ │ ├── __init__.py │ │ ├── featurepyramid.py │ │ ├── fpn.py │ │ └── multilevel_neck.py │ ├── segmentors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── cascade_encoder_decoder.py │ │ ├── clip_rc.py │ │ └── encoder_decoder.py │ └── utils │ │ ├── embed.py │ │ ├── inverted_residual.py │ │ └── se_layer.py │ ├── ops │ ├── __init__.py │ ├── cc_attention.py │ ├── cliprc_ops.py │ ├── context_block.py │ ├── external_attention.py │ ├── mha.py │ ├── multi_head_attention.py │ ├── non_local.py │ ├── scale.py │ ├── self_attention_block.py │ └── wrappers.py │ ├── optims │ ├── __init__.py │ ├── lr_decay_parameter_groups_generator.py │ ├── lr_scheduler.py │ ├── optimizer.py │ └── prameter_groups_generator.py │ ├── runner │ ├── __init__.py │ └── runner.py │ ├── sampler │ ├── __init__.py │ ├── base_pixel_sampler.py │ └── ohem_pixel_sampler.py │ └── utils │ ├── __init__.py │ ├── general.py │ ├── helpers.py │ ├── inference.py │ ├── logger.py │ ├── metrics.py │ ├── registry.py │ ├── tokenizer.py │ ├── visualize.py │ └── weight_init.py ├── requirements.txt ├── setup.py └── tools ├── convert_datasets ├── cityscapes.py ├── isaid.py ├── loveda.py ├── potsdam.py ├── vaihingen.py └── voc_aug.py ├── demo.py └── run_net.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .vscode 107 | .idea 108 | 109 | # custom 110 | *.pkl 111 | *.pkl.json 112 | *.log.json 113 | work_dirs/ 114 | work_dirs 115 | pretrained 116 | pretrained/ 117 | # PyTorch 118 | *.pth 119 | trash/ 120 | trash 121 | test_img/ 122 | events* -------------------------------------------------------------------------------- /docs/config.md: -------------------------------------------------------------------------------- 1 | # How to use configs in JSeg 2 | ## Basic usages 3 | ### .py configuration files 4 | You can do some easy computation in a .py configuration file: 5 | ```python 6 | # cfg.py 7 | import os 8 | exp_id = 1 9 | # path setting 10 | output_path = 'experiments' 11 | root_path = os.path.join(output_path, str(exp_id)) 12 | log_path = os.path.join(root_path, 'logs') 13 | 14 | # easy calculation 15 | gpus = [0,1,2,3] 16 | n_gpus = len(gpus) 17 | batch_size = 16 18 | base_lr = batch_size * 0.001 19 | 20 | # model setting 21 | model = { 22 | 'type': 'Resnet50', 23 | 'return_stages': ['layer1','layer2','layer3','layer4'], 24 | 'pretrained': True 25 | } 26 | ``` 27 | You can load a .py configuration file the same way you would load a .yaml configuration file: 28 | ```python 29 | # main.py 30 | from jseg.config import init_cfg 31 | init_cfg('cfg.py') 32 | ``` 33 | 34 | Please refer to `[ROOT]/python/jseg/config/config.py` for more details.
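### Config inheritance with `_base_`
Configuration files can also inherit from one another through the `_base_` key, which is how every config under `project/` composes a dataset, the default runtime, and a schedule. The sketch below is a minimal, hypothetical example of that pattern (the file name and the lr override are illustrative only), assuming the usual merge semantics in which keys set in the child config override values pulled in from its `_base_` files:
```python
# my_exp.py -- hypothetical experiment config
_base_ = [
    '../_base_/datasets/ade20k.py',        # dataset, pipelines, batch size
    '../_base_/default_runtime.py',        # logger and log_interval
    '../_base_/schedules/schedule_80k.py'  # SGD optimizer + PolyLR schedule
]

# Keys set here override the inherited ones: keep SGD but halve the base lr.
optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0005)
```
The composed file is then loaded exactly like a flat config, e.g. `init_cfg('my_exp.py')`.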
-------------------------------------------------------------------------------- /project/_base_/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'ADE20KDataset' 2 | data_root = 'datasets/ADEChallengeData2016/' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=16, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | img_dir='images/training', 42 | ann_dir='annotations/training', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | img_dir='images/validation', 53 | ann_dir='annotations/validation', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /project/_base_/datasets/ade20k_640x640.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'ADE20KDataset' 2 | data_root = 'datasets/ADEChallengeData2016/' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (640, 640) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2560, 640), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2560, 640), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=16, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | 
img_dir='images/training', 42 | ann_dir='annotations/training', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | img_dir='images/validation', 53 | ann_dir='annotations/validation', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /project/_base_/datasets/cityscapes_1024x1024.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'datasets/cityscapes/' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (1024, 1024) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 1024), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=8, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | img_dir='leftImg8bit/train', 42 | ann_dir='gtFine/train', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | img_dir='leftImg8bit/val', 53 | ann_dir='gtFine/val', 54 | pipeline=test_pipeline), 55 | test=dict( 56 | type=dataset_type, 57 | # Fixed to one 58 | batch_size=1, 59 | num_workers=1, 60 | shuffle=False, 61 | drop_last=False, 62 | data_root=data_root, 63 | img_dir='leftImg8bit/val', 64 | ann_dir='gtFine/val', 65 | pipeline=test_pipeline)) 66 | -------------------------------------------------------------------------------- /project/_base_/datasets/cityscapes_512x1024.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'datasets/cityscapes/' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (512, 1024) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | 
dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 1024), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=8, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | img_dir='leftImg8bit/train', 42 | ann_dir='gtFine/train', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | img_dir='leftImg8bit/val', 53 | ann_dir='gtFine/val', 54 | pipeline=test_pipeline), 55 | test=dict( 56 | type=dataset_type, 57 | # Fixed to one 58 | batch_size=1, 59 | num_workers=1, 60 | shuffle=False, 61 | drop_last=False, 62 | data_root=data_root, 63 | img_dir='leftImg8bit/val', 64 | ann_dir='gtFine/val', 65 | pipeline=test_pipeline)) -------------------------------------------------------------------------------- /project/_base_/datasets/cityscapes_769x769.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'datasets/cityscapes/' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (769, 769) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 1025), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=8, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | img_dir='leftImg8bit/train', 42 | ann_dir='gtFine/train', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | img_dir='leftImg8bit/val', 53 | ann_dir='gtFine/val', 54 | pipeline=test_pipeline), 55 | test=dict( 56 | type=dataset_type, 57 | # Fixed to one 58 | batch_size=1, 59 | num_workers=1, 60 | shuffle=False, 61 | drop_last=False, 62 | data_root=data_root, 63 | img_dir='leftImg8bit/val', 64 | ann_dir='gtFine/val', 65 | pipeline=test_pipeline)) 66 | -------------------------------------------------------------------------------- /project/_base_/datasets/isaid_869x869.py: -------------------------------------------------------------------------------- 
1 | dataset_type = 'iSAIDDataset' 2 | data_root = 'datasets/iSAID_Patches' 3 | 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | 8 | crop_size = (896, 896) 9 | 10 | train_pipeline = [ 11 | dict(type='LoadImageFromFile'), 12 | dict(type='LoadAnnotations'), 13 | dict(type='Resize', img_scale=(896, 896), ratio_range=(0.5, 2.0)), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 21 | ] 22 | test_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=(896, 896), 27 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']), 35 | ]) 36 | ] 37 | 38 | dataset = dict( 39 | train=dict(type=dataset_type, 40 | batch_size=16, 41 | num_workers=8, 42 | shuffle=True, 43 | drop_last=False, 44 | data_root=data_root, 45 | img_dir='train/images', 46 | ann_dir='train/Semantic_masks', 47 | pipeline=train_pipeline), 48 | val=dict( 49 | type=dataset_type, 50 | # Fixed to one 51 | batch_size=1, 52 | num_workers=1, 53 | shuffle=False, 54 | drop_last=False, 55 | data_root=data_root, 56 | img_dir='val/images', 57 | ann_dir='val/Semantic_masks', 58 | pipeline=test_pipeline)) 59 | -------------------------------------------------------------------------------- /project/_base_/datasets/loveda.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'LoveDADataset' 2 | data_root = 'dataset/LoveDA' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1024, 1024), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=16, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | img_dir='img_dir/train', 42 | ann_dir='ann_dir/train', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | 
img_dir='img_dir/val', 53 | ann_dir='ann_dir/val', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /project/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | logger = dict(type="RunLogger") 2 | log_interval = 50 -------------------------------------------------------------------------------- /project/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | max_iter = 160000 2 | eval_interval = 8000 3 | checkpoint_interval = 8000 4 | # optimizer 5 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 6 | # scheduler 7 | scheduler = dict(type='PolyLR', 8 | max_steps=max_iter, 9 | power=0.9, 10 | min_lr=1e-4) 11 | -------------------------------------------------------------------------------- /project/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | max_iter = 40000 2 | eval_interval = 2000 3 | checkpoint_interval = 2000 4 | # optimizer 5 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 6 | # scheduler 7 | scheduler = dict(type='PolyLR', 8 | max_steps=max_iter, 9 | power=0.9, 10 | min_lr=1e-4) 11 | -------------------------------------------------------------------------------- /project/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | max_iter = 80000 2 | eval_interval = 4000 3 | checkpoint_interval = 4000 4 | # optimizer 5 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 6 | # scheduler 7 | scheduler = dict(type='PolyLR', 8 | max_steps=max_iter, 9 | power=0.9, 10 | min_lr=1e-4) 11 | -------------------------------------------------------------------------------- /project/ann/ann_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | # model settings 6 | norm_cfg = dict(type='BN') 7 | model = dict( 8 | type='EncoderDecoder', 9 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 10 | backbone=dict(type='ResNetV1c', 11 | depth=50, 12 | num_stages=4, 13 | out_indices=(0, 1, 2, 3), 14 | dilations=(1, 1, 2, 4), 15 | strides=(1, 2, 1, 1), 16 | norm_cfg=norm_cfg, 17 | norm_eval=False, 18 | contract_dilation=True), 19 | decode_head=dict(type='ANNHead', 20 | in_channels=[1024, 2048], 21 | in_index=[2, 3], 22 | channels=512, 23 | project_channels=256, 24 | query_scales=(1, ), 25 | key_pool_scales=(1, 3, 6, 8), 26 | dropout_ratio=0.1, 27 | num_classes=150, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict(type='CrossEntropyLoss', 31 | use_sigmoid=False, 32 | loss_weight=1.0)), 33 | auxiliary_head=dict(type='FCNHead', 34 | in_channels=1024, 35 | in_index=2, 36 | channels=256, 37 | num_convs=1, 38 | concat_input=False, 39 | dropout_ratio=0.1, 40 | num_classes=150, 41 | norm_cfg=norm_cfg, 42 | align_corners=False, 43 | loss_decode=dict(type='CrossEntropyLoss', 44 | use_sigmoid=False, 45 | loss_weight=0.4)), 46 | # model training and testing settings 47 | train_cfg=dict(), 48 | test_cfg=dict(mode='whole')) 49 | -------------------------------------------------------------------------------- /project/ann/readme.md: -------------------------------------------------------------------------------- 1 | 
# ANN -------------------------------------------------------------------------------- /project/beit/readme.md: -------------------------------------------------------------------------------- 1 | # BEiT -------------------------------------------------------------------------------- /project/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py' 3 | ] 4 | 5 | norm_cfg = dict(type='BN') 6 | model = dict( 7 | type='EncoderDecoder', 8 | pretrained='jittorhub://beit_base_patch16_224_pt22k_ft22k.pkl', 9 | backbone=dict(type='BEiT', 10 | img_size=(640, 640), 11 | patch_size=16, 12 | in_channels=3, 13 | embed_dims=768, 14 | num_layers=12, 15 | num_heads=12, 16 | mlp_ratio=4, 17 | out_indices=(3, 5, 7, 11), 18 | qv_bias=True, 19 | attn_drop_rate=0.0, 20 | drop_path_rate=0.1, 21 | norm_cfg=dict(type='LN', eps=1e-6), 22 | act_cfg=dict(type='GELU'), 23 | norm_eval=False, 24 | init_values=0.1), 25 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 26 | decode_head=dict(type='UPerHead', 27 | in_channels=[768, 768, 768, 768], 28 | in_index=[0, 1, 2, 3], 29 | pool_scales=(1, 2, 3, 6), 30 | channels=768, 31 | dropout_ratio=0.1, 32 | num_classes=150, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict(type='CrossEntropyLoss', 36 | use_sigmoid=False, 37 | loss_weight=1.0)), 38 | auxiliary_head=dict(type='FCNHead', 39 | in_channels=768, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=150, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict(type='CrossEntropyLoss', 49 | use_sigmoid=False, 50 | loss_weight=0.4)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='slide', crop_size=(640, 640), stride=(426, 426))) 54 | 55 | parameter_groups_generator = dict(type="LRDecayParameterGroupsGenerator", 56 | paramwise_cfg=dict(num_layers=12, 57 | decay_rate=0.9)) 58 | 59 | optimizer = dict( 60 | type='CustomAdamW', 61 | lr=3e-5, 62 | betas=(0.9, 0.999), 63 | weight_decay=0.05, 64 | ) 65 | 66 | max_iter = 160000 67 | eval_interval = 8000 68 | checkpoint_interval = 8000 69 | 70 | scheduler = dict(type='PolyLR', 71 | warmup='linear', 72 | warmup_iters=1500, 73 | warmup_ratio=1e-6, 74 | max_steps=max_iter, 75 | power=1.0, 76 | min_lr=0) 77 | -------------------------------------------------------------------------------- /project/ccnet/ccnet_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='CCHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | recurrence=2, 25 | dropout_ratio=0.1, 26 | num_classes=150, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict(type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | loss_weight=1.0)), 32 | 
auxiliary_head=dict(type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=150, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict(type='CrossEntropyLoss', 43 | use_sigmoid=False, 44 | loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /project/ccnet/readme.md: -------------------------------------------------------------------------------- 1 | # CCNet -------------------------------------------------------------------------------- /project/clip_rc/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jittor/JSeg/c14696dc4fa6e822fd15b7add2d07067ecb95943/project/clip_rc/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /project/clip_rc/datasets/zero_cocostuff_512x512.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'ZeroCOCOStuffDataset' 2 | data_root = '/home/zy/datasets/coco_stuff164k' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True, min_size=512), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=16, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | img_dir='images/train2017', 42 | ann_dir='annotations/train2017', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | img_dir='images/val2017', 53 | ann_dir='annotations/val2017', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /project/clip_rc/datasets/zero_voc12_20_512x512.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ZeroPascalVOCDataset20' 3 | data_root = '/home/zy/datasets/pascal_voc/VOC2012' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | 8 | crop_size = (512, 512) 9 | 10 | train_pipeline = [ 11 | dict(type='LoadImageFromFile'), 12 | dict(type='LoadAnnotations', reduce_zero_label=True), 13 | 
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 21 | ] 22 | 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(2048, 512), 28 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 29 | flip=False, 30 | transforms=[ 31 | dict(type='Resize', keep_ratio=True, min_size=512), 32 | dict(type='RandomFlip'), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | 39 | dataset = dict( 40 | train=dict(type=dataset_type, 41 | batch_size=4, 42 | num_workers=8, 43 | shuffle=True, 44 | drop_last=False, 45 | data_root=data_root, 46 | img_dir='JPEGImages', 47 | ann_dir='SegmentationClass', 48 | split='ImageSets/Segmentation/train.txt', 49 | pipeline=train_pipeline), 50 | val=dict( 51 | type=dataset_type, 52 | # Fixed to one 53 | batch_size=1, 54 | num_workers=1, 55 | shuffle=False, 56 | drop_last=False, 57 | data_root=data_root, 58 | img_dir='JPEGImages', 59 | ann_dir='SegmentationClass', 60 | split='ImageSets/Segmentation/val.txt', 61 | pipeline=test_pipeline)) 62 | -------------------------------------------------------------------------------- /project/clip_rc/datasets/zero_voc12_20_aug_512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './zero_voc12_20_512x512.py' 2 | # dataset settings, merge voc12 and voc12aug 3 | dataset = dict(train=dict(ann_dir='SegmentationClassAug', 4 | split='ImageSets/Segmentation/trainaug.txt')) 5 | -------------------------------------------------------------------------------- /project/clip_rc/readme.md: -------------------------------------------------------------------------------- 1 | # Exploring Regional Clues in CLIP for Zero-Shot Semantic Segmentation (CVPR 2024) 2 | 3 | 4 | This repository contains the official Jittor implementation of the paper: Exploring Regional Clues in CLIP for Zero-Shot Semantic Segmentation. 5 | 6 | The paper is available [**here**](https://openaccess.thecvf.com/content/CVPR2024/papers/Zhang_Exploring_Regional_Clues_in_CLIP_for_Zero-Shot_Semantic_Segmentation_CVPR_2024_paper.pdf).
7 | 8 | **Note**: CLIP-ViT-B-16 pre-trained models can be found [here](https://bhpan.buaa.edu.cn/link/AA95601A0FBCA5403485078A0160952FEC) 9 | 10 | ## Pretrained models 11 | 12 | | Dataset | Setting | pAcc | mIoU(S) | mIoU(U) | hIoU | Model Zoo | 13 | | :-------------: | :---------: | :---: | :-----: | :-----: | :--: | :----------------------------------------------------------: | 14 | | PASCAL VOC 2012 | Inductive | 95.8 | 92.8 | 84.4 | 88.4 | [[Drive](https://bhpan.buaa.edu.cn/link/AA10306CBF37904DDCB835F3BE2D7B1C15)] | 15 | | PASCAL VOC 2012 | Transductive | 97.0 | 93.9 | 92.2 | 93.0 | [[Drive](https://bhpan.buaa.edu.cn/link/AAE085202961AF45CD957E9F98BB7449FB)] | 16 | | PASCAL VOC 2012 | Fully | 97.1 | 94.1 | 93.4 | 93.7 | [[Drive](https://bhpan.buaa.edu.cn/link/AAA98108D9C3DD408C82B42EC206DD95DD)] | 17 | | COCO Stuff 164K | Inductive | 63.1 | 40.9 | 41.6 | 41.2 | [[Drive](https://bhpan.buaa.edu.cn/link/AA12C2F1BBA0804EC6820A8CB160062091)] | 18 | | COCO Stuff 164K | Transductive | 69.9 | 42.0 | 60.8 | 49.7 | [[Drive](https://bhpan.buaa.edu.cn/link/AA492DE7FE832E43D299C221931127CB1D)] | 19 | | COCO Stuff 164K | Fully | 70.8 | 42.9 | 64.1 | 51.4 | [[Drive](https://bhpan.buaa.edu.cn/link/AACE6B7E6F7DED41FDA09AF4CB308F4E2A)] | 20 | -------------------------------------------------------------------------------- /project/clip_rc/text_embedding/coco_multi.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jittor/JSeg/c14696dc4fa6e822fd15b7add2d07067ecb95943/project/clip_rc/text_embedding/coco_multi.npy -------------------------------------------------------------------------------- /project/clip_rc/text_embedding/voc12_single.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jittor/JSeg/c14696dc4fa6e822fd15b7add2d07067ecb95943/project/clip_rc/text_embedding/voc12_single.npy -------------------------------------------------------------------------------- /project/clip_rc/voc12/clip_rc_fully_vit-b_512x512_40k_voc_10_16.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../datasets/zero_voc12_20_aug_512x512.py', 3 | '../../_base_/default_runtime.py' 4 | ] 5 | 6 | img_size = 512 7 | in_channels = 512 8 | out_indices = [11] 9 | 10 | region_level_bridge_size = 16 11 | base_class = [ 12 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 13 | ] 14 | novel_class = [] 15 | both_class = base_class 16 | 17 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 18 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 19 | 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor') 20 | 21 | pretrained = 'ViT-B-16.pkl' 22 | 23 | model = dict( 24 | type='CLIPRC', 25 | pretrained=pretrained, 26 | pretrained_text=pretrained, 27 | class_names=CLASSES, 28 | backbone=dict( 29 | type='CLIPVisionTransformerWithRLB', 30 | patch_size=16, 31 | width=768, 32 | output_dim=512, 33 | get_embeddings=True, 34 | drop_path_rate=0.1, 35 | layers=12, 36 | input_resolution=img_size, 37 | out_indices=out_indices, 38 | # setting of vpt 39 | num_tokens=10, 40 | prompt_dim=768, 41 | total_d_layer=11, 42 | # setting of RLB 43 | region_level_bridge_size=region_level_bridge_size), 44 | text_encoder=dict(type='CLIPTextEncoder', 45 | context_length=77, 46 | embed_dim=512, 47 | transformer_width=512, 48 | transformer_heads=8, 49 | transformer_layers=12), 50 | decode_head=dict(
type='ATMSingleHeadSeg', 52 | img_size=img_size, 53 | in_channels=in_channels, 54 | seen_idx=base_class, 55 | all_idx=both_class, 56 | channels=in_channels, 57 | num_layers=3, 58 | num_classes=len(both_class), # useless, to decode_head, 59 | num_heads=8, 60 | use_proj=False, 61 | use_stages=len(out_indices), 62 | embed_dims=in_channels), 63 | test_cfg=dict(mode='slide', 64 | crop_size=(img_size, img_size), 65 | stride=(426, 426)), 66 | base_class=base_class, 67 | novel_class=novel_class, 68 | both_class=both_class, 69 | ft_backbone=False, 70 | exclude_key='prompt', 71 | load_text_embedding='project/clip_rc/text_embedding/voc12_single.npy') 72 | 73 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 74 | custom_keys={ 75 | 'backbone': dict(lr_mult=10.0), 76 | 'text_encoder': dict(lr_mult=0.0), 77 | 'norm': dict(decay_mult=0.), 78 | 'ln': dict(decay_mult=0.), 79 | 'head': dict(lr_mult=10.), 80 | }) 81 | 82 | optimizer = dict( 83 | type='CustomAdamW', 84 | lr=0.00002, 85 | betas=(0.9, 0.999), 86 | weight_decay=0.01, 87 | ) 88 | 89 | max_iter = 40000 90 | eval_interval = 2000 91 | checkpoint_interval = 2000 92 | 93 | scheduler = dict(type='PolyLR', 94 | warmup='linear', 95 | warmup_iters=1500, 96 | warmup_ratio=1e-6, 97 | max_steps=max_iter, 98 | power=0.9, 99 | min_lr=1e-6) 100 | -------------------------------------------------------------------------------- /project/clip_rc/voc12/clip_rc_zero_vit-b_512x512_40k_voc_10_16.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../datasets/zero_voc12_20_aug_512x512.py', 3 | '../../_base_/default_runtime.py' 4 | ] 5 | 6 | img_size = 512 7 | in_channels = 512 8 | out_indices = [11] 9 | 10 | region_level_bridge_size = 16 11 | 12 | base_class = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] 13 | novel_class = [15, 16, 17, 18, 19] 14 | both_class = [ 15 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 16 | ] 17 | 18 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 19 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 20 | 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor') 21 | 22 | pretrained = 'ViT-B-16.pkl' 23 | 24 | model = dict( 25 | type='CLIPRC', 26 | pretrained=pretrained, 27 | pretrained_text=pretrained, 28 | class_names=CLASSES, 29 | backbone=dict( 30 | type='CLIPVisionTransformerWithRLB', 31 | patch_size=16, 32 | width=768, 33 | output_dim=512, 34 | get_embeddings=True, 35 | drop_path_rate=0.1, 36 | layers=12, 37 | input_resolution=img_size, 38 | out_indices=out_indices, 39 | # setting of vpt 40 | num_tokens=10, 41 | prompt_dim=768, 42 | total_d_layer=11, 43 | # setting of RLB 44 | region_level_bridge_size=region_level_bridge_size), 45 | text_encoder=dict(type='CLIPTextEncoder', 46 | context_length=77, 47 | embed_dim=512, 48 | transformer_width=512, 49 | transformer_heads=8, 50 | transformer_layers=12), 51 | decode_head=dict( 52 | type='ATMSingleHeadSeg', 53 | img_size=img_size, 54 | in_channels=in_channels, 55 | seen_idx=base_class, 56 | all_idx=both_class, 57 | channels=in_channels, 58 | num_layers=3, 59 | num_classes=len(base_class), # useless, to decode_head 60 | num_heads=8, 61 | use_proj=False, 62 | use_stages=len(out_indices), 63 | embed_dims=in_channels), 64 | test_cfg=dict(mode='slide', 65 | crop_size=(img_size, img_size), 66 | stride=(426, 426)), 67 | base_class=base_class, 68 | novel_class=novel_class, 69 | both_class=both_class, 70 | ft_backbone=False, 71 | 
exclude_key='prompt', 72 | load_text_embedding='project/clip_rc/text_embedding/voc12_single.npy') 73 | 74 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 75 | custom_keys={ 76 | 'backbone': dict(lr_mult=10.0), 77 | 'text_encoder': dict(lr_mult=0.0), 78 | 'norm': dict(decay_mult=0.), 79 | 'ln': dict(decay_mult=0.), 80 | 'head': dict(lr_mult=10.), 81 | }) 82 | 83 | optimizer = dict( 84 | type='CustomAdamW', 85 | lr=0.00002, 86 | betas=(0.9, 0.999), 87 | weight_decay=0.01, 88 | ) 89 | 90 | max_iter = 40000 91 | eval_interval = 2000 92 | checkpoint_interval = 2000 93 | 94 | scheduler = dict(type='PolyLR', 95 | warmup='linear', 96 | warmup_iters=1500, 97 | warmup_ratio=1e-6, 98 | max_steps=max_iter, 99 | power=0.9, 100 | min_lr=1e-6) 101 | -------------------------------------------------------------------------------- /project/convnext/readme.md: -------------------------------------------------------------------------------- 1 | # ConvNeXt -------------------------------------------------------------------------------- /project/convnext/upernet_convnext_base_512x512_ade20k_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='EncoderDecoder', 9 | pretrained= 10 | 'jittorhub://convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pkl', 11 | backbone=dict(type='ConvNeXt', 12 | arch='base', 13 | out_indices=[0, 1, 2, 3], 14 | drop_path_rate=0.4, 15 | layer_scale_init_value=1.0, 16 | gap_before_final_norm=False), 17 | decode_head=dict(type='UPerHead', 18 | in_channels=[128, 256, 512, 1024], 19 | in_index=[0, 1, 2, 3], 20 | pool_scales=(1, 2, 3, 6), 21 | channels=512, 22 | dropout_ratio=0.1, 23 | num_classes=150, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0)), 28 | auxiliary_head=dict(type='FCNHead', 29 | in_channels=512, 30 | in_index=2, 31 | channels=256, 32 | num_convs=1, 33 | concat_input=False, 34 | dropout_ratio=0.1, 35 | num_classes=150, 36 | align_corners=False, 37 | loss_decode=dict(type='CrossEntropyLoss', 38 | use_sigmoid=False, 39 | loss_weight=0.4)), 40 | # model training and testing settings 41 | train_cfg=dict(), 42 | test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341))) 43 | 44 | parameter_groups_generator = dict(type="LRDecayParameterGroupsGenerator", 45 | paramwise_cfg={ 46 | 'decay_rate': 0.9, 47 | 'decay_type': 'stage_wise', 48 | 'num_layers': 12 49 | }) 50 | 51 | optimizer = dict( 52 | type='CustomAdamW', 53 | lr=0.0001, 54 | betas=(0.9, 0.999), 55 | weight_decay=0.05, 56 | ) 57 | 58 | max_iter = 160000 59 | scheduler = dict(type='PolyLR', 60 | warmup='linear', 61 | warmup_iters=1500, 62 | warmup_ratio=1e-6, 63 | max_steps=max_iter, 64 | power=1.0, 65 | min_lr=0) 66 | -------------------------------------------------------------------------------- /project/danet/danet_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 
1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='DAHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | pam_channels=64, 25 | dropout_ratio=0.1, 26 | num_classes=150, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict(type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | loss_weight=1.0)), 32 | auxiliary_head=dict(type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=150, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict(type='CrossEntropyLoss', 43 | use_sigmoid=False, 44 | loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /project/danet/readme.md: -------------------------------------------------------------------------------- 1 | # DANet -------------------------------------------------------------------------------- /project/deeplabv3/deeplabv3_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='ASPPHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | dilations=(1, 12, 24, 36), 25 | dropout_ratio=0.1, 26 | num_classes=150, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict(type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | loss_weight=1.0)), 32 | auxiliary_head=dict(type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=150, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict(type='CrossEntropyLoss', 43 | use_sigmoid=False, 44 | loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /project/deeplabv3/readme.md: -------------------------------------------------------------------------------- 1 | # DeepLabV3 -------------------------------------------------------------------------------- /project/deeplabv3plus/deeplabv3plus_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | 
decode_head=dict(type='DepthwiseSeparableASPPHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | dilations=(1, 12, 24, 36), 25 | c1_in_channels=256, 26 | c1_channels=48, 27 | dropout_ratio=0.1, 28 | num_classes=150, 29 | norm_cfg=norm_cfg, 30 | align_corners=False, 31 | loss_decode=dict(type='CrossEntropyLoss', 32 | use_sigmoid=False, 33 | loss_weight=1.0)), 34 | auxiliary_head=dict(type='FCNHead', 35 | in_channels=1024, 36 | in_index=2, 37 | channels=256, 38 | num_convs=1, 39 | concat_input=False, 40 | dropout_ratio=0.1, 41 | num_classes=150, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict(type='CrossEntropyLoss', 45 | use_sigmoid=False, 46 | loss_weight=0.4)), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /project/deeplabv3plus/readme.md: -------------------------------------------------------------------------------- 1 | # DeepLabV3+ -------------------------------------------------------------------------------- /project/eanet/eanet_r50-d8_769x769_cityscapes_40k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_40k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='EAHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | align_corners=True, 27 | loss_decode=dict(type='CrossEntropyLoss', 28 | use_sigmoid=False, 29 | loss_weight=1.0, 30 | class_weight=[ 31 | 0.8373, 0.918, 0.866, 1.0345, 1.0166, 32 | 0.9969, 0.9754, 1.0489, 0.8786, 33 | 1.0023, 0.9539, 0.9843, 1.1116, 34 | 0.9037, 1.0865, 1.0955, 1.0865, 35 | 1.1529, 1.0507 36 | ]), 37 | sampler=dict(type='OHEMPixelSampler', 38 | thresh=0.7, 39 | min_kept=100000)), 40 | auxiliary_head=dict(type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=True, 50 | loss_decode=dict(type='CrossEntropyLoss', 51 | use_sigmoid=False, 52 | loss_weight=0.4)), 53 | # model training and testing settings 54 | train_cfg=dict(), 55 | test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) 56 | -------------------------------------------------------------------------------- /project/eanet/readme.md: -------------------------------------------------------------------------------- 1 | # EANet 2 | 3 | ### Cityscapes 4 | 5 | | Method | Backbone | Pretrained | Iters | mIoU(ss/ms) | Params | FLOPs | Config | Download | 6 | | :-------: | :-------------: | :-----: | :---: | :--: | :----: | :----: | :----: | :-------: | 7 | | EANet | ResNet50 | IN-1K | 40K | 79.9/81.0 | - | - | [config](eanet_r50-d8_769x769_cityscapes_40k.py) | [Jittor Hub](https://cg.cs.tsinghua.edu.cn/jittor/assets/build/checkpoints/eanet_r50-d8_769x769_cityscapes_40k.pkl) | 8 | 9 | -------------------------------------------------------------------------------- 
/project/emanet/emanet_r50-d8_512x1024_cityscapes_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/cityscapes_512x1024.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='EMAHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=256, 24 | ema_channels=512, 25 | num_bases=64, 26 | num_stages=3, 27 | momentum=0.1, 28 | dropout_ratio=0.1, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict(type='CrossEntropyLoss', 33 | use_sigmoid=False, 34 | loss_weight=1.0)), 35 | auxiliary_head=dict(type='FCNHead', 36 | in_channels=1024, 37 | in_index=2, 38 | channels=256, 39 | num_convs=1, 40 | concat_input=False, 41 | dropout_ratio=0.1, 42 | num_classes=19, 43 | norm_cfg=norm_cfg, 44 | align_corners=False, 45 | loss_decode=dict(type='CrossEntropyLoss', 46 | use_sigmoid=False, 47 | loss_weight=0.4)), 48 | # model training and testing settings 49 | train_cfg=dict(), 50 | test_cfg=dict(mode='whole')) 51 | -------------------------------------------------------------------------------- /project/emanet/readme.md: -------------------------------------------------------------------------------- 1 | # EMANet -------------------------------------------------------------------------------- /project/fcn/fcn_r50-d8_512x1024_cityscapes_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/cityscapes_512x1024.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='FCNHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | num_convs=2, 25 | concat_input=True, 26 | dropout_ratio=0.1, 27 | num_classes=19, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict(type='CrossEntropyLoss', 31 | use_sigmoid=False, 32 | loss_weight=1.0)), 33 | auxiliary_head=dict(type='FCNHead', 34 | in_channels=1024, 35 | in_index=2, 36 | channels=256, 37 | num_convs=1, 38 | concat_input=False, 39 | dropout_ratio=0.1, 40 | num_classes=19, 41 | norm_cfg=norm_cfg, 42 | align_corners=False, 43 | loss_decode=dict(type='CrossEntropyLoss', 44 | use_sigmoid=False, 45 | loss_weight=0.4)), 46 | # model training and testing settings 47 | train_cfg=dict(), 48 | test_cfg=dict(mode='whole')) 49 | -------------------------------------------------------------------------------- /project/fcn/fcn_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model 
settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='FCNHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | num_convs=2, 25 | concat_input=True, 26 | dropout_ratio=0.1, 27 | num_classes=150, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict(type='CrossEntropyLoss', 31 | use_sigmoid=False, 32 | loss_weight=1.0)), 33 | auxiliary_head=dict(type='FCNHead', 34 | in_channels=1024, 35 | in_index=2, 36 | channels=256, 37 | num_convs=1, 38 | concat_input=False, 39 | dropout_ratio=0.1, 40 | num_classes=150, 41 | norm_cfg=norm_cfg, 42 | align_corners=False, 43 | loss_decode=dict(type='CrossEntropyLoss', 44 | use_sigmoid=False, 45 | loss_weight=0.4)), 46 | # model training and testing settings 47 | train_cfg=dict(), 48 | test_cfg=dict(mode='whole')) 49 | -------------------------------------------------------------------------------- /project/fcn/fcn_r50-d8_896x896_isaid_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/isaid_869x869.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | 7 | # model settings 8 | norm_cfg = dict(type='BN') 9 | model = dict( 10 | type='EncoderDecoder', 11 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 12 | backbone=dict(type='ResNetV1c', 13 | depth=50, 14 | num_stages=4, 15 | out_indices=(0, 1, 2, 3), 16 | dilations=(1, 1, 2, 4), 17 | strides=(1, 2, 1, 1), 18 | norm_cfg=norm_cfg, 19 | norm_eval=False, 20 | contract_dilation=True), 21 | decode_head=dict(type='FCNHead', 22 | in_channels=2048, 23 | in_index=3, 24 | channels=512, 25 | num_convs=2, 26 | concat_input=True, 27 | dropout_ratio=0.1, 28 | num_classes=16, 29 | norm_cfg=norm_cfg, 30 | align_corners=False, 31 | loss_decode=dict(type='CrossEntropyLoss', 32 | use_sigmoid=False, 33 | loss_weight=1.0)), 34 | auxiliary_head=dict(type='FCNHead', 35 | in_channels=1024, 36 | in_index=2, 37 | channels=256, 38 | num_convs=1, 39 | concat_input=False, 40 | dropout_ratio=0.1, 41 | num_classes=16, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict(type='CrossEntropyLoss', 45 | use_sigmoid=False, 46 | loss_weight=0.4)), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /project/fcn/readme.md: -------------------------------------------------------------------------------- 1 | # FCN -------------------------------------------------------------------------------- /project/gcnet/gcnet_r50-d8_512x512_ade20k_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_160k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | 
norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='GCHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | ratio=1 / 4., 25 | pooling_type='att', 26 | fusion_types=('channel_add', ), 27 | dropout_ratio=0.1, 28 | num_classes=150, 29 | norm_cfg=norm_cfg, 30 | align_corners=False, 31 | loss_decode=dict(type='CrossEntropyLoss', 32 | use_sigmoid=False, 33 | loss_weight=1.0)), 34 | auxiliary_head=dict(type='FCNHead', 35 | in_channels=1024, 36 | in_index=2, 37 | channels=256, 38 | num_convs=1, 39 | concat_input=False, 40 | dropout_ratio=0.1, 41 | num_classes=150, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict(type='CrossEntropyLoss', 45 | use_sigmoid=False, 46 | loss_weight=0.4)), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /project/gcnet/readme.md: -------------------------------------------------------------------------------- 1 | # GCNet -------------------------------------------------------------------------------- /project/mae/readme.md: -------------------------------------------------------------------------------- 1 | # MAE -------------------------------------------------------------------------------- /project/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = ['../_base_/datasets/ade20k.py', '../_base_/default_runtime.py'] 2 | 3 | norm_cfg = dict(type='BN') 4 | model = dict( 5 | type='EncoderDecoder', 6 | pretrained='jittorhub://mae_pretrain_vit_base.pkl', 7 | backbone=dict(type='MAE', 8 | img_size=(512, 512), 9 | patch_size=16, 10 | in_channels=3, 11 | embed_dims=768, 12 | num_layers=12, 13 | num_heads=12, 14 | mlp_ratio=4, 15 | out_indices=(3, 5, 7, 11), 16 | attn_drop_rate=0.0, 17 | drop_path_rate=0.1, 18 | norm_cfg=dict(type='LN', eps=1e-6), 19 | act_cfg=dict(type='GELU'), 20 | norm_eval=False, 21 | init_values=1.0), 22 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 23 | decode_head=dict(type='UPerHead', 24 | in_channels=[768, 768, 768, 768], 25 | in_index=[0, 1, 2, 3], 26 | pool_scales=(1, 2, 3, 6), 27 | channels=768, 28 | dropout_ratio=0.1, 29 | num_classes=150, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict(type='CrossEntropyLoss', 33 | use_sigmoid=False, 34 | loss_weight=1.0)), 35 | auxiliary_head=dict(type='FCNHead', 36 | in_channels=768, 37 | in_index=2, 38 | channels=256, 39 | num_convs=1, 40 | concat_input=False, 41 | dropout_ratio=0.1, 42 | num_classes=150, 43 | norm_cfg=norm_cfg, 44 | align_corners=False, 45 | loss_decode=dict(type='CrossEntropyLoss', 46 | use_sigmoid=False, 47 | loss_weight=0.4)), 48 | # model training and testing settings 49 | train_cfg=dict(), 50 | test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341))) 51 | 52 | parameter_groups_generator = dict(type="LRDecayParameterGroupsGenerator", 53 | paramwise_cfg=dict(num_layers=12, 54 | decay_rate=0.65)) 55 | 56 | optimizer = dict( 57 | type='CustomAdamW', 58 | lr=1e-4, 59 | betas=(0.9, 0.999), 60 | weight_decay=0.05, 61 | ) 62 | 63 | max_iter = 160000 64 | eval_interval = 8000 65 | checkpoint_interval = 8000 66 | 67 | scheduler = dict(type='PolyLR', 68 | warmup='linear', 69 | warmup_iters=1500, 70 | warmup_ratio=1e-6, 71 | max_steps=max_iter, 72 | power=1.0, 73 | min_lr=0) 74 | 
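75 | # The block below is an editorial sketch, not read by the runner: it shows
76 | # the per-layer lr scaling that a BEiT-style layer-wise decay (which
77 | # 'LRDecayParameterGroupsGenerator' with num_layers=12, decay_rate=0.65 is
78 | # assumed to implement) would assign, from the patch embedding up to the head.
79 | #
80 | #   num_layers, decay_rate = 12, 0.65
81 | #   # depth id 0 = patch embedding, 1..12 = transformer blocks, 13 = head
82 | #   scales = [decay_rate ** (num_layers + 1 - i) for i in range(num_layers + 2)]
83 | #   assert scales[13] == 1.0                    # head trains at the base lr
84 | #   assert abs(scales[0] - 0.65 ** 13) < 1e-9   # embedding lr ~= 0.0037 * base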
-------------------------------------------------------------------------------- /project/mobilenet_v2/fcn_m-v2-d8_512x512_ade20k_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | # pretrained='jittorhub://mobilenet_v2.pkl', 11 | backbone=dict(type='MobileNetV2', 12 | widen_factor=1., 13 | strides=(1, 2, 2, 1, 1, 1, 1), 14 | dilations=(1, 1, 1, 2, 2, 4, 4), 15 | out_indices=(1, 2, 4, 6)), 16 | decode_head=dict(type='FCNHead', 17 | in_channels=320, 18 | in_index=3, 19 | channels=512, 20 | num_convs=2, 21 | concat_input=True, 22 | dropout_ratio=0.1, 23 | num_classes=150, 24 | norm_cfg=norm_cfg, 25 | align_corners=False, 26 | loss_decode=dict(type='CrossEntropyLoss', 27 | use_sigmoid=False, 28 | loss_weight=1.0)), 29 | auxiliary_head=dict(type='FCNHead', 30 | in_channels=96, 31 | in_index=2, 32 | channels=256, 33 | num_convs=1, 34 | concat_input=False, 35 | dropout_ratio=0.1, 36 | num_classes=150, 37 | norm_cfg=norm_cfg, 38 | align_corners=False, 39 | loss_decode=dict(type='CrossEntropyLoss', 40 | use_sigmoid=False, 41 | loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /project/mobilenet_v2/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jittor/JSeg/c14696dc4fa6e822fd15b7add2d07067ecb95943/project/mobilenet_v2/readme.md -------------------------------------------------------------------------------- /project/nonlocal_net/nonlocal_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='NLHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | dropout_ratio=0.1, 25 | reduction=2, 26 | use_scale=True, 27 | mode='embedded_gaussian', 28 | num_classes=150, 29 | norm_cfg=norm_cfg, 30 | align_corners=False, 31 | loss_decode=dict(type='CrossEntropyLoss', 32 | use_sigmoid=False, 33 | loss_weight=1.0)), 34 | auxiliary_head=dict(type='FCNHead', 35 | in_channels=1024, 36 | in_index=2, 37 | channels=256, 38 | num_convs=1, 39 | concat_input=False, 40 | dropout_ratio=0.1, 41 | num_classes=150, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict(type='CrossEntropyLoss', 45 | use_sigmoid=False, 46 | loss_weight=0.4)), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /project/nonlocal_net/readme.md: -------------------------------------------------------------------------------- 1 | # NonLocal Net 
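2 | 
3 | The config in this folder uses `NLHead` with `mode='embedded_gaussian'`.
4 | Following Wang et al. (2018), the non-local block computes, for every
5 | spatial position i, a response aggregated over all positions j:
6 | 
7 | y_i = (1 / C(x)) * sum_j f(x_i, x_j) g(x_j), with
8 | f(x_i, x_j) = exp(theta(x_i)^T phi(x_j)) and C(x) = sum_j f(x_i, x_j),
9 | 
10 | i.e. a softmax over pairwise embedded dot products, so each output pixel
11 | attends to the whole feature map. In the config above, `reduction=2` halves
12 | the channels used for the theta/phi/g projections, and `use_scale=True`
13 | divides the dot products by the square root of the embedding dimension
14 | before the softmax (mirroring the reference implementation).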
-------------------------------------------------------------------------------- /project/point_rend/pointrend_r50_512x512_ade20k_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_160k.py' 4 | ] 5 | 6 | norm_cfg = dict(type='BN') 7 | model = dict( 8 | type='CascadeEncoderDecoder', 9 | num_stages=2, 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 1, 1), 16 | strides=(1, 2, 2, 2), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | neck=dict(type='FPN', 21 | in_channels=[256, 512, 1024, 2048], 22 | out_channels=256, 23 | num_outs=4), 24 | decode_head=[ 25 | dict(type='FPNHead', 26 | in_channels=[256, 256, 256, 256], 27 | in_index=[0, 1, 2, 3], 28 | feature_strides=[4, 8, 16, 32], 29 | channels=128, 30 | dropout_ratio=-1, 31 | num_classes=150, 32 | norm_cfg=norm_cfg, 33 | align_corners=False, 34 | loss_decode=dict(type='CrossEntropyLoss', 35 | use_sigmoid=False, 36 | loss_weight=1.0)), 37 | dict(type='PointHead', 38 | in_channels=[256], 39 | in_index=[0], 40 | channels=256, 41 | num_fcs=3, 42 | coarse_pred_each_layer=True, 43 | dropout_ratio=-1, 44 | num_classes=150, 45 | align_corners=False, 46 | loss_decode=dict(type='CrossEntropyLoss', 47 | use_sigmoid=False, 48 | loss_weight=1.0)) 49 | ], 50 | # model training and testing settings 51 | train_cfg=dict(num_points=2048, 52 | oversample_ratio=3, 53 | importance_sample_ratio=0.75), 54 | test_cfg=dict(mode='whole', 55 | subdivision_steps=2, 56 | subdivision_num_points=8196, 57 | scale_factor=2)) 58 | 59 | scheduler = dict(type='PolyLR', 60 | warmup='linear', 61 | warmup_iters=200, 62 | warmup_ratio=1e-6, 63 | max_steps=160000, 64 | power=1.0, 65 | min_lr=0) 66 | -------------------------------------------------------------------------------- /project/point_rend/readme.md: -------------------------------------------------------------------------------- 1 | # PointRend -------------------------------------------------------------------------------- /project/pspnet/pspnet_r50-d8_512x1024_cityscapes_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/cityscapes_512x1024.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='PSPHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | pool_scales=(1, 2, 3, 6), 25 | dropout_ratio=0.1, 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict(type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | loss_weight=1.0)), 32 | auxiliary_head=dict(type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict(type='CrossEntropyLoss', 43 | use_sigmoid=False, 44 | 
loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /project/pspnet/pspnet_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | # model settings 6 | norm_cfg = dict(type='BN') 7 | model = dict( 8 | type='EncoderDecoder', 9 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 10 | backbone=dict(type='ResNetV1c', 11 | depth=50, 12 | num_stages=4, 13 | out_indices=(0, 1, 2, 3), 14 | dilations=(1, 1, 2, 4), 15 | strides=(1, 2, 1, 1), 16 | norm_cfg=norm_cfg, 17 | norm_eval=False, 18 | contract_dilation=True), 19 | decode_head=dict(type='PSPHead', 20 | in_channels=2048, 21 | in_index=3, 22 | channels=512, 23 | pool_scales=(1, 2, 3, 6), 24 | dropout_ratio=0.1, 25 | num_classes=150, 26 | norm_cfg=norm_cfg, 27 | align_corners=False, 28 | loss_decode=dict(type='CrossEntropyLoss', 29 | use_sigmoid=False, 30 | loss_weight=1.0)), 31 | auxiliary_head=dict(type='FCNHead', 32 | in_channels=1024, 33 | in_index=2, 34 | channels=256, 35 | num_convs=1, 36 | concat_input=False, 37 | dropout_ratio=0.1, 38 | num_classes=150, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict(type='CrossEntropyLoss', 42 | use_sigmoid=False, 43 | loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /project/pspnet/pspnet_r50-d8_512x512_loveda_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/loveda.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='PSPHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | pool_scales=(1, 2, 3, 6), 25 | dropout_ratio=0.1, 26 | num_classes=7, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict(type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | loss_weight=1.0)), 32 | auxiliary_head=dict(type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=7, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict(type='CrossEntropyLoss', 43 | use_sigmoid=False, 44 | loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /project/pspnet/readme.md: -------------------------------------------------------------------------------- 1 | # PSPNet -------------------------------------------------------------------------------- /project/resnest/pspnet_s101-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 
'../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnest101.pkl', 11 | backbone=dict(type='ResNeSt', 12 | depth=101, 13 | stem_channels=128, 14 | radix=2, 15 | reduction_factor=4, 16 | avg_down_stride=True, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | contract_dilation=True), 24 | decode_head=dict(type='PSPHead', 25 | in_channels=2048, 26 | in_index=3, 27 | channels=512, 28 | pool_scales=(1, 2, 3, 6), 29 | dropout_ratio=0.1, 30 | num_classes=150, 31 | norm_cfg=norm_cfg, 32 | align_corners=False, 33 | loss_decode=dict(type='CrossEntropyLoss', 34 | use_sigmoid=False, 35 | loss_weight=1.0)), 36 | auxiliary_head=dict(type='FCNHead', 37 | in_channels=1024, 38 | in_index=2, 39 | channels=256, 40 | num_convs=1, 41 | concat_input=False, 42 | dropout_ratio=0.1, 43 | num_classes=150, 44 | norm_cfg=norm_cfg, 45 | align_corners=False, 46 | loss_decode=dict(type='CrossEntropyLoss', 47 | use_sigmoid=False, 48 | loss_weight=0.4)), 49 | # model training and testing settings 50 | train_cfg=dict(), 51 | test_cfg=dict(mode='whole')) 52 | -------------------------------------------------------------------------------- /project/resnest/readme.md: -------------------------------------------------------------------------------- 1 | # ResNeSt -------------------------------------------------------------------------------- /project/segformer/b0/segformer_b0_512x512_ade_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = ['../../_base_/datasets/ade20k.py', '../../_base_/default_runtime.py'] 2 | 3 | # model settings 4 | norm_cfg = dict(type='BN') 5 | model = dict( 6 | type='EncoderDecoder', 7 | pretrained='jittorhub://mit_b0.pkl', 8 | backbone=dict(type='mit_b0'), 9 | decode_head=dict(type='SegFormerHead', 10 | in_channels=[32, 64, 160, 256], 11 | in_index=[0, 1, 2, 3], 12 | feature_strides=[4, 8, 16, 32], 13 | channels=128, 14 | dropout_ratio=0.1, 15 | num_classes=150, 16 | norm_cfg=norm_cfg, 17 | align_corners=False, 18 | decoder_params=dict(embed_dim=256), 19 | loss_decode=dict(type='CrossEntropyLoss', 20 | use_sigmoid=False, 21 | loss_weight=1.0)), 22 | # model training and testing settings 23 | train_cfg=dict(), 24 | test_cfg=dict(mode='whole')) 25 | 26 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 27 | std=[58.395, 57.12, 57.375], 28 | to_rgb=True) 29 | test_pipeline = [ 30 | dict(type='LoadImageFromFile'), 31 | dict( 32 | type='MultiScaleFlipAug', 33 | img_scale=(2048, 512), 34 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 35 | flip=False, 36 | transforms=[ 37 | dict(type='Resize', keep_ratio=True), 38 | dict(type='ResizeToMultiple', size_divisor=32), 39 | dict(type='RandomFlip'), 40 | dict(type='Normalize', **img_norm_cfg), 41 | dict(type='ImageToTensor', keys=['img']), 42 | dict(type='Collect', keys=['img']), 43 | ]) 44 | ] 45 | 46 | dataset = dict( 47 | val=dict(pipeline=test_pipeline)) 48 | 49 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 50 | custom_keys={ 51 | 'pos_block': dict(decay_mult=0.), 52 | 'norm': dict(decay_mult=0.), 53 | 'head': dict(lr_mult=10.) 
54 | }) 55 | 56 | optimizer = dict( 57 | type='CustomAdamW', 58 | lr=0.00006, 59 | betas=(0.9, 0.999), 60 | weight_decay=0.01, 61 | ) 62 | 63 | max_iter = 160000 64 | eval_interval = 8000 65 | checkpoint_interval = 8000 66 | 67 | scheduler = dict(type='PolyLR', 68 | warmup='linear', 69 | warmup_iters=1500, 70 | warmup_ratio=1e-6, 71 | max_steps=max_iter, 72 | power=1.0, 73 | min_lr=0) 74 | -------------------------------------------------------------------------------- /project/segformer/readme.md: -------------------------------------------------------------------------------- 1 | # SegFormer -------------------------------------------------------------------------------- /project/segnext/base/segnext_base_1024x1024_cityscapes_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/cityscapes_1024x1024.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_b.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.2, 16 | depths=[3, 3, 12, 3]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=512, 21 | dropout_ratio=0.1, 22 | num_classes=19, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=512), 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | # test_cfg=dict(mode='whole')) 32 | test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) 33 | 34 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 35 | std=[58.395, 57.12, 57.375], 36 | to_rgb=True) 37 | test_pipeline = [ 38 | dict(type='LoadImageFromFile'), 39 | dict( 40 | type='MultiScaleFlipAug', 41 | img_scale=(2048, 1024), 42 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='ResizeToMultiple', size_divisor=32), 47 | dict(type='RandomFlip'), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | dataset = dict( 54 | val=dict(pipeline=test_pipeline)) 55 | 56 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 57 | custom_keys={ 58 | 'pos_block': dict(decay_mult=0.), 59 | 'norm': dict(decay_mult=0.), 60 | 'head': dict(lr_mult=10.) 
61 | }) 62 | 63 | optimizer = dict( 64 | type='CustomAdamW', 65 | lr=0.00006, 66 | betas=(0.9, 0.999), 67 | weight_decay=0.01, 68 | ) 69 | 70 | max_iter = 160000 71 | eval_interval = 8000 72 | checkpoint_interval = 8000 73 | 74 | scheduler = dict(type='PolyLR', 75 | warmup='linear', 76 | warmup_iters=1500, 77 | warmup_ratio=1e-6, 78 | max_steps=max_iter, 79 | power=1.0, 80 | min_lr=0) 81 | -------------------------------------------------------------------------------- /project/segnext/base/segnext_base_512x512_ade_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/ade20k.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_b.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.2, 16 | depths=[3, 3, 12, 3]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=512, 21 | dropout_ratio=0.1, 22 | num_classes=150, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=512), 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | test_cfg=dict(mode='whole')) 32 | 33 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 34 | std=[58.395, 57.12, 57.375], 35 | to_rgb=True) 36 | test_pipeline = [ 37 | dict(type='LoadImageFromFile'), 38 | dict( 39 | type='MultiScaleFlipAug', 40 | img_scale=(2048, 512), 41 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 42 | flip=False, 43 | transforms=[ 44 | dict(type='Resize', keep_ratio=True), 45 | dict(type='ResizeToMultiple', size_divisor=32), 46 | dict(type='RandomFlip'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='ImageToTensor', keys=['img']), 49 | dict(type='Collect', keys=['img']), 50 | ]) 51 | ] 52 | dataset = dict( 53 | val=dict(pipeline=test_pipeline)) 54 | 55 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 56 | custom_keys={ 57 | 'pos_block': dict(decay_mult=0.), 58 | 'norm': dict(decay_mult=0.), 59 | 'head': dict(lr_mult=10.) 
60 | }) 61 | 62 | optimizer = dict( 63 | type='CustomAdamW', 64 | lr=0.00006, 65 | betas=(0.9, 0.999), 66 | weight_decay=0.01, 67 | ) 68 | 69 | max_iter = 160000 70 | eval_interval = 8000 71 | checkpoint_interval = 8000 72 | 73 | scheduler = dict(type='PolyLR', 74 | warmup='linear', 75 | warmup_iters=1500, 76 | warmup_ratio=1e-6, 77 | max_steps=max_iter, 78 | power=1.0, 79 | min_lr=0) 80 | -------------------------------------------------------------------------------- /project/segnext/base/segnext_base_896x896_isaid_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/isaid_869x869.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_b.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.2, 16 | depths=[3, 3, 12, 3]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=512, 21 | dropout_ratio=0.1, 22 | num_classes=16, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=512), 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | test_cfg=dict(mode='whole')) 32 | 33 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 34 | std=[58.395, 57.12, 57.375], 35 | to_rgb=True) 36 | test_pipeline = [ 37 | dict(type='LoadImageFromFile'), 38 | dict( 39 | type='MultiScaleFlipAug', 40 | img_scale=(896, 896), 41 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 42 | flip=False, 43 | transforms=[ 44 | dict(type='Resize', keep_ratio=True), 45 | dict(type='ResizeToMultiple', size_divisor=32), 46 | dict(type='RandomFlip'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='ImageToTensor', keys=['img']), 49 | dict(type='Collect', keys=['img']), 50 | ]) 51 | ] 52 | dataset = dict( 53 | val=dict(pipeline=test_pipeline)) 54 | 55 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 56 | custom_keys={ 57 | 'pos_block': dict(decay_mult=0.), 58 | 'norm': dict(decay_mult=0.), 59 | 'head': dict(lr_mult=10.) 
60 | }) 61 | 62 | optimizer = dict( 63 | type='CustomAdamW', 64 | lr=0.00006, 65 | betas=(0.9, 0.999), 66 | weight_decay=0.01, 67 | ) 68 | 69 | max_iter = 160000 70 | eval_interval = 8000 71 | checkpoint_interval = 8000 72 | 73 | scheduler = dict(type='PolyLR', 74 | warmup='linear', 75 | warmup_iters=1500, 76 | warmup_ratio=1e-6, 77 | max_steps=max_iter, 78 | power=1.0, 79 | min_lr=0) 80 | -------------------------------------------------------------------------------- /project/segnext/large/segnext_large_1024x1024_cityscapes_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/cityscapes_1024x1024.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_l.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.3, 16 | depths=[3, 5, 27, 3]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=1024, 21 | dropout_ratio=0.1, 22 | num_classes=19, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=1024), 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | # test_cfg=dict(mode='whole')) 32 | test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) 33 | 34 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 35 | std=[58.395, 57.12, 57.375], 36 | to_rgb=True) 37 | test_pipeline = [ 38 | dict(type='LoadImageFromFile'), 39 | dict( 40 | type='MultiScaleFlipAug', 41 | img_scale=(2048, 1024), 42 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='ResizeToMultiple', size_divisor=32), 47 | dict(type='RandomFlip'), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | dataset = dict( 54 | val=dict(pipeline=test_pipeline)) 55 | 56 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 57 | custom_keys={ 58 | 'pos_block': dict(decay_mult=0.), 59 | 'norm': dict(decay_mult=0.), 60 | 'head': dict(lr_mult=10.) 
61 | }) 62 | 63 | optimizer = dict( 64 | type='CustomAdamW', 65 | lr=0.00006, 66 | betas=(0.9, 0.999), 67 | weight_decay=0.01, 68 | ) 69 | 70 | max_iter = 160000 71 | eval_interval = 8000 72 | checkpoint_interval = 8000 73 | 74 | scheduler = dict(type='PolyLR', 75 | warmup='linear', 76 | warmup_iters=1500, 77 | warmup_ratio=1e-6, 78 | max_steps=max_iter, 79 | power=1.0, 80 | min_lr=0) 81 | -------------------------------------------------------------------------------- /project/segnext/large/segnext_large_512x512_ade_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/ade20k.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_l.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.3, 16 | depths=[3, 5, 27, 3]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=1024, 21 | dropout_ratio=0.1, 22 | num_classes=150, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=1024), 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | test_cfg=dict(mode='whole')) 32 | 33 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 34 | std=[58.395, 57.12, 57.375], 35 | to_rgb=True) 36 | test_pipeline = [ 37 | dict(type='LoadImageFromFile'), 38 | dict( 39 | type='MultiScaleFlipAug', 40 | img_scale=(2048, 512), 41 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 42 | flip=False, 43 | transforms=[ 44 | dict(type='Resize', keep_ratio=True), 45 | dict(type='ResizeToMultiple', size_divisor=32), 46 | dict(type='RandomFlip'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='ImageToTensor', keys=['img']), 49 | dict(type='Collect', keys=['img']), 50 | ]) 51 | ] 52 | dataset = dict( 53 | val=dict(pipeline=test_pipeline)) 54 | 55 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 56 | custom_keys={ 57 | 'pos_block': dict(decay_mult=0.), 58 | 'norm': dict(decay_mult=0.), 59 | 'head': dict(lr_mult=10.) 
60 | }) 61 | 62 | optimizer = dict( 63 | type='CustomAdamW', 64 | lr=0.00006, 65 | betas=(0.9, 0.999), 66 | weight_decay=0.01, 67 | ) 68 | 69 | max_iter = 160000 70 | eval_interval = 8000 71 | checkpoint_interval = 8000 72 | 73 | scheduler = dict(type='PolyLR', 74 | warmup='linear', 75 | warmup_iters=1500, 76 | warmup_ratio=1e-6, 77 | max_steps=max_iter, 78 | power=1.0, 79 | min_lr=0) 80 | -------------------------------------------------------------------------------- /project/segnext/large/segnext_large_896x896_isaid_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/isaid_869x869.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_l.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.3, 16 | depths=[3, 5, 27, 3]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=1024, 21 | dropout_ratio=0.1, 22 | num_classes=16, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=1024), 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | test_cfg=dict(mode='whole')) 32 | 33 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 34 | std=[58.395, 57.12, 57.375], 35 | to_rgb=True) 36 | test_pipeline = [ 37 | dict(type='LoadImageFromFile'), 38 | dict( 39 | type='MultiScaleFlipAug', 40 | img_scale=(896, 896), 41 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 42 | flip=False, 43 | transforms=[ 44 | dict(type='Resize', keep_ratio=True), 45 | dict(type='ResizeToMultiple', size_divisor=32), 46 | dict(type='RandomFlip'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='ImageToTensor', keys=['img']), 49 | dict(type='Collect', keys=['img']), 50 | ]) 51 | ] 52 | dataset = dict( 53 | val=dict(pipeline=test_pipeline)) 54 | 55 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 56 | custom_keys={ 57 | 'pos_block': dict(decay_mult=0.), 58 | 'norm': dict(decay_mult=0.), 59 | 'head': dict(lr_mult=10.) 
60 | }) 61 | 62 | optimizer = dict( 63 | type='CustomAdamW', 64 | lr=0.00006, 65 | betas=(0.9, 0.999), 66 | weight_decay=0.01, 67 | ) 68 | 69 | max_iter = 160000 70 | eval_interval = 8000 71 | checkpoint_interval = 8000 72 | 73 | scheduler = dict(type='PolyLR', 74 | warmup='linear', 75 | warmup_iters=1500, 76 | warmup_ratio=1e-6, 77 | max_steps=max_iter, 78 | power=1.0, 79 | min_lr=0) 80 | -------------------------------------------------------------------------------- /project/segnext/resources/flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jittor/JSeg/c14696dc4fa6e822fd15b7add2d07067ecb95943/project/segnext/resources/flops.png -------------------------------------------------------------------------------- /project/segnext/small/segnext_small_1024x1024_cityscapes_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/cityscapes_1024x1024.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_s.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.1, 16 | depths=[2, 2, 4, 2]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=256, 21 | dropout_ratio=0.1, 22 | num_classes=19, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=256, 29 | ham_kwargs=dict(MD_R=16)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | # test_cfg=dict(mode='whole')) 33 | test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) 34 | 35 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 36 | std=[58.395, 57.12, 57.375], 37 | to_rgb=True) 38 | test_pipeline = [ 39 | dict(type='LoadImageFromFile'), 40 | dict( 41 | type='MultiScaleFlipAug', 42 | img_scale=(2048, 1024), 43 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 44 | flip=False, 45 | transforms=[ 46 | dict(type='Resize', keep_ratio=True), 47 | dict(type='ResizeToMultiple', size_divisor=32), 48 | dict(type='RandomFlip'), 49 | dict(type='Normalize', **img_norm_cfg), 50 | dict(type='ImageToTensor', keys=['img']), 51 | dict(type='Collect', keys=['img']), 52 | ]) 53 | ] 54 | dataset = dict( 55 | val=dict(pipeline=test_pipeline)) 56 | 57 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 58 | custom_keys={ 59 | 'pos_block': dict(decay_mult=0.), 60 | 'norm': dict(decay_mult=0.), 61 | 'head': dict(lr_mult=10.) 
62 | }) 63 | 64 | optimizer = dict( 65 | type='CustomAdamW', 66 | lr=0.00006, 67 | betas=(0.9, 0.999), 68 | weight_decay=0.01, 69 | ) 70 | 71 | max_iter = 160000 72 | eval_interval = 8000 73 | checkpoint_interval = 8000 74 | 75 | scheduler = dict(type='PolyLR', 76 | warmup='linear', 77 | warmup_iters=1500, 78 | warmup_ratio=1e-6, 79 | max_steps=max_iter, 80 | power=1.0, 81 | min_lr=0) 82 | -------------------------------------------------------------------------------- /project/segnext/small/segnext_small_512x512_ade_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/ade20k.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_s.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.1, 16 | depths=[2, 2, 4, 2]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=256, 21 | dropout_ratio=0.1, 22 | num_classes=150, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=256, 29 | ham_kwargs=dict(MD_R=16)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | test_cfg=dict(mode='whole')) 33 | 34 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 35 | std=[58.395, 57.12, 57.375], 36 | to_rgb=True) 37 | test_pipeline = [ 38 | dict(type='LoadImageFromFile'), 39 | dict( 40 | type='MultiScaleFlipAug', 41 | img_scale=(2048, 512), 42 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='ResizeToMultiple', size_divisor=32), 47 | dict(type='RandomFlip'), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | dataset = dict( 54 | val=dict(pipeline=test_pipeline)) 55 | 56 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 57 | custom_keys={ 58 | 'pos_block': dict(decay_mult=0.), 59 | 'norm': dict(decay_mult=0.), 60 | 'head': dict(lr_mult=10.) 
61 | }) 62 | 63 | optimizer = dict( 64 | type='CustomAdamW', 65 | lr=0.00006, 66 | betas=(0.9, 0.999), 67 | weight_decay=0.01, 68 | ) 69 | 70 | max_iter = 160000 71 | eval_interval = 8000 72 | checkpoint_interval = 8000 73 | 74 | scheduler = dict(type='PolyLR', 75 | warmup='linear', 76 | warmup_iters=1500, 77 | warmup_ratio=1e-6, 78 | max_steps=max_iter, 79 | power=1.0, 80 | min_lr=0) 81 | -------------------------------------------------------------------------------- /project/segnext/small/segnext_small_896x896_isaid_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/isaid_869x869.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_s.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.1, 16 | depths=[2, 2, 4, 2]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=256, 21 | dropout_ratio=0.1, 22 | num_classes=16, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=256, 29 | ham_kwargs=dict(MD_R=16)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | test_cfg=dict(mode='whole')) 33 | 34 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 35 | std=[58.395, 57.12, 57.375], 36 | to_rgb=True) 37 | test_pipeline = [ 38 | dict(type='LoadImageFromFile'), 39 | dict( 40 | type='MultiScaleFlipAug', 41 | img_scale=(896, 896), 42 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='ResizeToMultiple', size_divisor=32), 47 | dict(type='RandomFlip'), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | dataset = dict( 54 | val=dict(pipeline=test_pipeline)) 55 | 56 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 57 | custom_keys={ 58 | 'pos_block': dict(decay_mult=0.), 59 | 'norm': dict(decay_mult=0.), 60 | 'head': dict(lr_mult=10.) 
61 | }) 62 | 63 | optimizer = dict( 64 | type='CustomAdamW', 65 | lr=0.00006, 66 | betas=(0.9, 0.999), 67 | weight_decay=0.01, 68 | ) 69 | 70 | max_iter = 160000 71 | eval_interval = 8000 72 | checkpoint_interval = 8000 73 | 74 | scheduler = dict(type='PolyLR', 75 | warmup='linear', 76 | warmup_iters=1500, 77 | warmup_ratio=1e-6, 78 | max_steps=max_iter, 79 | power=1.0, 80 | min_lr=0) 81 | -------------------------------------------------------------------------------- /project/segnext/tiny/segnext_tiny_1024x1024_cityscapes_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/cityscapes_1024x1024.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_t.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[32, 64, 160, 256], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.1, 16 | depths=[3, 3, 5, 2]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[64, 160, 256], 19 | in_index=[1, 2, 3], 20 | channels=256, 21 | dropout_ratio=0.1, 22 | num_classes=19, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=256, 29 | ham_kwargs=dict(MD_R=16)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | # test_cfg=dict(mode='whole')) 33 | test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) 34 | 35 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 36 | std=[58.395, 57.12, 57.375], 37 | to_rgb=True) 38 | test_pipeline = [ 39 | dict(type='LoadImageFromFile'), 40 | dict( 41 | type='MultiScaleFlipAug', 42 | img_scale=(2048, 1024), 43 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 44 | flip=False, 45 | transforms=[ 46 | dict(type='Resize', keep_ratio=True), 47 | dict(type='ResizeToMultiple', size_divisor=32), 48 | dict(type='RandomFlip'), 49 | dict(type='Normalize', **img_norm_cfg), 50 | dict(type='ImageToTensor', keys=['img']), 51 | dict(type='Collect', keys=['img']), 52 | ]) 53 | ] 54 | dataset = dict( 55 | val=dict(pipeline=test_pipeline)) 56 | 57 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 58 | custom_keys={ 59 | 'pos_block': dict(decay_mult=0.), 60 | 'norm': dict(decay_mult=0.), 61 | 'head': dict(lr_mult=10.) 
62 | }) 63 | 64 | optimizer = dict( 65 | type='CustomAdamW', 66 | lr=0.00006, 67 | betas=(0.9, 0.999), 68 | weight_decay=0.01, 69 | ) 70 | 71 | max_iter = 160000 72 | eval_interval = 8000 73 | checkpoint_interval = 8000 74 | 75 | scheduler = dict(type='PolyLR', 76 | warmup='linear', 77 | warmup_iters=1500, 78 | warmup_ratio=1e-6, 79 | max_steps=max_iter, 80 | power=1.0, 81 | min_lr=0) 82 | -------------------------------------------------------------------------------- /project/segnext/tiny/segnext_tiny_512x512_ade_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/ade20k.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_t.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[32, 64, 160, 256], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.1, 16 | depths=[3, 3, 5, 2]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[64, 160, 256], 19 | in_index=[1, 2, 3], 20 | channels=256, 21 | dropout_ratio=0.1, 22 | num_classes=150, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=256, 29 | ham_kwargs=dict(MD_R=16)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | test_cfg=dict(mode='whole')) 33 | 34 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 35 | std=[58.395, 57.12, 57.375], 36 | to_rgb=True) 37 | test_pipeline = [ 38 | dict(type='LoadImageFromFile'), 39 | dict( 40 | type='MultiScaleFlipAug', 41 | img_scale=(2048, 512), 42 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='ResizeToMultiple', size_divisor=32), 47 | dict(type='RandomFlip'), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | dataset = dict( 54 | val=dict(pipeline=test_pipeline)) 55 | 56 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 57 | custom_keys={ 58 | 'pos_block': dict(decay_mult=0.), 59 | 'norm': dict(decay_mult=0.), 60 | 'head': dict(lr_mult=10.) 
61 | }) 62 | 63 | optimizer = dict( 64 | type='CustomAdamW', 65 | lr=0.00006, 66 | betas=(0.9, 0.999), 67 | weight_decay=0.01, 68 | ) 69 | 70 | max_iter = 160000 71 | eval_interval = 8000 72 | checkpoint_interval = 8000 73 | 74 | scheduler = dict(type='PolyLR', 75 | warmup='linear', 76 | warmup_iters=1500, 77 | warmup_ratio=1e-6, 78 | max_steps=max_iter, 79 | power=1.0, 80 | min_lr=0) 81 | -------------------------------------------------------------------------------- /project/segnext/tiny/segnext_tiny_896x896_isaid_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/isaid_869x869.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_t.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[32, 64, 160, 256], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.1, 16 | depths=[3, 3, 5, 2]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[64, 160, 256], 19 | in_index=[1, 2, 3], 20 | channels=256, 21 | dropout_ratio=0.1, 22 | num_classes=16, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=256, 29 | ham_kwargs=dict(MD_R=16)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | test_cfg=dict(mode='whole')) 33 | 34 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 35 | std=[58.395, 57.12, 57.375], 36 | to_rgb=True) 37 | test_pipeline = [ 38 | dict(type='LoadImageFromFile'), 39 | dict( 40 | type='MultiScaleFlipAug', 41 | img_scale=(896, 896), 42 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='ResizeToMultiple', size_divisor=32), 47 | dict(type='RandomFlip'), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | dataset = dict( 54 | val=dict(pipeline=test_pipeline)) 55 | 56 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 57 | custom_keys={ 58 | 'pos_block': dict(decay_mult=0.), 59 | 'norm': dict(decay_mult=0.), 60 | 'head': dict(lr_mult=10.) 
61 | }) 62 | 63 | optimizer = dict( 64 | type='CustomAdamW', 65 | lr=0.00006, 66 | betas=(0.9, 0.999), 67 | weight_decay=0.01, 68 | ) 69 | 70 | max_iter = 160000 71 | eval_interval = 8000 72 | checkpoint_interval = 8000 73 | 74 | scheduler = dict(type='PolyLR', 75 | warmup='linear', 76 | warmup_iters=1500, 77 | warmup_ratio=1e-6, 78 | max_steps=max_iter, 79 | power=1.0, 80 | min_lr=0) 81 | -------------------------------------------------------------------------------- /project/swin/readme.md: -------------------------------------------------------------------------------- 1 | # swin -------------------------------------------------------------------------------- /project/swin/tiny/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/ade20k.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | backbone_norm_cfg = dict(type='LN') 9 | model = dict( 10 | type='EncoderDecoder', 11 | pretrained='jittorhub://swin_tiny_patch4_window7_224.pkl', 12 | backbone=dict(type='SwinTransformer', 13 | pretrain_img_size=224, 14 | embed_dims=96, 15 | patch_size=4, 16 | window_size=7, 17 | mlp_ratio=4, 18 | depths=[2, 2, 6, 2], 19 | num_heads=[3, 6, 12, 24], 20 | strides=(4, 2, 2, 2), 21 | out_indices=(0, 1, 2, 3), 22 | qkv_bias=True, 23 | qk_scale=None, 24 | patch_norm=True, 25 | drop_rate=0., 26 | attn_drop_rate=0., 27 | drop_path_rate=0.3, 28 | use_abs_pos_embed=False, 29 | act_cfg=dict(type='GELU'), 30 | norm_cfg=backbone_norm_cfg), 31 | decode_head=dict(type='UPerHead', 32 | in_channels=[96, 192, 384, 768], 33 | in_index=[0, 1, 2, 3], 34 | pool_scales=(1, 2, 3, 6), 35 | channels=512, 36 | dropout_ratio=0.1, 37 | num_classes=150, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict(type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | loss_weight=1.0)), 43 | auxiliary_head=dict(type='FCNHead', 44 | in_channels=384, 45 | in_index=2, 46 | channels=256, 47 | num_convs=1, 48 | concat_input=False, 49 | dropout_ratio=0.1, 50 | num_classes=150, 51 | norm_cfg=norm_cfg, 52 | align_corners=False, 53 | loss_decode=dict(type='CrossEntropyLoss', 54 | use_sigmoid=False, 55 | loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='whole')) 59 | 60 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 61 | custom_keys={ 62 | 'absolute_pos_embed': 63 | dict(decay_mult=0.), 64 | 'relative_position_bias_table': 65 | dict(decay_mult=0.), 66 | 'norm': 67 | dict(decay_mult=0.) 
68 | }) 69 | 70 | optimizer = dict( 71 | type='CustomAdamW', 72 | lr=0.00006, 73 | betas=(0.9, 0.999), 74 | weight_decay=0.01, 75 | ) 76 | 77 | max_iter = 160000 78 | eval_interval = 8000 79 | checkpoint_interval = 8000 80 | 81 | scheduler = dict(type='PolyLR', 82 | warmup='linear', 83 | warmup_iters=1500, 84 | warmup_ratio=1e-6, 85 | max_steps=max_iter, 86 | power=1.0, 87 | min_lr=0) 88 | -------------------------------------------------------------------------------- /project/upernet/readme.md: -------------------------------------------------------------------------------- 1 | # upernet -------------------------------------------------------------------------------- /project/upernet/upernet_r50_512x512_ade20k_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 1, 1), 16 | strides=(1, 2, 2, 2), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='UPerHead', 21 | in_channels=[256, 512, 1024, 2048], 22 | in_index=[0, 1, 2, 3], 23 | pool_scales=(1, 2, 3, 6), 24 | channels=512, 25 | dropout_ratio=0.1, 26 | num_classes=150, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict(type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | loss_weight=1.0)), 32 | auxiliary_head=dict(type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=150, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict(type='CrossEntropyLoss', 43 | use_sigmoid=False, 44 | loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | 49 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 50 | 51 | max_iter = 160000 52 | eval_interval = 8000 53 | checkpoint_interval = 8000 54 | 55 | scheduler = dict(type='PolyLR', max_steps=max_iter, power=0.9, min_lr=1e-4) 56 | -------------------------------------------------------------------------------- /project/vit/readme.md: -------------------------------------------------------------------------------- 1 | # Vision Transformer -------------------------------------------------------------------------------- /project/vit/upernet_vit-b16_ln_mln_512x512_ade20k_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://vit_base_p16_224-80ecf9dd.pkl', 11 | backbone=dict( 12 | type='VisionTransformer', 13 | img_size=(512, 512), 14 | patch_size=16, 15 | in_channels=3, 16 | embed_dims=768, 17 | num_layers=12, 18 | num_heads=12, 19 | mlp_ratio=4, 20 | out_indices=(2, 5, 8, 11), 21 | qkv_bias=True, 22 | drop_rate=0.0, 23 | attn_drop_rate=0.0, 24 | drop_path_rate=0, 25 | with_cls_token=True, 26 | norm_cfg=dict(type='LN', eps=1e-6), 27 | act_cfg=dict(type='GELU'), 28 | norm_eval=False, 29 | final_norm=True, 30 | interpolate_mode='bicubic'), 31 | neck=dict( 32 | 
type='MultiLevelNeck', 33 | in_channels=[768, 768, 768, 768], 34 | out_channels=768, 35 | scales=[4, 2, 1, 0.5]), 36 | decode_head=dict( 37 | type='UPerHead', 38 | in_channels=[768, 768, 768, 768], 39 | in_index=[0, 1, 2, 3], 40 | pool_scales=(1, 2, 3, 6), 41 | channels=512, 42 | dropout_ratio=0.1, 43 | num_classes=150, 44 | norm_cfg=norm_cfg, 45 | align_corners=False, 46 | loss_decode=dict( 47 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 48 | auxiliary_head=dict( 49 | type='FCNHead', 50 | in_channels=768, 51 | in_index=3, 52 | channels=256, 53 | num_convs=1, 54 | concat_input=False, 55 | dropout_ratio=0.1, 56 | num_classes=150, 57 | norm_cfg=norm_cfg, 58 | align_corners=False, 59 | loss_decode=dict( 60 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 61 | # model training and testing settings 62 | train_cfg=dict(), 63 | test_cfg=dict(mode='whole')) # yapf: disable 64 | 65 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 66 | custom_keys={ 67 | 'pos_embed': dict(decay_mult=0.), 68 | 'cls_token': dict(decay_mult=0.), 69 | 'norm': dict(decay_mult=0.) 70 | }) 71 | 72 | optimizer = dict( 73 | type='CustomAdamW', 74 | lr=0.0001, 75 | betas=(0.9, 0.999), 76 | weight_decay=0.05, 77 | ) 78 | 79 | max_iter = 160000 80 | eval_interval = 8000 81 | checkpoint_interval = 8000 82 | 83 | scheduler = dict(type='PolyLR', 84 | warmup='linear', 85 | warmup_iters=1500, 86 | warmup_ratio=1e-6, 87 | max_steps=max_iter, 88 | power=1.0, 89 | min_lr=0) 90 | -------------------------------------------------------------------------------- /python/jseg/__init__.py: -------------------------------------------------------------------------------- 1 | from . import models 2 | from . import runner 3 | from . import config 4 | from . import datasets 5 | from . import ops 6 | from . import utils 7 | from . import optims 8 | from . 
import sampler 9 | 10 | # version must use ' instead of " 11 | __version__ = '0.1.0.0' 12 | -------------------------------------------------------------------------------- /python/jseg/bricks/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv import build_conv_layer 2 | from .activation import build_activation_layer 3 | from .norm import build_norm_layer 4 | from .drop import build_dropout 5 | from .padding import build_padding_layer 6 | from .conv_module import ConvModule 7 | from .depthwise_separable_conv_module import DepthwiseSeparableConvModule 8 | -------------------------------------------------------------------------------- /python/jseg/bricks/activation.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | 4 | from jseg.utils.registry import ACTIVATION_LAYERS, build_from_cfg 5 | 6 | for module in [ 7 | nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.ReLU6, nn.ELU, nn.Sigmoid, nn.Tanh 8 | ]: 9 | if module.__name__ == 'relu': 10 | ACTIVATION_LAYERS.register_module(name='ReLU', module=module) 11 | elif module.__name__ == 'relu6': 12 | ACTIVATION_LAYERS.register_module(name='ReLU6', module=module) 13 | else: 14 | ACTIVATION_LAYERS.register_module(module=module) 15 | 16 | 17 | @ACTIVATION_LAYERS.register_module() 18 | class HSigmoid(nn.Module): 19 | 20 | def __init__(self, bias=3.0, divisor=6.0, min_value=0.0, max_value=1.0): 21 | super().__init__() 22 | self.bias = bias 23 | self.divisor = divisor 24 | assert self.divisor != 0 25 | self.min_value = min_value 26 | self.max_value = max_value 27 | 28 | def execute(self, x): 29 | x = (x + self.bias) / self.divisor 30 | 31 | return x.clamp_(self.min_value, self.max_value) 32 | 33 | 34 | @ACTIVATION_LAYERS.register_module() 35 | class HSwish(nn.Module): 36 | 37 | def __init__(self): 38 | super().__init__() 39 | self.act = nn.ReLU6() 40 | 41 | def execute(self, x): 42 | return x * self.act(x + 3) / 6 43 | 44 | 45 | @ACTIVATION_LAYERS.register_module(name='Clip') 46 | @ACTIVATION_LAYERS.register_module() 47 | class Clamp(nn.Module): 48 | 49 | def __init__(self, min=-1., max=1.): 50 | super().__init__() 51 | self.min = min 52 | self.max = max 53 | 54 | def execute(self, x): 55 | return jt.clamp(x, min_v=self.min, max_v=self.max) 56 | 57 | 58 | class GELU(nn.Module): 59 | 60 | def execute(self, input): 61 | return nn.gelu(input) 62 | 63 | 64 | ACTIVATION_LAYERS.register_module(module=GELU) 65 | 66 | 67 | def build_activation_layer(cfg): 68 | return build_from_cfg(cfg, ACTIVATION_LAYERS) 69 | -------------------------------------------------------------------------------- /python/jseg/bricks/conv.py: -------------------------------------------------------------------------------- 1 | import math 2 | from jittor import nn 3 | from jseg.utils.registry import CONV_LAYERS 4 | from typing import Tuple, Union 5 | 6 | CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d) 7 | CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d) 8 | CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d) 9 | CONV_LAYERS.register_module('Conv', module=nn.Conv2d) 10 | 11 | 12 | class Conv2dAdaptivePadding(nn.Conv2d): 13 | 14 | def __init__(self, 15 | in_channels: int, 16 | out_channels: int, 17 | kernel_size: Union[int, Tuple[int, int]], 18 | stride: Union[int, Tuple[int, int]] = 1, 19 | padding: Union[int, Tuple[int, int]] = 0, 20 | dilation: Union[int, Tuple[int, int]] = 1, 21 | groups: int = 1, 22 | bias: bool = True): 23 | 
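# Note: the user-supplied `padding` is intentionally discarded (0 is passed
# to the parent class below); 'same'-style padding is instead computed
# dynamically in execute() from the input size, stride and dilation,
# TensorFlow-style.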
super().__init__(in_channels, out_channels, kernel_size, stride, 0, 24 | dilation, groups, bias) 25 | 26 | def execute(self, x): 27 | img_h, img_w = x.size()[-2:] 28 | kernel_h, kernel_w = self.weight.size()[-2:] 29 | stride_h, stride_w = self.stride 30 | output_h = math.ceil(img_h / stride_h) 31 | output_w = math.ceil(img_w / stride_w) 32 | pad_h = (max((output_h - 1) * self.stride[0] + 33 | (kernel_h - 1) * self.dilation[0] + 1 - img_h, 0)) 34 | pad_w = (max((output_w - 1) * self.stride[1] + 35 | (kernel_w - 1) * self.dilation[1] + 1 - img_w, 0)) 36 | if pad_h > 0 or pad_w > 0: 37 | x = nn.pad(x, [ 38 | pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2 39 | ]) 40 | return nn.conv2d(x, self.weight, self.bias, self.stride, self.padding, 41 | self.dilation, self.groups) 42 | 43 | 44 | CONV_LAYERS.register_module('Conv2dAdaptivePadding', 45 | module=Conv2dAdaptivePadding) 46 | 47 | 48 | def build_conv_layer(cfg, *args, **kwargs): 49 | """Build convolution layer. 50 | 51 | Args: 52 | cfg (None or dict): The conv layer config, which should contain: 53 | - type (str): Layer type. 54 | - layer args: Args needed to instantiate a conv layer. 55 | args (argument list): Arguments passed to the `__init__` 56 | method of the corresponding conv layer. 57 | kwargs (keyword arguments): Keyword arguments passed to the `__init__` 58 | method of the corresponding conv layer. 59 | 60 | Returns: 61 | nn.Module: Created conv layer. 62 | """ 63 | if cfg is None: 64 | cfg_ = dict(type='Conv2d') 65 | else: 66 | if not isinstance(cfg, dict): 67 | raise TypeError('cfg must be a dict') 68 | if 'type' not in cfg: 69 | raise KeyError('the cfg dict must contain the key "type"') 70 | cfg_ = cfg.copy() 71 | 72 | layer_type = cfg_.pop('type') 73 | conv_layer = CONV_LAYERS.get(layer_type) 74 | 75 | layer = conv_layer(*args, **kwargs, **cfg_) 76 | 77 | return layer 78 | -------------------------------------------------------------------------------- /python/jseg/bricks/depthwise_separable_conv_module.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | 3 | from .conv_module import ConvModule 4 | 5 | 6 | class DepthwiseSeparableConvModule(nn.Module): 7 | def __init__(self, 8 | in_channels, 9 | out_channels, 10 | kernel_size, 11 | stride=1, 12 | padding=0, 13 | dilation=1, 14 | norm_cfg=None, 15 | act_cfg=dict(type='ReLU'), 16 | dw_norm_cfg='default', 17 | dw_act_cfg='default', 18 | pw_norm_cfg='default', 19 | pw_act_cfg='default', 20 | **kwargs): 21 | super().__init__() 22 | assert 'groups' not in kwargs, 'groups should not be specified' 23 | 24 | # if norm/activation config of depthwise/pointwise ConvModule is not 25 | # specified, use default config.
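# The string 'default' acts as a sentinel distinguishing "not given" from an
# explicit None, so each branch can be configured independently, e.g.
# (illustrative call, not taken from this repo):
#   DepthwiseSeparableConvModule(64, 128, 3, padding=1,
#                                norm_cfg=dict(type='BN'),
#                                dw_act_cfg=None)  # disable act on the depthwise branch only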
26 | dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg 27 | dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg 28 | pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg 29 | pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg 30 | 31 | # depthwise convolution 32 | self.depthwise_conv = ConvModule( 33 | in_channels, 34 | in_channels, 35 | kernel_size, 36 | stride=stride, 37 | padding=padding, 38 | dilation=dilation, 39 | groups=in_channels, 40 | norm_cfg=dw_norm_cfg, 41 | act_cfg=dw_act_cfg, 42 | **kwargs) 43 | 44 | self.pointwise_conv = ConvModule( 45 | in_channels, 46 | out_channels, 47 | 1, 48 | norm_cfg=pw_norm_cfg, 49 | act_cfg=pw_act_cfg, 50 | **kwargs) 51 | 52 | def execute(self, x): 53 | x = self.depthwise_conv(x) 54 | x = self.pointwise_conv(x) 55 | return x 56 | -------------------------------------------------------------------------------- /python/jseg/bricks/drop.py: -------------------------------------------------------------------------------- 1 | from jittor.nn import Dropout, DropPath 2 | from jseg.utils.registry import DROPOUT_LAYERS, build_from_cfg 3 | 4 | DROPOUT_LAYERS.register_module(name='Dropout', module=Dropout) 5 | DROPOUT_LAYERS.register_module(name='DropPath', module=DropPath) 6 | 7 | 8 | def build_dropout(cfg, **default_args): 9 | """Builder for drop out layers.""" 10 | return build_from_cfg(cfg, DROPOUT_LAYERS, **default_args) 11 | -------------------------------------------------------------------------------- /python/jseg/bricks/padding.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | from jseg.utils.registry import PADDING_LAYERS 3 | 4 | PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d) 5 | PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d) 6 | PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d) 7 | 8 | 9 | def build_padding_layer(cfg, *args, **kwargs): 10 | if not isinstance(cfg, dict): 11 | raise TypeError('cfg must be a dict') 12 | if 'type' not in cfg: 13 | raise KeyError('the cfg dict must contain the key "type"') 14 | 15 | cfg_ = cfg.copy() 16 | padding_type = cfg_.pop('type') 17 | padding_layer = PADDING_LAYERS.get(padding_type) 18 | 19 | layer = padding_layer(*args, **kwargs, **cfg_) 20 | 21 | return layer 22 | -------------------------------------------------------------------------------- /python/jseg/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import * 2 | -------------------------------------------------------------------------------- /python/jseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import CustomDataset 2 | from .isaid import iSAIDDataset 3 | from .ade import ADE20KDataset 4 | from .voc import PascalVOCDataset 5 | from .cityscapes import CityscapesDataset 6 | from .loveda import LoveDADataset 7 | from .isprs import ISPRSDataset 8 | from .potsdam import PotsdamDataset 9 | from .zero_voc12 import ZeroPascalVOCDataset20 10 | from .zero_coco_stuff import ZeroCOCOStuffDataset 11 | 12 | __all__ = [ 13 | 'CustomDataset', 'iSAIDDataset', 'ADE20KDataset', 'PascalVOCDataset', 14 | 'CityscapesDataset', 'LoveDADataset', 'ISPRSDataset', 'PotsdamDataset', 15 | 'ZeroPascalVOCDataset20', 'ZeroCOCOStuffDataset' 16 | ] 17 | -------------------------------------------------------------------------------- /python/jseg/datasets/isaid.py: 
-------------------------------------------------------------------------------- 1 | from jseg.utils.registry import DATASETS 2 | from .custom import CustomDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class iSAIDDataset(CustomDataset): 7 | CLASSES = ('background', 'ship', 'store_tank', 'baseball_diamond', 8 | 'tennis_court', 'basketball_court', 'Ground_Track_Field', 9 | 'Bridge', 'Large_Vehicle', 'Small_Vehicle', 'Helicopter', 10 | 'Swimming_pool', 'Roundabout', 'Soccer_ball_field', 'plane', 11 | 'Harbor') 12 | PALETTE = [[0, 0, 0], [0, 0, 63], [0, 63, 63], [0, 63, 0], [0, 63, 127], 13 | [0, 63, 191], [0, 63, 255], [0, 127, 63], [0, 127, 127], 14 | [0, 0, 127], [0, 0, 191], [0, 0, 255], [0, 191, 127], 15 | [0, 127, 191], [0, 127, 255], [0, 100, 155]] 16 | 17 | def __init__(self, **kwargs): 18 | super(iSAIDDataset, 19 | self).__init__(img_suffix='.png', 20 | seg_map_suffix='_instance_color_RGB.png', 21 | **kwargs) 22 | -------------------------------------------------------------------------------- /python/jseg/datasets/isprs.py: -------------------------------------------------------------------------------- 1 | from jseg.utils.registry import DATASETS 2 | from .custom import CustomDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class ISPRSDataset(CustomDataset): 7 | """ISPRS dataset. 8 | In segmentation map annotation for ISPRS, 0 is the ignore index. 9 | ``reduce_zero_label`` should be set to True. The ``img_suffix`` and 10 | ``seg_map_suffix`` are both fixed to '.png'. 11 | """ 12 | CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree', 13 | 'car', 'clutter') 14 | 15 | PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0], 16 | [255, 255, 0], [255, 0, 0]] 17 | 18 | def __init__(self, **kwargs): 19 | super(ISPRSDataset, self).__init__( 20 | img_suffix='.png', 21 | seg_map_suffix='.png', 22 | reduce_zero_label=True, 23 | **kwargs) 24 | -------------------------------------------------------------------------------- /python/jseg/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .compose import Compose 2 | from .formating import (Collect, DefaultFormatBundle, ImageToTensor, to_tensor) 3 | from .loading import LoadAnnotations, LoadImageFromFile 4 | from .test_time_aug import MultiScaleFlipAug 5 | from .transforms import (ResizeToMultiple, CLAHE, Normalize, Pad, 6 | PhotoMetricDistortion, RandomCrop, RandomFlip, 7 | RandomRotate, Rerange, Resize, RGB2Gray, SegRescale) 8 | from .utils import imread, imresize, imrescale, imflip, impad_to_multiple, impad, imnormalize, imrotate, clahe, bgr2hsv, hsv2bgr 9 | 10 | __all__ = [ 11 | 'Compose', 'to_tensor', 'DefaultFormatBundle', 'ImageToTensor', 12 | 'Collect', 'LoadAnnotations', 'LoadImageFromFile', 'MultiScaleFlipAug', 13 | 'ResizeToMultiple', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 14 | 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', 15 | 'CLAHE', 'Rerange', 'RGB2Gray', 'imread', 'imresize', 16 | 'imrescale', 'imflip', 'impad_to_multiple', 'impad', 'imnormalize', 17 | 'imrotate', 'clahe', 'bgr2hsv', 'hsv2bgr' 18 | ] 19 | -------------------------------------------------------------------------------- /python/jseg/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from jseg.utils.registry import build_from_cfg, TRANSFORMS 4 | 5 | 6 | @TRANSFORMS.register_module() 7 | class Compose(object): 8 | def __init__(self, transforms): 9 | assert 
isinstance(transforms, collections.abc.Sequence) 10 | self.transforms = [] 11 | for transform in transforms: 12 | if isinstance(transform, dict): 13 | transform = build_from_cfg(transform, TRANSFORMS) 14 | self.transforms.append(transform) 15 | elif callable(transform): 16 | self.transforms.append(transform) 17 | else: 18 | raise TypeError('transform must be callable or a dict') 19 | 20 | def __call__(self, data): 21 | for t in self.transforms: 22 | data = t(data) 23 | if data is None: 24 | return None 25 | return data 26 | 27 | def __repr__(self): 28 | format_string = self.__class__.__name__ + '(' 29 | for t in self.transforms: 30 | format_string += '\n' 31 | format_string += f' {t}' 32 | format_string += '\n)' 33 | return format_string 34 | -------------------------------------------------------------------------------- /python/jseg/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from jseg.utils.registry import TRANSFORMS 4 | import jittor as jt 5 | 6 | 7 | def to_tensor(data): 8 | return jt.Var(data) 9 | 10 | 11 | @TRANSFORMS.register_module() 12 | class DefaultFormatBundle(object): 13 | def __call__(self, results): 14 | 15 | if 'img' in results: 16 | img = results['img'] 17 | if len(img.shape) < 3: 18 | img = np.expand_dims(img, -1) 19 | img = np.ascontiguousarray(img.transpose(2, 0, 1)) 20 | results['img'] = to_tensor(img) 21 | if 'gt_semantic_seg' in results: 22 | # convert to long 23 | results['gt_semantic_seg'] = to_tensor( 24 | results['gt_semantic_seg'][None, ...].astype(np.int64)) 25 | return results 26 | 27 | def __repr__(self): 28 | return self.__class__.__name__ 29 | 30 | 31 | @TRANSFORMS.register_module() 32 | class ImageToTensor(object): 33 | def __init__(self, keys): 34 | self.keys = keys 35 | 36 | def __call__(self, results): 37 | for key in self.keys: 38 | img = results[key] 39 | if len(img.shape) < 3: 40 | img = np.expand_dims(img, -1) 41 | results[key] = to_tensor(img.transpose(2, 0, 1)) 42 | return results 43 | 44 | def __repr__(self): 45 | return self.__class__.__name__ + f'(keys={self.keys})' 46 | 47 | 48 | @TRANSFORMS.register_module() 49 | class Collect(object): 50 | def __init__(self, 51 | keys, 52 | meta_keys=('filename', 'ori_filename', 'ori_shape', 53 | 'img_shape', 'pad_shape', 'scale_factor', 'flip', 54 | 'flip_direction', 'img_norm_cfg')): 55 | self.keys = keys 56 | self.meta_keys = meta_keys 57 | 58 | def __call__(self, results): 59 | data = {} 60 | img_meta = {} 61 | for key in self.meta_keys: 62 | img_meta[key] = results[key] 63 | data['img_metas'] = img_meta 64 | for key in self.keys: 65 | data[key] = results[key] 66 | return data 67 | 68 | def __repr__(self): 69 | return self.__class__.__name__ + f'(keys={self.keys}, meta_keys={self.meta_keys})' 70 | -------------------------------------------------------------------------------- /python/jseg/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | from jseg.utils.registry import TRANSFORMS 4 | from .utils import imread 5 | 6 | 7 | @TRANSFORMS.register_module() 8 | class LoadImageFromFile(object): 9 | def __init__(self, to_float32=False, color_type='color', backend='cv2'): 10 | self.to_float32 = to_float32 11 | self.color_type = color_type 12 | self.backend = backend 13 | 14 | def __call__(self, results): 15 | if results.get('img_prefix') is not None: 16 | filename = osp.join(results['img_prefix'], 17 
| results['img_info']['filename']) 18 | else: 19 | filename = results['img_info']['filename'] 20 | img = imread(filename, flag=self.color_type, backend=self.backend) 21 | 22 | if self.to_float32: 23 | img = img.astype(np.float32) 24 | 25 | results['filename'] = filename 26 | results['ori_filename'] = results['img_info']['filename'] 27 | results['img'] = img 28 | results['img_shape'] = img.shape 29 | results['ori_shape'] = img.shape 30 | # Set initial values for default meta_keys 31 | results['pad_shape'] = img.shape 32 | results['scale_factor'] = 1.0 33 | num_channels = 1 if len(img.shape) < 3 else img.shape[2] 34 | results['img_norm_cfg'] = dict(mean=np.zeros(num_channels, 35 | dtype=np.float32), 36 | std=np.ones(num_channels, 37 | dtype=np.float32), 38 | to_rgb=False) 39 | return results 40 | 41 | def __repr__(self): 42 | repr_str = self.__class__.__name__ 43 | repr_str += f'(to_float32={self.to_float32},' 44 | repr_str += f"color_type='{self.color_type}'," 45 | repr_str += f"backend='{self.backend}')" 46 | return repr_str 47 | 48 | 49 | @TRANSFORMS.register_module() 50 | class LoadAnnotations(object): 51 | def __init__(self, reduce_zero_label=False, backend='pillow'): 52 | self.reduce_zero_label = reduce_zero_label 53 | self.backend = backend 54 | 55 | def __call__(self, results): 56 | if results.get('seg_prefix', None) is not None: 57 | filename = osp.join(results['seg_prefix'], 58 | results['ann_info']['seg_map']) 59 | else: 60 | filename = results['ann_info']['seg_map'] 61 | gt_semantic_seg = imread( 62 | filename, flag='unchanged', 63 | backend=self.backend).squeeze().astype(np.uint8) 64 | # modify if custom classes 65 | if results.get('label_map', None) is not None: 66 | gt_semantic_seg_copy = gt_semantic_seg.copy() 67 | for old_id, new_id in results['label_map'].items(): 68 | gt_semantic_seg[gt_semantic_seg_copy == old_id] = new_id 69 | # reduce zero_label 70 | if self.reduce_zero_label: 71 | # avoid using underflow conversion 72 | gt_semantic_seg[gt_semantic_seg == 0] = 255 73 | gt_semantic_seg = gt_semantic_seg - 1 74 | gt_semantic_seg[gt_semantic_seg == 254] = 255 75 | results['gt_semantic_seg'] = gt_semantic_seg 76 | results['seg_fields'].append('gt_semantic_seg') 77 | return results 78 | 79 | def __repr__(self): 80 | repr_str = self.__class__.__name__ 81 | repr_str += f'(reduce_zero_label={self.reduce_zero_label},' 82 | repr_str += f"backend='{self.backend}')" 83 | return repr_str 84 | -------------------------------------------------------------------------------- /python/jseg/datasets/potsdam.py: -------------------------------------------------------------------------------- 1 | from jseg.utils.registry import DATASETS 2 | from .custom import CustomDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class PotsdamDataset(CustomDataset): 7 | """ISPRS Potsdam dataset. 8 | In segmentation map annotation for Potsdam dataset, 0 is the ignore index. 9 | ``reduce_zero_label`` should be set to True. The ``img_suffix`` and 10 | ``seg_map_suffix`` are both fixed to '.png'. 
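With ``reduce_zero_label=True`` the loader remaps raw label 0 to 255 (the
ignore index) and shifts every other label down by one, so the six classes
above train with ids 0-5 (see ``LoadAnnotations`` earlier in this package).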
11 | """ 12 | CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree', 13 | 'car', 'clutter') 14 | 15 | PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0], 16 | [255, 255, 0], [255, 0, 0]] 17 | 18 | def __init__(self, **kwargs): 19 | super(PotsdamDataset, self).__init__( 20 | img_suffix='.png', 21 | seg_map_suffix='.png', 22 | reduce_zero_label=True, 23 | **kwargs) 24 | -------------------------------------------------------------------------------- /python/jseg/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from jseg.utils.registry import DATASETS 4 | from .custom import CustomDataset 5 | 6 | 7 | @DATASETS.register_module() 8 | class PascalVOCDataset(CustomDataset): 9 | CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 10 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 11 | 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 12 | 'train', 'tvmonitor') 13 | 14 | PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], 15 | [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], 16 | [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], 17 | [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], 18 | [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] 19 | 20 | def __init__(self, split, **kwargs): 21 | super(PascalVOCDataset, self).__init__(img_suffix='.jpg', 22 | seg_map_suffix='.png', 23 | split=split, 24 | **kwargs) 25 | assert osp.exists(self.img_dir) and self.split is not None 26 | -------------------------------------------------------------------------------- /python/jseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * 2 | from .decode_heads import * 3 | from .losses import * 4 | from .necks import * 5 | from .segmentors import * 6 | -------------------------------------------------------------------------------- /python/jseg/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .mix_transformer import * 2 | from .resnet import * 3 | from .mscan import MSCAN 4 | from .swin import SwinTransformer 5 | from .resnext import ResNeXt 6 | from .resnest import ResNeSt 7 | from .convnext import ConvNeXt 8 | from .vit import VisionTransformer 9 | from .beit import BEiT 10 | from .mae import MAE 11 | from .mobilenet_v2 import MobileNetV2 12 | from .clip_encoder_rlb import CLIPVisionTransformerWithRLB 13 | from .clip_text_encoder import CLIPTextEncoder 14 | -------------------------------------------------------------------------------- /python/jseg/models/backbones/clip_text_encoder.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | from jseg.utils.registry import BACKBONES 4 | 5 | from jseg.ops.cliprc_ops import Transformer, LayerNorm 6 | 7 | 8 | @BACKBONES.register_module() 9 | class CLIPTextEncoder(nn.Module): 10 | 11 | def __init__(self, 12 | context_length=77, 13 | vocab_size=49408, 14 | transformer_width=512, 15 | transformer_heads=8, 16 | transformer_layers=12, 17 | embed_dim=1024, 18 | pretrained=None, 19 | **kwargs): 20 | super().__init__() 21 | 22 | self.pretrained = pretrained 23 | 24 | self.context_length = context_length 25 | self.transformer = Transformer(width=transformer_width, 26 | layers=transformer_layers, 27 | heads=transformer_heads, 28 | 
attn_mask=self.build_attention_mask()) 29 | 30 | self.vocab_size = vocab_size 31 | self.token_embedding = nn.Embedding(vocab_size, transformer_width) 32 | self.positional_embedding = jt.empty( 33 | (self.context_length, transformer_width)) 34 | self.ln_final = LayerNorm(transformer_width) 35 | self.text_projection = jt.empty((transformer_width, embed_dim)) 36 | 37 | def init_weights(self, pretrained=None): 38 | pretrained = pretrained or self.pretrained 39 | if isinstance(pretrained, str): 40 | checkpoint = jt.load(pretrained) 41 | 42 | state_dict = {} 43 | 44 | for k in checkpoint.keys(): 45 | if k.startswith('transformer.'): 46 | state_dict[k] = checkpoint[k] 47 | 48 | if k == 'positional_embedding' or k == 'text_projection' or k.startswith( 49 | 'token_embedding') or k.startswith('ln_final'): 50 | if k == 'positional_embedding' and checkpoint[k].size( 51 | 0) > self.context_length: 52 | checkpoint[k] = checkpoint[k][:self.context_length] 53 | print('positional_embedding is truncated from 77 to', 54 | self.context_length) 55 | state_dict[k] = checkpoint[k] 56 | 57 | u, w = self.load_state_dict(state_dict, False) 58 | print(u, w, 'are misaligned params in text encoder') 59 | 60 | def build_attention_mask(self): 61 | # lazily create the causal attention mask over the text tokens 62 | # pytorch uses additive attention mask; fill with -inf 63 | mask = jt.empty((self.context_length, self.context_length)) 64 | mask.fill_(float("-inf")) 65 | mask = jt.triu_(mask, 1) # zero out the lower diagonal 66 | return mask 67 | 68 | def execute(self, text): 69 | x = self.token_embedding(text) 70 | x = x + self.positional_embedding 71 | x = x.permute(1, 0, 2) 72 | x = self.transformer(x) 73 | x = x.permute(1, 0, 2) 74 | x = self.ln_final(x) 75 | x = x[jt.arange(x.shape[0]), 76 | text.argmax(dim=-1)] @ self.text_projection 77 | return x 78 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .segformer_head import SegFormerHead 2 | from .fcn_head import FCNHead 3 | from .psp_head import PSPHead 4 | from .ham_head import LightHamHead 5 | from .uper_head import UPerHead 6 | from .ea_head import EAHead 7 | from .cc_head import CCHead 8 | from .da_head import DAHead 9 | from .aspp_head import ASPPHead 10 | from .sep_aspp_head import DepthwiseSeparableASPPHead 11 | from .point_head import PointHead 12 | from .fpn_head import FPNHead 13 | from .nl_head import NLHead 14 | # from .lraspp_head import LRASPPHead 15 | from .gc_head import GCHead 16 | from .ema_head import EMAHead 17 | from .ann_head import ANNHead 18 | from .cliprc_head import ATMSingleHeadSeg -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/cascade_decode_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | from .decode_head import BaseDecodeHead 4 | 5 | 6 | class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta): 7 | def __init__(self, *args, **kwargs): 8 | super(BaseCascadeDecodeHead, self).__init__(*args, **kwargs) 9 | 10 | @abstractmethod 11 | def execute(self, inputs, prev_output): 12 | pass 13 | 14 | def execute_train(self, inputs, prev_output, img_metas, gt_semantic_seg, 15 | train_cfg): 16 | seg_logits = self.execute(inputs, prev_output) 17 | losses = self.losses(seg_logits, gt_semantic_seg) 18 | 
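# losses() is presumably inherited from BaseDecodeHead; only execute(),
# which additionally consumes the previous stage's output, is
# cascade-specific.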
19 | return losses 20 | 21 | def execute_test(self, inputs, prev_output, img_metas, test_cfg): 22 | return self.execute(inputs, prev_output) 23 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/cc_head.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from .fcn_head import FCNHead 3 | from jseg.utils.registry import HEADS 4 | 5 | try: 6 | from jseg.ops import CrissCrossAttention 7 | except ModuleNotFoundError: 8 | CrissCrossAttention = None 9 | 10 | 11 | @HEADS.register_module() 12 | class CCHead(FCNHead): 13 | def __init__(self, recurrence=2, **kwargs): 14 | if CrissCrossAttention is None: 15 | raise RuntimeError('CrissCrossAttention is unavailable: ' 16 | 'it could not be imported from jseg.ops') 17 | super(CCHead, self).__init__(num_convs=2, **kwargs) 18 | self.recurrence = recurrence 19 | self.cca = CrissCrossAttention(self.channels) 20 | 21 | def execute(self, inputs): 22 | """Forward function.""" 23 | x = self._transform_inputs(inputs) 24 | output = self.convs[0](x) 25 | for _ in range(self.recurrence): 26 | output = self.cca(output) 27 | output = self.convs[1](output) 28 | if self.concat_input: 29 | output = self.conv_cat(jt.concat([x, output], dim=1)) 30 | output = self.cls_seg(output) 31 | return output 32 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/ea_head.py: -------------------------------------------------------------------------------- 1 | from .decode_head import BaseDecodeHead 2 | from jseg.utils.registry import HEADS 3 | from jseg.ops import External_attention 4 | 5 | 6 | @HEADS.register_module() 7 | class EAHead(BaseDecodeHead): 8 | 9 | def __init__(self, **kwargs): 10 | super(EAHead, self).__init__(**kwargs) 11 | self.ea = External_attention(self.in_channels, self.channels) 12 | 13 | def execute(self, inputs): 14 | x = self._transform_inputs(inputs) 15 | x = self.ea(x) 16 | output = self.cls_seg(x) 17 | return output 18 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/fcn_head.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | from jseg.bricks import ConvModule 4 | 5 | from jseg.utils.registry import HEADS 6 | from .decode_head import BaseDecodeHead 7 | 8 | 9 | @HEADS.register_module() 10 | class FCNHead(BaseDecodeHead): 11 | def __init__(self, 12 | num_convs=2, 13 | kernel_size=3, 14 | concat_input=True, 15 | dilation=1, 16 | **kwargs): 17 | assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int) 18 | self.num_convs = num_convs 19 | self.concat_input = concat_input 20 | self.kernel_size = kernel_size 21 | super(FCNHead, self).__init__(**kwargs) 22 | if num_convs == 0: 23 | assert self.in_channels == self.channels 24 | 25 | conv_padding = (kernel_size // 2) * dilation 26 | convs = [] 27 | convs.append( 28 | ConvModule( 29 | self.in_channels, 30 | self.channels, 31 | kernel_size=kernel_size, 32 | padding=conv_padding, 33 | dilation=dilation, 34 | conv_cfg=self.conv_cfg, 35 | norm_cfg=self.norm_cfg, 36 | act_cfg=self.act_cfg)) 37 | for i in range(num_convs - 1): 38 | convs.append( 39 | ConvModule( 40 | self.channels, 41 | self.channels, 42 | kernel_size=kernel_size, 43 | padding=conv_padding, 44 | dilation=dilation, 45 | conv_cfg=self.conv_cfg, 46 | norm_cfg=self.norm_cfg, 47 | act_cfg=self.act_cfg)) 48 | if num_convs == 0: 
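# with num_convs == 0 the head reduces to the bare classifier in cls_seg,
# hence the assert above that in_channels == channels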
self.convs = nn.Identity() 50 | else: 51 | self.convs = nn.Sequential(*convs) 52 | if self.concat_input: 53 | self.conv_cat = ConvModule( 54 | self.in_channels + self.channels, 55 | self.channels, 56 | kernel_size=kernel_size, 57 | padding=kernel_size // 2, 58 | conv_cfg=self.conv_cfg, 59 | norm_cfg=self.norm_cfg, 60 | act_cfg=self.act_cfg) 61 | 62 | def _execute_feature(self, inputs): 63 | x = self._transform_inputs(inputs) 64 | feats = self.convs(x) 65 | if self.concat_input: 66 | feats = self.conv_cat(jt.concat([x, feats], dim=1)) 67 | return feats 68 | 69 | def execute(self, inputs): 70 | output = self._execute_feature(inputs) 71 | output = self.cls_seg(output) 72 | return output 73 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/fpn_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from jittor import nn 3 | from jseg.bricks import ConvModule 4 | 5 | from jseg.ops import Upsample, resize 6 | from jseg.utils.registry import HEADS 7 | from .decode_head import BaseDecodeHead 8 | 9 | 10 | @HEADS.register_module() 11 | class FPNHead(BaseDecodeHead): 12 | 13 | def __init__(self, feature_strides, **kwargs): 14 | super(FPNHead, self).__init__(input_transform='multiple_select', 15 | **kwargs) 16 | assert len(feature_strides) == len(self.in_channels) 17 | assert min(feature_strides) == feature_strides[0] 18 | self.feature_strides = feature_strides 19 | 20 | self.scale_heads = nn.ModuleList() 21 | for i in range(len(feature_strides)): 22 | head_length = max( 23 | 1, 24 | int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) 25 | scale_head = [] 26 | for k in range(head_length): 27 | scale_head.append( 28 | ConvModule( 29 | self.in_channels[i] if k == 0 else self.channels, 30 | self.channels, 31 | 3, 32 | padding=1, 33 | conv_cfg=self.conv_cfg, 34 | norm_cfg=self.norm_cfg, 35 | act_cfg=self.act_cfg)) 36 | if feature_strides[i] != feature_strides[0]: 37 | scale_head.append( 38 | Upsample(scale_factor=2, 39 | mode='bilinear', 40 | align_corners=self.align_corners)) 41 | self.scale_heads.append(nn.Sequential(*scale_head)) 42 | 43 | def execute(self, inputs): 44 | 45 | x = self._transform_inputs(inputs) 46 | 47 | output = self.scale_heads[0](x[0]) 48 | for i in range(1, len(self.feature_strides)): 49 | # non inplace 50 | output = output + resize(self.scale_heads[i](x[i]), 51 | size=output.shape[2:], 52 | mode='bilinear', 53 | align_corners=self.align_corners) 54 | 55 | output = self.cls_seg(output) 56 | return output 57 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/gc_head.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jseg.ops import ContextBlock 3 | 4 | from jseg.utils.registry import HEADS 5 | from .fcn_head import FCNHead 6 | 7 | 8 | @HEADS.register_module() 9 | class GCHead(FCNHead): 10 | 11 | def __init__(self, 12 | ratio=1 / 4., 13 | pooling_type='att', 14 | fusion_types=('channel_add', ), 15 | **kwargs): 16 | super(GCHead, self).__init__(num_convs=2, **kwargs) 17 | self.ratio = ratio 18 | self.pooling_type = pooling_type 19 | self.fusion_types = fusion_types 20 | self.gc_block = ContextBlock(in_channels=self.channels, 21 | ratio=self.ratio, 22 | pooling_type=self.pooling_type, 23 | fusion_types=self.fusion_types) 24 | 25 | def execute(self, inputs): 26 | x = self._transform_inputs(inputs) 27 | output = 
self.convs[0](x) 28 | output = self.gc_block(output) 29 | output = self.convs[1](output) 30 | if self.concat_input: 31 | output = self.conv_cat(jt.concat([x, output], dim=1)) 32 | output = self.cls_seg(output) 33 | return output 34 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/nl_head.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jseg.ops.non_local import NonLocal2d 3 | 4 | from jseg.utils.registry import HEADS 5 | from .fcn_head import FCNHead 6 | 7 | 8 | @HEADS.register_module() 9 | class NLHead(FCNHead): 10 | def __init__(self, 11 | reduction=2, 12 | use_scale=True, 13 | mode='embedded_gaussian', 14 | **kwargs): 15 | super(NLHead, self).__init__(num_convs=2, **kwargs) 16 | self.reduction = reduction 17 | self.use_scale = use_scale 18 | self.mode = mode 19 | self.nl_block = NonLocal2d(in_channels=self.channels, 20 | reduction=self.reduction, 21 | use_scale=self.use_scale, 22 | conv_cfg=self.conv_cfg, 23 | norm_cfg=self.norm_cfg, 24 | mode=self.mode) 25 | 26 | def execute(self, inputs): 27 | """execute function.""" 28 | x = self._transform_inputs(inputs) 29 | output = self.convs[0](x) 30 | output = self.nl_block(output) 31 | output = self.convs[1](output) 32 | if self.concat_input: 33 | output = self.conv_cat(jt.concat([x, output], dim=1)) 34 | output = self.cls_seg(output) 35 | return output 36 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/psp_head.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | from jseg.bricks import ConvModule 4 | 5 | from jseg.ops import resize 6 | from jseg.utils.registry import HEADS 7 | from .decode_head import BaseDecodeHead 8 | 9 | 10 | class PPM(nn.ModuleList): 11 | 12 | def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, 13 | act_cfg, align_corners, **kwargs): 14 | super(PPM, self).__init__() 15 | self.pool_scales = pool_scales 16 | self.align_corners = align_corners 17 | self.in_channels = in_channels 18 | self.channels = channels 19 | self.conv_cfg = conv_cfg 20 | self.norm_cfg = norm_cfg 21 | self.act_cfg = act_cfg 22 | for pool_scale in pool_scales: 23 | self.append( 24 | nn.Sequential( 25 | nn.AdaptiveAvgPool2d(pool_scale), 26 | ConvModule(self.in_channels, 27 | self.channels, 28 | 1, 29 | conv_cfg=self.conv_cfg, 30 | norm_cfg=self.norm_cfg, 31 | act_cfg=self.act_cfg, 32 | **kwargs))) 33 | 34 | def execute(self, x): 35 | ppm_outs = [] 36 | for ppm in self: 37 | ppm_out = ppm(x) 38 | upsampled_ppm_out = resize(ppm_out, 39 | size=x.size()[2:], 40 | mode='bilinear', 41 | align_corners=self.align_corners) 42 | ppm_outs.append(upsampled_ppm_out) 43 | return ppm_outs 44 | 45 | 46 | @HEADS.register_module() 47 | class PSPHead(BaseDecodeHead): 48 | 49 | def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): 50 | super(PSPHead, self).__init__(**kwargs) 51 | assert isinstance(pool_scales, (list, tuple)) 52 | self.pool_scales = pool_scales 53 | self.psp_modules = PPM(self.pool_scales, 54 | self.in_channels, 55 | self.channels, 56 | conv_cfg=self.conv_cfg, 57 | norm_cfg=self.norm_cfg, 58 | act_cfg=self.act_cfg, 59 | align_corners=self.align_corners) 60 | self.bottleneck = ConvModule(self.in_channels + 61 | len(pool_scales) * self.channels, 62 | self.channels, 63 | 3, 64 | padding=1, 65 | conv_cfg=self.conv_cfg, 66 | norm_cfg=self.norm_cfg, 67 | 
act_cfg=self.act_cfg) 68 | 69 | def execute(self, inputs): 70 | x = self._transform_inputs(inputs) 71 | psp_outs = [x] 72 | psp_outs.extend(self.psp_modules(x)) 73 | psp_outs = jt.concat(psp_outs, dim=1) 74 | output = self.bottleneck(psp_outs) 75 | output = self.cls_seg(output) 76 | return output 77 | -------------------------------------------------------------------------------- /python/jseg/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .cross_entropy_loss import (CrossEntropyLoss, cross_entropy) 3 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 4 | 5 | __all__ = [ 6 | 'accuracy', 'Accuracy', 'cross_entropy', 'CrossEntropyLoss', 'reduce_loss', 7 | 'weight_reduce_loss', 'weighted_loss' 8 | ] 9 | -------------------------------------------------------------------------------- /python/jseg/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | import jittor as jt 3 | 4 | eps = jt.Var(1.1920928955078125e-07) 5 | 6 | 7 | def accuracy(pred, target, topk=1, thresh=None, ignore_index=None): 8 | assert isinstance(topk, (int, tuple)) 9 | if isinstance(topk, int): 10 | topk = (topk, ) 11 | return_single = True 12 | else: 13 | return_single = False 14 | 15 | maxk = max(topk) 16 | if pred.size(0) == 0: 17 | accu = [jt.Var(0.) for i in range(len(topk))] 18 | return accu[0] if return_single else accu 19 | assert pred.ndim == target.ndim + 1 20 | assert pred.size(0) == target.size(0) 21 | assert maxk <= pred.size(1), \ 22 | f'maxk {maxk} exceeds pred dimension {pred.size(1)}' 23 | pred_value, pred_label = pred.topk(maxk, dim=1) 24 | # transpose to shape (maxk, N, ...) 25 | pred_label = pred_label.transpose(0, 1) 26 | correct = pred_label == (target.unsqueeze(0).expand_as(pred_label)) 27 | if thresh is not None: 28 | # Only prediction values larger than thresh are counted as correct 29 | correct = correct & (pred_value > thresh).t() 30 | if ignore_index is not None: 31 | correct = correct[:, target != ignore_index] 32 | res = [] 33 | for k in topk: 34 | # Avoid causing ZeroDivisionError when all pixels 35 | # of an image are ignored 36 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdims=True) + eps 37 | if ignore_index is not None: 38 | total_num = target[target != ignore_index].numel() + eps 39 | else: 40 | total_num = target.numel() + eps 41 | res.append(correct_k.multiply(100.0 / total_num)) 42 | return res[0] if return_single else res 43 | 44 | 45 | class Accuracy(nn.Module): 46 | 47 | def __init__(self, topk=(1, ), thresh=None, ignore_index=None): 48 | super().__init__() 49 | self.topk = topk 50 | self.thresh = thresh 51 | self.ignore_index = ignore_index 52 | 53 | def execute(self, pred, target): 54 | return accuracy(pred, target, self.topk, self.thresh, 55 | self.ignore_index) 56 | -------------------------------------------------------------------------------- /python/jseg/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import warnings 3 | 4 | 5 | def get_enum(reduction: str) -> int: 6 | if reduction == 'none': 7 | ret = 0 8 | elif reduction == 'mean': 9 | ret = 1 10 | elif reduction == 'elementwise_mean': 11 | warnings.warn( 12 | "reduction='elementwise_mean' is deprecated, please use reduction='mean' instead." 
13 | ) 14 | ret = 1 15 | elif reduction == 'sum': 16 | ret = 2 17 | else: 18 | ret = -1 # TODO: remove once JIT exceptions support control flow 19 | raise ValueError( 20 | "{} is not a valid value for reduction".format(reduction)) 21 | return ret 22 | 23 | 24 | def reduce_loss(loss, reduction): 25 | reduction_enum = get_enum(reduction) 26 | # none: 0, elementwise_mean:1, sum: 2 27 | if reduction_enum == 0: 28 | return loss 29 | elif reduction_enum == 1: 30 | return loss.mean() 31 | elif reduction_enum == 2: 32 | return loss.sum() 33 | 34 | 35 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 36 | # if weight is specified, apply element-wise weight 37 | if weight is not None: 38 | assert weight.ndim == loss.ndim 39 | if weight.ndim > 1: 40 | assert weight.size(1) == 1 or weight.size(1) == loss.size(1) 41 | loss = loss * weight 42 | 43 | # if avg_factor is not specified, just reduce the loss 44 | if avg_factor is None: 45 | loss = reduce_loss(loss, reduction) 46 | else: 47 | # if reduction is mean, then average the loss by avg_factor 48 | if reduction == 'mean': 49 | loss = loss.sum() / avg_factor 50 | # if reduction is 'none', then do nothing, otherwise raise an error 51 | elif reduction != 'none': 52 | raise ValueError('avg_factor can not be used with reduction="sum"') 53 | return loss 54 | 55 | 56 | def weighted_loss(loss_func): 57 | @functools.wraps(loss_func) 58 | def wrapper(pred, 59 | target, 60 | weight=None, 61 | reduction='mean', 62 | avg_factor=None, 63 | **kwargs): 64 | # get element-wise loss 65 | loss = loss_func(pred, target, **kwargs) 66 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 67 | return loss 68 | 69 | return wrapper 70 | -------------------------------------------------------------------------------- /python/jseg/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .multilevel_neck import MultiLevelNeck 2 | from .featurepyramid import Feature2Pyramid 3 | from .fpn import FPN -------------------------------------------------------------------------------- /python/jseg/models/necks/featurepyramid.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | from jseg.bricks import build_norm_layer 3 | 4 | from jseg.utils.registry import NECKS 5 | 6 | 7 | @NECKS.register_module() 8 | class Feature2Pyramid(nn.Module): 9 | def __init__(self, 10 | embed_dim, 11 | rescales=[4, 2, 1, 0.5], 12 | norm_cfg=dict(type='BN')): 13 | super(Feature2Pyramid, self).__init__() 14 | self.rescales = rescales 15 | self.upsample_4x = None 16 | for k in self.rescales: 17 | if k == 4: 18 | self.upsample_4x = nn.Sequential( 19 | nn.ConvTranspose2d( 20 | embed_dim, embed_dim, kernel_size=2, stride=2), 21 | build_norm_layer(norm_cfg, embed_dim)[1], 22 | nn.GELU(), 23 | nn.ConvTranspose2d( 24 | embed_dim, embed_dim, kernel_size=2, stride=2), 25 | ) 26 | elif k == 2: 27 | self.upsample_2x = nn.Sequential( 28 | nn.ConvTranspose2d( 29 | embed_dim, embed_dim, kernel_size=2, stride=2)) 30 | elif k == 1: 31 | self.identity = nn.Identity() 32 | elif k == 0.5: 33 | self.downsample_2x = nn.MaxPool2d(kernel_size=2, stride=2) 34 | elif k == 0.25: 35 | self.downsample_4x = nn.MaxPool2d(kernel_size=4, stride=4) 36 | else: 37 | raise KeyError(f'invalid {k} for feature2pyramid') 38 | 39 | def execute(self, inputs): 40 | assert len(inputs) == len(self.rescales) 41 | outputs = [] 42 | if self.upsample_4x is not None: 43 | ops = [ 44 | self.upsample_4x, 
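# this branch assumes rescales == [4, 2, 1, 0.5]; the else branch below
# assumes [2, 1, 0.5, 0.25] -- the ops must line up with the input order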
self.upsample_2x, self.identity, 45 | self.downsample_2x 46 | ] 47 | else: 48 | ops = [ 49 | self.upsample_2x, self.identity, self.downsample_2x, 50 | self.downsample_4x 51 | ] 52 | for i in range(len(inputs)): 53 | outputs.append(ops[i](inputs[i])) 54 | return tuple(outputs) 55 | -------------------------------------------------------------------------------- /python/jseg/models/necks/multilevel_neck.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | from jseg.utils.weight_init import xavier_init 3 | from jseg.ops import resize 4 | from jseg.utils.registry import NECKS 5 | import collections 6 | 7 | 8 | @NECKS.register_module() 9 | class MultiLevelNeck(nn.Module): 10 | """MultiLevelNeck. 11 | 12 | A neck structure connecting a ViT backbone and decode heads. 13 | 14 | Args: 15 | in_channels (List[int]): Number of input channels per scale. 16 | out_channels (int): Number of output channels (used at each scale). 17 | scales (List[float]): Scale factors for each input feature map. 18 | Default: [0.5, 1, 2, 4] 19 | Note: the lateral and output convs are plain ``nn.Conv`` layers; 20 | normalization and activation layers are not configurable in this 21 | implementation. 22 | """ 23 | 24 | def __init__(self, in_channels, out_channels, scales=[0.5, 1, 2, 4]): 25 | super(MultiLevelNeck, self).__init__() 26 | assert isinstance(in_channels, list) 27 | self.in_channels = in_channels 28 | self.out_channels = out_channels 29 | self.scales = scales 30 | self.num_outs = len(scales) 31 | self.lateral_convs = nn.ModuleList() 32 | self.convs = nn.ModuleList() 33 | for in_channel in in_channels: 34 | 35 | self.lateral_convs.append( 36 | nn.Sequential( 37 | collections.OrderedDict([ 38 | ('conv', nn.Conv(in_channel, out_channels, 1)), 39 | ]))) 40 | 41 | for _ in range(self.num_outs): 42 | self.convs.append( 43 | nn.Sequential( 44 | collections.OrderedDict([('conv', 45 | nn.Conv(out_channels, 46 | out_channels, 47 | 3, 48 | padding=1))]))) 49 | 50 | # default init_weights for conv(msra) and norm in ConvModule 51 | def init_weights(self): 52 | for m in self.modules(): 53 | if isinstance(m, nn.Conv2d): 54 | xavier_init(m, distribution='uniform') 55 | 56 | def execute(self, inputs): 57 | assert len(inputs) == len(self.in_channels) 58 | inputs = [ 59 | lateral_conv(inputs[i]) 60 | for i, lateral_conv in enumerate(self.lateral_convs) 61 | ] 62 | # for len(inputs) not equal to self.num_outs 63 | if len(inputs) == 1: 64 | inputs = [inputs[0] for _ in range(self.num_outs)] 65 | outs = [] 66 | for i in range(self.num_outs): 67 | x_resize = resize(inputs[i], 68 | scale_factor=self.scales[i], 69 | mode='bilinear') 70 | outs.append(self.convs[i](x_resize)) 71 | return tuple(outs) 72 | -------------------------------------------------------------------------------- /python/jseg/models/segmentors/__init__.py: -------------------------------------------------------------------------------- 1 | from .encoder_decoder import EncoderDecoder 2 | from .cascade_encoder_decoder import CascadeEncoderDecoder 3 | from .clip_rc import CLIPRC 4 | 5 | __all__ = ['EncoderDecoder', 'CascadeEncoderDecoder', 'CLIPRC'] 6 | -------------------------------------------------------------------------------- /python/jseg/models/segmentors/cascade_encoder_decoder.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | 3 | from jseg.utils.general import add_prefix 4 | from jseg.ops import resize 5 | from jseg.utils.registry import 
MODELS, build_from_cfg, HEADS 6 | from .encoder_decoder import EncoderDecoder 7 | 8 | 9 | @MODELS.register_module() 10 | class CascadeEncoderDecoder(EncoderDecoder): 11 | 12 | def __init__(self, 13 | num_stages, 14 | backbone, 15 | decode_head, 16 | neck=None, 17 | auxiliary_head=None, 18 | train_cfg=None, 19 | test_cfg=None, 20 | pretrained=None): 21 | self.num_stages = num_stages 22 | super(CascadeEncoderDecoder, 23 | self).__init__(backbone=backbone, 24 | decode_head=decode_head, 25 | neck=neck, 26 | auxiliary_head=auxiliary_head, 27 | train_cfg=train_cfg, 28 | test_cfg=test_cfg, 29 | pretrained=pretrained) 30 | 31 | def _init_decode_head(self, decode_head): 32 | """Initialize ``decode_head``""" 33 | assert isinstance(decode_head, list) 34 | assert len(decode_head) == self.num_stages 35 | self.decode_head = nn.ModuleList() 36 | for i in range(self.num_stages): 37 | self.decode_head.append(build_from_cfg(decode_head[i], HEADS)) 38 | self.align_corners = self.decode_head[-1].align_corners 39 | self.num_classes = self.decode_head[-1].num_classes 40 | self.out_channels = self.decode_head[-1].out_channels 41 | 42 | def encode_decode(self, img, img_metas): 43 | """Encode images with backbone and decode into a semantic segmentation 44 | map of the same size as input.""" 45 | x = self.extract_feat(img) 46 | out = self.decode_head[0].execute_test(x, img_metas, self.test_cfg) 47 | for i in range(1, self.num_stages): 48 | out = self.decode_head[i].execute_test(x, out, img_metas, 49 | self.test_cfg) 50 | out = resize(input=out, 51 | size=img.shape[2:], 52 | mode='bilinear', 53 | align_corners=self.align_corners) 54 | return out 55 | 56 | def _decode_head_execute_train(self, x, img_metas, gt_semantic_seg): 57 | """Run execute function and calculate loss for decode head in 58 | training.""" 59 | losses = dict() 60 | 61 | loss_decode = self.decode_head[0].execute_train( 62 | x, img_metas, gt_semantic_seg, self.train_cfg) 63 | 64 | losses.update(add_prefix(loss_decode, 'decode_0')) 65 | 66 | for i in range(1, self.num_stages): 67 | # execute test again, maybe unnecessary for most methods. 
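# Each stage is supervised against the ground truth while consuming the
# previous stage's inference-mode prediction, so training mirrors the
# cascaded test-time behaviour stage by stage.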
68 | if i == 1: 69 | prev_outputs = self.decode_head[0].execute_test( 70 | x, img_metas, self.test_cfg) 71 | else: 72 | prev_outputs = self.decode_head[i - 1].execute_test( 73 | x, prev_outputs, img_metas, self.test_cfg) 74 | loss_decode = self.decode_head[i].execute_train( 75 | x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg) 76 | losses.update(add_prefix(loss_decode, f'decode_{i}')) 77 | 78 | return losses 79 | -------------------------------------------------------------------------------- /python/jseg/models/utils/se_layer.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | from jseg.bricks import ConvModule 3 | 4 | from jseg.utils.helpers import make_divisible 5 | from jseg.utils.general import is_tuple_of 6 | 7 | 8 | class SELayer(nn.Module): 9 | 10 | def __init__(self, 11 | channels, 12 | ratio=16, 13 | conv_cfg=None, 14 | act_cfg=(dict(type='ReLU'), 15 | dict(type='HSigmoid', bias=3.0, divisor=6.0))): 16 | super(SELayer, self).__init__() 17 | if isinstance(act_cfg, dict): 18 | act_cfg = (act_cfg, act_cfg) 19 | assert len(act_cfg) == 2 20 | assert is_tuple_of(act_cfg, dict) 21 | self.global_avgpool = nn.AdaptiveAvgPool2d(1) 22 | self.conv1 = ConvModule(in_channels=channels, 23 | out_channels=make_divisible( 24 | channels // ratio, 8), 25 | kernel_size=1, 26 | stride=1, 27 | conv_cfg=conv_cfg, 28 | act_cfg=act_cfg[0]) 29 | self.conv2 = ConvModule(in_channels=make_divisible( 30 | channels // ratio, 8), 31 | out_channels=channels, 32 | kernel_size=1, 33 | stride=1, 34 | conv_cfg=conv_cfg, 35 | act_cfg=act_cfg[1]) 36 | 37 | def execute(self, x): 38 | out = self.global_avgpool(x) 39 | out = self.conv1(out) 40 | out = self.conv2(out) 41 | return x * out 42 | -------------------------------------------------------------------------------- /python/jseg/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .wrappers import Upsample, resize 2 | from .external_attention import External_attention 3 | from .cc_attention import CrissCrossAttention 4 | from .scale import Scale 5 | from .self_attention_block import SelfAttentionBlock 6 | from .multi_head_attention import MultiheadAttention 7 | from .context_block import ContextBlock 8 | 9 | __all__ = [ 10 | 'Upsample', 'resize', 'External_attention', 'CrissCrossAttention', 'Scale', 11 | 'SelfAttentionBlock', 'MultiheadAttention', 'ContextBlock' 12 | ] 13 | -------------------------------------------------------------------------------- /python/jseg/ops/cc_attention.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | 4 | from .scale import Scale 5 | 6 | 7 | def NEG_INF_DIAG(n): 8 | return jt.diag(jt.Var(float('-inf')).repeat(n), 0) 9 | 10 | 11 | class CrissCrossAttention(nn.Module): 12 | def __init__(self, in_channels): 13 | super().__init__() 14 | self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1) 15 | self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1) 16 | self.value_conv = nn.Conv2d(in_channels, in_channels, 1) 17 | self.gamma = Scale(0.) 
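# gamma starts at zero, so the block is an identity mapping at
# initialization; the attention term is blended in as gamma is learned
# (see `out = self.gamma(out) + x` in execute)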
18 | self.in_channels = in_channels 19 | 20 | def execute(self, x): 21 | B, C, H, W = x.size() 22 | query = self.query_conv(x) 23 | key = self.key_conv(x) 24 | value = self.value_conv(x) 25 | energy_H = jt.linalg.einsum('bchw,bciw->bwhi', query, 26 | key) + NEG_INF_DIAG(H) 27 | energy_H = energy_H.transpose(1, 2) 28 | energy_W = jt.linalg.einsum('bchw,bchj->bhwj', query, key) 29 | attn = nn.softmax(jt.concat([energy_H, energy_W], dim=-1), 30 | dim=-1) # [B,H,W,(H+W)] 31 | out = jt.linalg.einsum('bciw,bhwi->bchw', value, attn[..., :H]) 32 | out += jt.linalg.einsum('bchj,bhwj->bchw', value, attn[..., H:]) 33 | 34 | out = self.gamma(out) + x 35 | 36 | return out 37 | 38 | def __repr__(self): 39 | s = self.__class__.__name__ 40 | s += f'(in_channels={self.in_channels})' 41 | return s 42 | -------------------------------------------------------------------------------- /python/jseg/ops/external_attention.py: -------------------------------------------------------------------------------- 1 | from jittor import Module, nn 2 | from jseg.bricks import ConvModule 3 | 4 | 5 | class External_attention(Module): 6 | ''' 7 | Arguments: 8 | in_channels (int): input channels. channels (int): working width. k (int): number of external memory units. 9 | ''' 10 | def __init__(self, in_channels, channels, k=256): 11 | super(External_attention, self).__init__() 12 | 13 | self.in_channels = in_channels 14 | self.channels = channels 15 | self.k = k 16 | 17 | self.conv1 = ConvModule(self.in_channels, self.channels, 1) 18 | 19 | self.linear_0 = ConvModule(self.channels, self.k, 1) 20 | 21 | self.linear_1 = ConvModule(self.k, self.channels, 1) 22 | 23 | self.conv2 = ConvModule(self.channels, self.channels, 1) 24 | 25 | def execute(self, x): 26 | x = self.conv1(x) 27 | idn = x 28 | b, c, h, w = x.size() 29 | x = self.linear_0(x) # b, k, h, w 30 | x = x.view(b, self.k, h * w) # b * k * n 31 | 32 | x = nn.softmax(x, dim=-1) # b, k, n 33 | x = x / (1e-9 + x.sum(dim=1, keepdims=True)) # b, k, n 34 | 35 | x = x.view(b, self.k, h, w) 36 | x = self.linear_1(x) # b, c, h, w 37 | 38 | x = x + idn 39 | x = self.conv2(x) 40 | return x 41 | -------------------------------------------------------------------------------- /python/jseg/ops/scale.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | 4 | 5 | class Scale(nn.Module): 6 | """A learnable scale parameter. 7 | 8 | This layer scales the input by a learnable factor. It multiplies a 9 | learnable scale parameter of shape (1,) with input of any shape. 10 | 11 | Args: 12 | scale (float): Initial value of scale factor. 
Default: 1.0 13 | """ 14 | 15 | def __init__(self, scale=1.0): 16 | super().__init__() 17 | self.scale = jt.Var(scale) 18 | 19 | def execute(self, x): 20 | return x * self.scale 21 | -------------------------------------------------------------------------------- /python/jseg/ops/wrappers.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from jittor import nn 3 | from jittor import Function 4 | 5 | 6 | # TODO Save memory 7 | class Resize(Function): 8 | 9 | def execute(self, input, size, scale_factor, mode, align_corners): 10 | self.input_size = input.shape[2:] 11 | self.scale_factor = scale_factor 12 | self.mode = mode 13 | self.align_corners = align_corners 14 | return nn.interpolate(input, size, scale_factor, mode, align_corners) 15 | 16 | def grad(self, grad_output): 17 | return nn.interpolate(grad_output, self.input_size, self.scale_factor, 18 | self.mode, self.align_corners) 19 | 20 | 21 | interpolate = Resize.apply 22 | 23 | 24 | def resize(input, 25 | size=None, 26 | scale_factor=None, 27 | mode='nearest', 28 | align_corners=None, 29 | warning=True): 30 | if warning: 31 | if size is not None and align_corners: 32 | input_h, input_w = tuple(int(x) for x in input.shape[2:]) 33 | output_h, output_w = tuple(int(x) for x in size) 34 | if output_h > input_h or output_w > input_w: 35 | if ((output_h > 1 and output_w > 1 and input_h > 1 36 | and input_w > 1) and (output_h - 1) % (input_h - 1) 37 | and (output_w - 1) % (input_w - 1)): 38 | warnings.warn( 39 | f'When align_corners={align_corners}, ' 40 | 'the output would be more aligned if ' 41 | f'input size {(input_h, input_w)} is `x+1` and ' 42 | f'out size {(output_h, output_w)} is `nx+1`') 43 | if size is not None: 44 | size = tuple(int(x) for x in size) 45 | return interpolate(input, size, scale_factor, mode, align_corners) 46 | 47 | 48 | class Upsample(nn.Module): 49 | 50 | def __init__(self, 51 | size=None, 52 | scale_factor=None, 53 | mode='nearest', 54 | align_corners=None): 55 | super(Upsample, self).__init__() 56 | self.size = size 57 | if isinstance(scale_factor, tuple): 58 | self.scale_factor = tuple(float(factor) for factor in scale_factor) 59 | else: 60 | self.scale_factor = float(scale_factor) if scale_factor else None 61 | self.mode = mode 62 | self.align_corners = align_corners 63 | 64 | def execute(self, x): 65 | if not self.size: 66 | size = [int(t * self.scale_factor) for t in x.shape[-2:]] 67 | else: 68 | size = self.size 69 | return resize(x, size, None, self.mode, self.align_corners) 70 | -------------------------------------------------------------------------------- /python/jseg/optims/__init__.py: -------------------------------------------------------------------------------- 1 | from .lr_scheduler import * 2 | from .optimizer import * 3 | from .prameter_groups_generator import * 4 | from .lr_decay_parameter_groups_generator import * -------------------------------------------------------------------------------- /python/jseg/optims/prameter_groups_generator.py: -------------------------------------------------------------------------------- 1 | from jseg.utils.registry import MODELS 2 | 3 | 4 | @MODELS.register_module() 5 | def CustomPrameterGroupsGenerator(named_params, model, custom_keys={}, logger=None): 6 | def get_custom_parameter_groups(name): 7 | for ck in custom_keys.keys(): 8 | if ck in name: 9 | return custom_keys[ck] 10 | return None 11 | 12 | normal_group_list = [] 13 | custom_group_list = [] 14 | 15 | for p in named_params: 16 | name, param = 
p 17 | custom_group = get_custom_parameter_groups(name) 18 | if custom_group is not None: 19 | tmp = {} 20 | tmp['params'] = [param] 21 | for i in custom_group.keys(): 22 | tmp[i] = custom_group.get(i) 23 | custom_group_list.append(tmp) 24 | continue 25 | normal_group_list.append({'params': [param]}) 26 | return normal_group_list + custom_group_list 27 | -------------------------------------------------------------------------------- /python/jseg/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .runner import Runner 2 | -------------------------------------------------------------------------------- /python/jseg/sampler/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_pixel_sampler import BasePixelSampler 2 | from .ohem_pixel_sampler import OHEMPixelSampler 3 | 4 | __all__ = ['BasePixelSampler', 'OHEMPixelSampler'] 5 | -------------------------------------------------------------------------------- /python/jseg/sampler/base_pixel_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BasePixelSampler(metaclass=ABCMeta): 5 | """Base class of pixel sampler.""" 6 | 7 | def __init__(self, **kwargs): 8 | pass 9 | 10 | @abstractmethod 11 | def sample(self, seg_logit, seg_label): 12 | """Placeholder for sample function.""" 13 | -------------------------------------------------------------------------------- /python/jseg/sampler/ohem_pixel_sampler.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | 4 | from jseg.utils.registry import PIXEL_SAMPLERS 5 | from .base_pixel_sampler import BasePixelSampler 6 | 7 | 8 | @PIXEL_SAMPLERS.register_module() 9 | class OHEMPixelSampler(BasePixelSampler): 10 | def __init__(self, context, thresh=None, min_kept=100000): 11 | super(OHEMPixelSampler, self).__init__() 12 | self.context = context 13 | assert min_kept > 1 14 | self.thresh = thresh 15 | self.min_kept = min_kept 16 | 17 | def sample(self, seg_logit, seg_label): 18 | with jt.no_grad(): 19 | assert seg_logit.shape[2:] == seg_label.shape[2:] 20 | assert seg_label.shape[1] == 1 21 | seg_label = seg_label.squeeze(1).long() 22 | batch_kept = self.min_kept * seg_label.size(0) 23 | valid_mask = seg_label != self.context.ignore_index 24 | seg_weight = jt.zeros(seg_label.size()).astype(seg_logit.dtype) 25 | valid_seg_weight = seg_weight[valid_mask] 26 | if self.thresh is not None: 27 | seg_prob = nn.softmax(seg_logit, dim=1) 28 | 29 | tmp_seg_label = seg_label.clone().unsqueeze(1) 30 | tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0 31 | seg_prob = jt.gather(seg_prob, 1, tmp_seg_label).squeeze(1) 32 | sort_indices, sort_prob = seg_prob[valid_mask].argsort() 33 | 34 | if sort_prob.numel() > 0: 35 | min_threshold = sort_prob[min(batch_kept, 36 | sort_prob.numel() - 1)] 37 | else: 38 | min_threshold = 0.0 39 | threshold = max(min_threshold, self.thresh) 40 | valid_seg_weight[seg_prob[valid_mask] < threshold] = 1. 
41 | else: 42 | if not isinstance(self.context.loss_decode, nn.ModuleList): 43 | losses_decode = [self.context.loss_decode] 44 | else: 45 | losses_decode = self.context.loss_decode 46 | losses = 0.0 47 | for loss_module in losses_decode: 48 | losses += loss_module( 49 | seg_logit, 50 | seg_label, 51 | weight=None, 52 | ignore_index=self.context.ignore_index, 53 | reduction_override='none') 54 | 55 | sort_indices, _ = losses[valid_mask].argsort(descending=True) 56 | valid_seg_weight[sort_indices[:batch_kept]] = 1. 57 | 58 | seg_weight[valid_mask] = valid_seg_weight 59 | 60 | return seg_weight 61 | -------------------------------------------------------------------------------- /python/jseg/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logger import * 2 | from .registry import Registry, build_from_cfg -------------------------------------------------------------------------------- /python/jseg/utils/helpers.py: -------------------------------------------------------------------------------- 1 | from itertools import repeat 2 | import collections.abc 3 | 4 | 5 | def _ntuple(n): 6 | def parse(x): 7 | if isinstance(x, collections.abc.Iterable): 8 | return x 9 | return tuple(repeat(x, n)) 10 | 11 | return parse 12 | 13 | 14 | to_1tuple = _ntuple(1) 15 | to_2tuple = _ntuple(2) 16 | to_3tuple = _ntuple(3) 17 | to_4tuple = _ntuple(4) 18 | to_ntuple = _ntuple 19 | 20 | 21 | def make_divisible(v, divisor=8, min_value=None, round_limit=.9): 22 | min_value = min_value or divisor 23 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 24 | # Make sure that rounding down does not remove more than 10% of v. 25 | if new_v < round_limit * v: 26 | new_v += divisor 27 | return new_v 28 | -------------------------------------------------------------------------------- /python/jseg/utils/inference.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import jittor as jt 4 | from jseg.runner import Runner 5 | from jseg.config import init_cfg, update_cfg, get_cfg 6 | from jseg.datasets.pipelines import Compose 7 | 8 | 9 | class InferenceSegmentor: 10 | def __init__(self, config_file, checkpoint_file, save_dir): 11 | init_cfg(config_file) 12 | if len(checkpoint_file) > 0: 13 | update_cfg(resume_path=checkpoint_file) 14 | 15 | self.runner = Runner() 16 | self.runner.model.eval() 17 | self.transforms = Compose(get_cfg().test_pipeline[1:]) 18 | self.palette = self.runner.val_dataset.PALETTE 19 | self.runner.model.CLASSES = self.runner.val_dataset.CLASSES 20 | self.runner.model.PALETTE = self.runner.val_dataset.PALETTE 21 | self.save_dir = save_dir 22 | 23 | def load_img(self, results): 24 | if isinstance(results['img'], str): 25 | results['filename'] = results['img'] 26 | results['ori_filename'] = results['img'] 27 | img = cv2.imread(results['img']) 28 | else: 29 | results['filename'] = None 30 | results['ori_filename'] = None 31 | img = results['img']  # already a decoded array, so skip cv2.imread 32 | results['img'] = img 33 | results['img_shape'] = img.shape 34 | results['ori_shape'] = img.shape 35 | return results 36 | 37 | @jt.no_grad() 38 | @jt.single_process_scope() 39 | def infer(self, img): 40 | data = dict(img=img) 41 | data = self.transforms(self.load_img(data)) 42 | data['img'][0] = data['img'][0].unsqueeze(0) 43 | results = self.runner.model(**data, return_loss=False, rescale=True) 44 | results = self.runner.model.show_result(img, results, out_file=os.path.join(self.save_dir, os.path.basename(img))) 45 | return results 46 |
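InferenceSegmentor above bundles config loading, the test pipeline, and result rendering into one object. A hedged usage sketch: the config file is one of the project configs in this repository, while the checkpoint and image paths are placeholders and assume the script runs from the repo root:

from jseg.utils.inference import InferenceSegmentor

segmentor = InferenceSegmentor(
    config_file='project/pspnet/pspnet_r50-d8_512x512_ade20k_80k.py',
    checkpoint_file='work_dirs/pspnet/ckpt.pkl',  # placeholder checkpoint path
    save_dir='outputs')
# runs the test pipeline on the image and writes the colorized
# prediction to outputs/<basename of the input image>
segmentor.infer('demo/demo.png')  # placeholder image path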
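The helpers in utils/helpers.py are pure arithmetic, so their behavior can be pinned down with a few checked examples (values computed directly from the code above, shown as assertions):

from jseg.utils.helpers import make_divisible, to_2tuple

assert to_2tuple(7) == (7, 7)    # scalars are repeated; iterables pass through
assert make_divisible(30) == 32  # rounded to the nearest multiple of 8
assert make_divisible(3) == 8    # clamped up to min_value (defaults to divisor)
assert make_divisible(27) == 32  # 24 would lose more than 10% of 27, so bump by divisor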
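OHEMPixelSampler above performs online hard example mining: with `thresh` set, it keeps every pixel whose predicted ground-truth probability falls below max(thresh, the (min_kept * N)-th lowest probability in the batch); with `thresh=None`, it keeps the min_kept * N highest-loss pixels. A self-contained toy run of the threshold branch; `DummyHead` is a hypothetical stand-in, since the sampler only reads `ignore_index` and `loss_decode` from its context:

import jittor as jt
from jseg.sampler import OHEMPixelSampler

class DummyHead:
    ignore_index = 255
    loss_decode = None  # only consulted when thresh is None

logits = jt.randn(2, 19, 64, 64)                  # (N, C, H, W)
labels = jt.randint(0, 19, shape=(2, 1, 64, 64))  # (N, 1, H, W)

sampler = OHEMPixelSampler(context=DummyHead(), thresh=0.7, min_kept=1000)
weight = sampler.sample(logits, labels)  # 1. on kept (hard) pixels, 0. elsewhere
print(weight.shape, int(weight.sum().item()))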
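CustomPrameterGroupsGenerator in optims/prameter_groups_generator.py (the spelling follows the file name) splits parameters into a default group plus one group per parameter whose name contains a key of `custom_keys`, copying that key's overrides into the group. A hedged sketch with a hypothetical two-part model; the exact optimizer wiring used by real configs may differ:

from jittor import nn
from jseg.optims.prameter_groups_generator import CustomPrameterGroupsGenerator

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = nn.Conv2d(3, 8, 3)
        self.head = nn.Conv2d(8, 2, 1)

model = Toy()
groups = CustomPrameterGroupsGenerator(
    named_params=model.named_parameters(),
    model=model,
    custom_keys={'head': dict(lr=0.01)})
# backbone params land in plain {'params': [...]} groups; any param whose
# name contains 'head' gets its own group with lr=0.01 attached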
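Finally, the `resize`/`Upsample` pair in ops/wrappers.py earlier wraps Jittor interpolation: `resize` is the warning-aware functional form routed through the custom `Resize` Function (whose backward interpolates gradients straight back to the saved input size), and `Upsample` is its module form. A minimal sketch of how the two relate; the shapes here are illustrative, not from the repository:

import jittor as jt
from jseg.ops.wrappers import resize, Upsample

x = jt.randn(1, 3, 32, 32)
# functional form: explicit output size
y = resize(x, size=(64, 64), mode='bilinear', align_corners=False)
# module form: the same output size expressed as a scale factor
up = Upsample(scale_factor=2.0, mode='bilinear', align_corners=False)
assert y.shape == up(x).shape  # both (1, 3, 64, 64)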
-------------------------------------------------------------------------------- /python/jseg/utils/logger.py: -------------------------------------------------------------------------------- 1 | from jseg.utils.general import build_file, current_time 2 | from .registry import HOOKS, build_from_cfg 3 | import time 4 | import os 5 | from tensorboardX import SummaryWriter 6 | from jseg.config import get_cfg 7 | 8 | 9 | @HOOKS.register_module() 10 | class TextLogger: 11 | def __init__(self, work_dir): 12 | save_file = build_file( 13 | work_dir, 14 | prefix="textlog/log_" + 15 | time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime()) + ".txt") 16 | self.log_file = open(save_file, "a") 17 | 18 | def log(self, data): 19 | msg = ",".join([f"{k}:{d}" for k, d in data.items()]) 20 | msg = current_time() + ' ' + msg + "\n" 21 | self.log_file.write(msg) 22 | self.log_file.flush() 23 | 24 | 25 | @HOOKS.register_module() 26 | class TensorboardLogger: 27 | def __init__(self, work_dir): 28 | self.cfg = get_cfg() 29 | tensorboard_dir = os.path.join(work_dir, "tensorboard") 30 | self.writer = SummaryWriter(tensorboard_dir, flush_secs=10) 31 | 32 | def log(self, data): 33 | if "iter" in data.keys(): 34 | step = data["iter"] 35 | for k, d in data.items(): 36 | if k in ["iter", "epoch", "batch_idx", "times", "batch_size"]: 37 | continue 38 | if isinstance(d, str): 39 | continue 40 | self.writer.add_scalar(k, d, global_step=step) 41 | 42 | 43 | @HOOKS.register_module() 44 | class RunLogger: 45 | def __init__(self, work_dir, loggers=["TextLogger", "TensorboardLogger"]): 46 | self.loggers = [ 47 | build_from_cfg(log, HOOKS, work_dir=work_dir) for log in loggers 48 | ] 49 | 50 | def log(self, data, **kwargs): 51 | data.update(kwargs) 52 | data = { 53 | k: d.item() if hasattr(d, "item") else d 54 | for k, d in data.items() 55 | } 56 | for logger in self.loggers: 57 | logger.log(data) 58 | self.print_log(data) 59 | 60 | def get_time(self, s): 61 | s = int(s) 62 | days = s // 60 // 60 // 24 63 | hours = s // 60 // 60 % 24 64 | minutes = s // 60 % 60 65 | seconds = s % 60 66 | return f' [{days}D:{hours}H:{minutes}M:{seconds}S] ' 67 | 68 | def print_log(self, msg): 69 | if isinstance(msg, dict): 70 | msgs = [] 71 | for k, d in msg.items(): 72 | if (k == "remain_time"): 73 | msgs.append(f" {k}:{self.get_time(d)}") 74 | else: 75 | msgs.append(f" {k}:{d:.7f}" 76 | if isinstance(d, float) else f" {k}:{d}") 77 | msg = ",".join(msgs) 78 | print(current_time(), msg) 79 | -------------------------------------------------------------------------------- /python/jseg/utils/registry.py: -------------------------------------------------------------------------------- 1 | class Registry: 2 | def __init__(self): 3 | self._modules = {} 4 | 5 | def register_module(self, name=None, module=None): 6 | def _register_module(module): 7 | key = name 8 | if key is None: 9 | key = module.__name__ 10 | assert key not in self._modules, f"{key} is already registered." 11 | self._modules[key] = module 12 | return module 13 | 14 | if module is not None: 15 | return _register_module(module) 16 | 17 | return _register_module 18 | 19 | def get(self, name): 20 | assert name in self._modules, f"{name} is not registered." 
21 | return self._modules[name] 22 | 23 | 24 | def build_from_cfg(cfg, registry, **kwargs): 25 | if isinstance(cfg, str): 26 | return registry.get(cfg)(**kwargs) 27 | elif isinstance(cfg, dict): 28 | args = cfg.copy() 29 | args.update(kwargs) 30 | obj_type = args.pop('type') 31 | obj_cls = registry.get(obj_type) 32 | try: 33 | module = obj_cls(**args) 34 | except TypeError as e: 35 | if "
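Taken together, `Registry` and `build_from_cfg` are jseg's config-driven construction mechanism: a string resolves directly to the registered class or function, while a dict must carry a `type` key whose remaining items become constructor kwargs. A minimal sketch; the `DEMO` registry and `Constant` class are illustrative, not part of jseg:

from jseg.utils.registry import Registry, build_from_cfg

DEMO = Registry()

@DEMO.register_module()
class Constant:
    def __init__(self, value=1.0):
        self.value = value

# dict form: 'type' selects the class, the rest become kwargs
a = build_from_cfg(dict(type='Constant', value=2.0), DEMO)
# string form: extra kwargs are passed through directly
b = build_from_cfg('Constant', DEMO, value=2.0)
assert a.value == b.value == 2.0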