├── .gitignore ├── LICENSE.txt ├── README.md ├── docs ├── config.md └── dataset_prepare.md ├── project ├── _base_ │ ├── datasets │ │ ├── ade20k.py │ │ ├── ade20k_640x640.py │ │ ├── cityscapes_1024x1024.py │ │ ├── cityscapes_512x1024.py │ │ ├── cityscapes_769x769.py │ │ ├── isaid_869x869.py │ │ └── loveda.py │ ├── default_runtime.py │ └── schedules │ │ ├── schedule_160k.py │ │ ├── schedule_40k.py │ │ └── schedule_80k.py ├── ann │ ├── ann_r50-d8_512x512_ade20k_80k.py │ └── readme.md ├── beit │ ├── readme.md │ └── upernet_beit-base_8x2_640x640_160k_ade20k.py ├── ccnet │ ├── ccnet_r50-d8_512x512_ade20k_80k.py │ └── readme.md ├── clip_rc │ ├── bpe_simple_vocab_16e6.txt.gz │ ├── coco │ │ ├── clip_rc_fully_vit-b_512x512_80k_coco_stuff164k_100_16.py │ │ ├── clip_rc_zero_vit-b_512x512_40k_coco_stuff164k_100_16_st.py │ │ └── clip_rc_zero_vit-b_512x512_80k_coco_stuff164k_100_16.py │ ├── datasets │ │ ├── zero_cocostuff_512x512.py │ │ ├── zero_voc12_20_512x512.py │ │ └── zero_voc12_20_aug_512x512.py │ ├── readme.md │ ├── text_embedding │ │ ├── coco_multi.npy │ │ └── voc12_single.npy │ └── voc12 │ │ ├── clip_rc_fully_vit-b_512x512_40k_voc_10_16.py │ │ ├── clip_rc_zero_vit-b_512x512_20k_voc_10_16_st.py │ │ └── clip_rc_zero_vit-b_512x512_40k_voc_10_16.py ├── convnext │ ├── readme.md │ └── upernet_convnext_base_512x512_ade20k_160k.py ├── danet │ ├── danet_r50-d8_512x512_ade20k_80k.py │ └── readme.md ├── deeplabv3 │ ├── deeplabv3_r50-d8_512x512_ade20k_80k.py │ └── readme.md ├── deeplabv3plus │ ├── deeplabv3plus_r50-d8_512x512_ade20k_80k.py │ └── readme.md ├── eanet │ ├── eanet_r50-d8_769x769_cityscapes_40k.py │ └── readme.md ├── emanet │ ├── emanet_r50-d8_512x1024_cityscapes_80k.py │ └── readme.md ├── fcn │ ├── fcn_r50-d8_512x1024_cityscapes_80k.py │ ├── fcn_r50-d8_512x512_ade20k_80k.py │ ├── fcn_r50-d8_896x896_isaid_80k.py │ └── readme.md ├── gcnet │ ├── gcnet_r50-d8_512x512_ade20k_160k.py │ └── readme.md ├── mae │ ├── readme.md │ └── upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py ├── mobilenet_v2 │ ├── fcn_m-v2-d8_512x512_ade20k_160k.py │ └── readme.md ├── nonlocal_net │ ├── nonlocal_r50-d8_512x512_ade20k_80k.py │ └── readme.md ├── point_rend │ ├── pointrend_r50_512x512_ade20k_160k.py │ └── readme.md ├── pspnet │ ├── pspnet_r50-d8_512x1024_cityscapes_80k.py │ ├── pspnet_r50-d8_512x512_ade20k_80k.py │ ├── pspnet_r50-d8_512x512_loveda_80k.py │ └── readme.md ├── resnest │ ├── pspnet_s101-d8_512x512_ade20k_80k.py │ └── readme.md ├── segformer │ ├── b0 │ │ └── segformer_b0_512x512_ade_160k.py │ └── readme.md ├── segnext │ ├── base │ │ ├── segnext_base_1024x1024_cityscapes_160k.py │ │ ├── segnext_base_512x512_ade_160k.py │ │ └── segnext_base_896x896_isaid_160k.py │ ├── large │ │ ├── segnext_large_1024x1024_cityscapes_160k.py │ │ ├── segnext_large_512x512_ade_160k.py │ │ └── segnext_large_896x896_isaid_160k.py │ ├── readme.md │ ├── resources │ │ └── flops.png │ ├── small │ │ ├── segnext_small_1024x1024_cityscapes_160k.py │ │ ├── segnext_small_512x512_ade_160k.py │ │ └── segnext_small_896x896_isaid_160k.py │ └── tiny │ │ ├── segnext_tiny_1024x1024_cityscapes_160k.py │ │ ├── segnext_tiny_512x512_ade_160k.py │ │ └── segnext_tiny_896x896_isaid_160k.py ├── swin │ ├── readme.md │ └── tiny │ │ └── upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py ├── upernet │ ├── readme.md │ └── upernet_r50_512x512_ade20k_160k.py └── vit │ ├── readme.md │ └── upernet_vit-b16_ln_mln_512x512_ade20k_160k.py ├── python └── jseg │ ├── __init__.py │ ├── bricks │ ├── __init__.py │ ├── activation.py │ ├── conv.py │ 
├── conv_module.py │ ├── depthwise_separable_conv_module.py │ ├── drop.py │ ├── norm.py │ └── padding.py │ ├── config │ ├── __init__.py │ └── config.py │ ├── datasets │ ├── __init__.py │ ├── ade.py │ ├── cityscapes.py │ ├── custom.py │ ├── isaid.py │ ├── isprs.py │ ├── loveda.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── loading.py │ │ ├── test_time_aug.py │ │ ├── transforms.py │ │ └── utils.py │ ├── potsdam.py │ ├── voc.py │ ├── zero_coco_stuff.py │ └── zero_voc12.py │ ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── beit.py │ │ ├── clip_encoder_rlb.py │ │ ├── clip_text_encoder.py │ │ ├── convnext.py │ │ ├── mae.py │ │ ├── mix_transformer.py │ │ ├── mobilenet_v2.py │ │ ├── mscan.py │ │ ├── resnest.py │ │ ├── resnet.py │ │ ├── resnext.py │ │ ├── swin.py │ │ └── vit.py │ ├── decode_heads │ │ ├── __init__.py │ │ ├── ann_head.py │ │ ├── aspp_head.py │ │ ├── cascade_decode_head.py │ │ ├── cc_head.py │ │ ├── cliprc_head.py │ │ ├── da_head.py │ │ ├── decode_head.py │ │ ├── ea_head.py │ │ ├── ema_head.py │ │ ├── fcn_head.py │ │ ├── fpn_head.py │ │ ├── gc_head.py │ │ ├── ham_head.py │ │ ├── nl_head.py │ │ ├── point_head.py │ │ ├── psp_head.py │ │ ├── segformer_head.py │ │ ├── sep_aspp_head.py │ │ └── uper_head.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── cross_entropy_loss.py │ │ └── utils.py │ ├── necks │ │ ├── __init__.py │ │ ├── featurepyramid.py │ │ ├── fpn.py │ │ └── multilevel_neck.py │ ├── segmentors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── cascade_encoder_decoder.py │ │ ├── clip_rc.py │ │ └── encoder_decoder.py │ └── utils │ │ ├── embed.py │ │ ├── inverted_residual.py │ │ └── se_layer.py │ ├── ops │ ├── __init__.py │ ├── cc_attention.py │ ├── cliprc_ops.py │ ├── context_block.py │ ├── external_attention.py │ ├── mha.py │ ├── multi_head_attention.py │ ├── non_local.py │ ├── scale.py │ ├── self_attention_block.py │ └── wrappers.py │ ├── optims │ ├── __init__.py │ ├── lr_decay_parameter_groups_generator.py │ ├── lr_scheduler.py │ ├── optimizer.py │ └── prameter_groups_generator.py │ ├── runner │ ├── __init__.py │ └── runner.py │ ├── sampler │ ├── __init__.py │ ├── base_pixel_sampler.py │ └── ohem_pixel_sampler.py │ └── utils │ ├── __init__.py │ ├── general.py │ ├── helpers.py │ ├── inference.py │ ├── logger.py │ ├── metrics.py │ ├── registry.py │ ├── tokenizer.py │ ├── visualize.py │ └── weight_init.py ├── requirements.txt ├── setup.py └── tools ├── convert_datasets ├── cityscapes.py ├── isaid.py ├── loveda.py ├── potsdam.py ├── vaihingen.py └── voc_aug.py ├── demo.py └── run_net.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .vscode 107 | .idea 108 | 109 | # custom 110 | *.pkl 111 | *.pkl.json 112 | *.log.json 113 | work_dirs/ 114 | work_dirs 115 | pretrained 116 | pretrained/ 117 | # PyTorch 118 | *.pth 119 | trash/ 120 | trash 121 | test_img/ 122 | events* -------------------------------------------------------------------------------- /docs/config.md: -------------------------------------------------------------------------------- 1 | # How to use configs in JSeg 2 | ## Basic usages 3 | ### .py configuration files 4 | You can do some easy computation in a .py configuration file: 5 | ```python 6 | # cfg.py 7 | import os 8 | exp_id = 1 9 | # path setting 10 | output_path = 'experiments' 11 | root_path = os.path.join(output_path, str(exp_id)) 12 | log_path = os.path.join(root_path, 'logs') 13 | 14 | # easy calculation 15 | gpus = [0,1,2,3] 16 | n_gpus = len(gpus) 17 | batch_size = 16 18 | base_lr = batch_size * 0.001 19 | 20 | # model setting 21 | model = { 22 | 'type': 'Resnet50', 23 | 'return_stages': ['layer1','layer2','layer3','layer4'], 24 | 'pretrained': True 25 | } 26 | ``` 27 | You can load a .py configuration file the same way you would load a .yaml configuration file: 28 | ```python 29 | # main.py 30 | from jseg.config import init_cfg 31 | init_cfg('cfg.py') 32 | ``` 33 | 34 | Please refer to `[ROOT]/python/jseg/config/config.py` for more details.
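### Config inheritance with `_base_`
Configuration files can also inherit from one another through the `_base_` key, which is how every config under `project/` composes a dataset, the default runtime, and a schedule. The sketch below is a minimal, hypothetical example of that pattern (the file name and the lr override are illustrative only), assuming the usual merge semantics in which keys set in the child config override values pulled in from its `_base_` files:
```python
# my_exp.py -- hypothetical experiment config
_base_ = [
    '../_base_/datasets/ade20k.py',        # dataset, pipelines, batch size
    '../_base_/default_runtime.py',        # logger and log_interval
    '../_base_/schedules/schedule_80k.py'  # SGD optimizer + PolyLR schedule
]

# Keys set here override the inherited ones: keep SGD but halve the base lr.
optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0005)
```
The composed file is then loaded exactly like a flat config, e.g. `init_cfg('my_exp.py')`.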
-------------------------------------------------------------------------------- /project/_base_/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'ADE20KDataset' 2 | data_root = 'datasets/ADEChallengeData2016/' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=16, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | img_dir='images/training', 42 | ann_dir='annotations/training', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | img_dir='images/validation', 53 | ann_dir='annotations/validation', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /project/_base_/datasets/ade20k_640x640.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'ADE20KDataset' 2 | data_root = 'datasets/ADEChallengeData2016/' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (640, 640) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2560, 640), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2560, 640), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=16, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | 
img_dir='images/training', 42 | ann_dir='annotations/training', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | img_dir='images/validation', 53 | ann_dir='annotations/validation', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /project/_base_/datasets/cityscapes_1024x1024.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'datasets/cityscapes/' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (1024, 1024) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 1024), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=8, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | img_dir='leftImg8bit/train', 42 | ann_dir='gtFine/train', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | img_dir='leftImg8bit/val', 53 | ann_dir='gtFine/val', 54 | pipeline=test_pipeline), 55 | test=dict( 56 | type=dataset_type, 57 | # Fixed to one 58 | batch_size=1, 59 | num_workers=1, 60 | shuffle=False, 61 | drop_last=False, 62 | data_root=data_root, 63 | img_dir='leftImg8bit/val', 64 | ann_dir='gtFine/val', 65 | pipeline=test_pipeline)) 66 | -------------------------------------------------------------------------------- /project/_base_/datasets/cityscapes_512x1024.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'datasets/cityscapes/' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (512, 1024) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | 
dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 1024), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=8, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | img_dir='leftImg8bit/train', 42 | ann_dir='gtFine/train', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | img_dir='leftImg8bit/val', 53 | ann_dir='gtFine/val', 54 | pipeline=test_pipeline), 55 | test=dict( 56 | type=dataset_type, 57 | # Fixed to one 58 | batch_size=1, 59 | num_workers=1, 60 | shuffle=False, 61 | drop_last=False, 62 | data_root=data_root, 63 | img_dir='leftImg8bit/val', 64 | ann_dir='gtFine/val', 65 | pipeline=test_pipeline)) -------------------------------------------------------------------------------- /project/_base_/datasets/cityscapes_769x769.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'datasets/cityscapes/' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (769, 769) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 1025), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=8, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | img_dir='leftImg8bit/train', 42 | ann_dir='gtFine/train', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | img_dir='leftImg8bit/val', 53 | ann_dir='gtFine/val', 54 | pipeline=test_pipeline), 55 | test=dict( 56 | type=dataset_type, 57 | # Fixed to one 58 | batch_size=1, 59 | num_workers=1, 60 | shuffle=False, 61 | drop_last=False, 62 | data_root=data_root, 63 | img_dir='leftImg8bit/val', 64 | ann_dir='gtFine/val', 65 | pipeline=test_pipeline)) 66 | -------------------------------------------------------------------------------- /project/_base_/datasets/isaid_869x869.py: -------------------------------------------------------------------------------- 
1 | dataset_type = 'iSAIDDataset' 2 | data_root = 'datasets/iSAID_Patches' 3 | 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | 8 | crop_size = (896, 896) 9 | 10 | train_pipeline = [ 11 | dict(type='LoadImageFromFile'), 12 | dict(type='LoadAnnotations'), 13 | dict(type='Resize', img_scale=(896, 896), ratio_range=(0.5, 2.0)), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 21 | ] 22 | test_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=(896, 896), 27 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']), 35 | ]) 36 | ] 37 | 38 | dataset = dict( 39 | train=dict(type=dataset_type, 40 | batch_size=16, 41 | num_workers=8, 42 | shuffle=True, 43 | drop_last=False, 44 | data_root=data_root, 45 | img_dir='train/images', 46 | ann_dir='train/Semantic_masks', 47 | pipeline=train_pipeline), 48 | val=dict( 49 | type=dataset_type, 50 | # Fixed to one 51 | batch_size=1, 52 | num_workers=1, 53 | shuffle=False, 54 | drop_last=False, 55 | data_root=data_root, 56 | img_dir='val/images', 57 | ann_dir='val/Semantic_masks', 58 | pipeline=test_pipeline)) 59 | -------------------------------------------------------------------------------- /project/_base_/datasets/loveda.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'LoveDADataset' 2 | data_root = 'dataset/LoveDA' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1024, 1024), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=16, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | img_dir='img_dir/train', 42 | ann_dir='ann_dir/train', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | 
img_dir='img_dir/val', 53 | ann_dir='ann_dir/val', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /project/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | logger = dict(type="RunLogger") 2 | log_interval = 50 -------------------------------------------------------------------------------- /project/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | max_iter = 160000 2 | eval_interval = 8000 3 | checkpoint_interval = 8000 4 | # optimizer 5 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 6 | # scheduler 7 | scheduler = dict(type='PolyLR', 8 | max_steps=max_iter, 9 | power=0.9, 10 | min_lr=1e-4) 11 | -------------------------------------------------------------------------------- /project/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | max_iter = 40000 2 | eval_interval = 2000 3 | checkpoint_interval = 2000 4 | # optimizer 5 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 6 | # scheduler 7 | scheduler = dict(type='PolyLR', 8 | max_steps=max_iter, 9 | power=0.9, 10 | min_lr=1e-4) 11 | -------------------------------------------------------------------------------- /project/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | max_iter = 80000 2 | eval_interval = 4000 3 | checkpoint_interval = 4000 4 | # optimizer 5 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 6 | # scheduler 7 | scheduler = dict(type='PolyLR', 8 | max_steps=max_iter, 9 | power=0.9, 10 | min_lr=1e-4) 11 | -------------------------------------------------------------------------------- /project/ann/ann_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | # model settings 6 | norm_cfg = dict(type='BN') 7 | model = dict( 8 | type='EncoderDecoder', 9 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 10 | backbone=dict(type='ResNetV1c', 11 | depth=50, 12 | num_stages=4, 13 | out_indices=(0, 1, 2, 3), 14 | dilations=(1, 1, 2, 4), 15 | strides=(1, 2, 1, 1), 16 | norm_cfg=norm_cfg, 17 | norm_eval=False, 18 | contract_dilation=True), 19 | decode_head=dict(type='ANNHead', 20 | in_channels=[1024, 2048], 21 | in_index=[2, 3], 22 | channels=512, 23 | project_channels=256, 24 | query_scales=(1, ), 25 | key_pool_scales=(1, 3, 6, 8), 26 | dropout_ratio=0.1, 27 | num_classes=150, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict(type='CrossEntropyLoss', 31 | use_sigmoid=False, 32 | loss_weight=1.0)), 33 | auxiliary_head=dict(type='FCNHead', 34 | in_channels=1024, 35 | in_index=2, 36 | channels=256, 37 | num_convs=1, 38 | concat_input=False, 39 | dropout_ratio=0.1, 40 | num_classes=150, 41 | norm_cfg=norm_cfg, 42 | align_corners=False, 43 | loss_decode=dict(type='CrossEntropyLoss', 44 | use_sigmoid=False, 45 | loss_weight=0.4)), 46 | # model training and testing settings 47 | train_cfg=dict(), 48 | test_cfg=dict(mode='whole')) 49 | -------------------------------------------------------------------------------- /project/ann/readme.md: -------------------------------------------------------------------------------- 1 | 
# ANN -------------------------------------------------------------------------------- /project/beit/readme.md: -------------------------------------------------------------------------------- 1 | # BEiT -------------------------------------------------------------------------------- /project/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py' 3 | ] 4 | 5 | norm_cfg = dict(type='BN') 6 | model = dict( 7 | type='EncoderDecoder', 8 | pretrained='jittorhub://beit_base_patch16_224_pt22k_ft22k.pkl', 9 | backbone=dict(type='BEiT', 10 | img_size=(640, 640), 11 | patch_size=16, 12 | in_channels=3, 13 | embed_dims=768, 14 | num_layers=12, 15 | num_heads=12, 16 | mlp_ratio=4, 17 | out_indices=(3, 5, 7, 11), 18 | qv_bias=True, 19 | attn_drop_rate=0.0, 20 | drop_path_rate=0.1, 21 | norm_cfg=dict(type='LN', eps=1e-6), 22 | act_cfg=dict(type='GELU'), 23 | norm_eval=False, 24 | init_values=0.1), 25 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 26 | decode_head=dict(type='UPerHead', 27 | in_channels=[768, 768, 768, 768], 28 | in_index=[0, 1, 2, 3], 29 | pool_scales=(1, 2, 3, 6), 30 | channels=768, 31 | dropout_ratio=0.1, 32 | num_classes=150, 33 | norm_cfg=norm_cfg, 34 | align_corners=False, 35 | loss_decode=dict(type='CrossEntropyLoss', 36 | use_sigmoid=False, 37 | loss_weight=1.0)), 38 | auxiliary_head=dict(type='FCNHead', 39 | in_channels=768, 40 | in_index=2, 41 | channels=256, 42 | num_convs=1, 43 | concat_input=False, 44 | dropout_ratio=0.1, 45 | num_classes=150, 46 | norm_cfg=norm_cfg, 47 | align_corners=False, 48 | loss_decode=dict(type='CrossEntropyLoss', 49 | use_sigmoid=False, 50 | loss_weight=0.4)), 51 | # model training and testing settings 52 | train_cfg=dict(), 53 | test_cfg=dict(mode='slide', crop_size=(640, 640), stride=(426, 426))) 54 | 55 | parameter_groups_generator = dict(type="LRDecayParameterGroupsGenerator", 56 | paramwise_cfg=dict(num_layers=12, 57 | decay_rate=0.9)) 58 | 59 | optimizer = dict( 60 | type='CustomAdamW', 61 | lr=3e-5, 62 | betas=(0.9, 0.999), 63 | weight_decay=0.05, 64 | ) 65 | 66 | max_iter = 160000 67 | eval_interval = 8000 68 | checkpoint_interval = 8000 69 | 70 | scheduler = dict(type='PolyLR', 71 | warmup='linear', 72 | warmup_iters=1500, 73 | warmup_ratio=1e-6, 74 | max_steps=max_iter, 75 | power=1.0, 76 | min_lr=0) 77 | -------------------------------------------------------------------------------- /project/ccnet/ccnet_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='CCHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | recurrence=2, 25 | dropout_ratio=0.1, 26 | num_classes=150, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict(type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | loss_weight=1.0)), 32 | 
auxiliary_head=dict(type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=150, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict(type='CrossEntropyLoss', 43 | use_sigmoid=False, 44 | loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /project/ccnet/readme.md: -------------------------------------------------------------------------------- 1 | # CCNet -------------------------------------------------------------------------------- /project/clip_rc/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jittor/JSeg/c14696dc4fa6e822fd15b7add2d07067ecb95943/project/clip_rc/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /project/clip_rc/datasets/zero_cocostuff_512x512.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'ZeroCOCOStuffDataset' 2 | data_root = '/home/zy/datasets/coco_stuff164k' 3 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 4 | std=[58.395, 57.12, 57.375], 5 | to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True, min_size=512), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | dataset = dict( 35 | train=dict(type=dataset_type, 36 | batch_size=16, 37 | num_workers=8, 38 | shuffle=True, 39 | drop_last=False, 40 | data_root=data_root, 41 | img_dir='images/train2017', 42 | ann_dir='annotations/train2017', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | # Fixed to one 47 | batch_size=1, 48 | num_workers=1, 49 | shuffle=False, 50 | drop_last=False, 51 | data_root=data_root, 52 | img_dir='images/val2017', 53 | ann_dir='annotations/val2017', 54 | pipeline=test_pipeline)) 55 | -------------------------------------------------------------------------------- /project/clip_rc/datasets/zero_voc12_20_512x512.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ZeroPascalVOCDataset20' 3 | data_root = '/home/zy/datasets/pascal_voc/VOC2012' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 5 | std=[58.395, 57.12, 57.375], 6 | to_rgb=True) 7 | 8 | crop_size = (512, 512) 9 | 10 | train_pipeline = [ 11 | dict(type='LoadImageFromFile'), 12 | dict(type='LoadAnnotations', reduce_zero_label=True), 13 | 
dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 21 | ] 22 | 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(2048, 512), 28 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 29 | flip=False, 30 | transforms=[ 31 | dict(type='Resize', keep_ratio=True, min_size=512), 32 | dict(type='RandomFlip'), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | 39 | dataset = dict( 40 | train=dict(type=dataset_type, 41 | batch_size=4, 42 | num_workers=8, 43 | shuffle=True, 44 | drop_last=False, 45 | data_root=data_root, 46 | img_dir='JPEGImages', 47 | ann_dir='SegmentationClass', 48 | split='ImageSets/Segmentation/train.txt', 49 | pipeline=train_pipeline), 50 | val=dict( 51 | type=dataset_type, 52 | # Fixed to one 53 | batch_size=1, 54 | num_workers=1, 55 | shuffle=False, 56 | drop_last=False, 57 | data_root=data_root, 58 | img_dir='JPEGImages', 59 | ann_dir='SegmentationClass', 60 | split='ImageSets/Segmentation/val.txt', 61 | pipeline=test_pipeline)) 62 | -------------------------------------------------------------------------------- /project/clip_rc/datasets/zero_voc12_20_aug_512x512.py: -------------------------------------------------------------------------------- 1 | _base_ = './zero_voc12_20_512x512.py' 2 | # dataset settings, merge voc12 and voc12aug 3 | dataset = dict(train=dict(ann_dir='SegmentationClassAug', 4 | split='ImageSets/Segmentation/trainaug.txt')) 5 | -------------------------------------------------------------------------------- /project/clip_rc/readme.md: -------------------------------------------------------------------------------- 1 | # Exploring Regional Clues in CLIP for Zero-Shot Semantic Segmentation (CVPR 2024) 2 | 3 | 4 | This repository contains the official Jittor implementation of the paper: Exploring Regional Clues in CLIP for Zero-Shot Semantic Segmentation. 5 | 6 | The paper is available [**here**](https://openaccess.thecvf.com/content/CVPR2024/papers/Zhang_Exploring_Regional_Clues_in_CLIP_for_Zero-Shot_Semantic_Segmentation_CVPR_2024_paper.pdf).
7 | 8 | **Note**: CLIP-ViT-B-16 pre-trained models can be found [here](https://bhpan.buaa.edu.cn/link/AA95601A0FBCA5403485078A0160952FEC) 9 | 10 | ## Pretrained models 11 | 12 | | Dataset | Setting | pAcc | mIoU(S) | mIoU(U) | hIoU | Model Zoo | 13 | | :-------------: | :---------: | :---: | :-----: | :-----: | :--: | :----------------------------------------------------------: | 14 | | PASCAL VOC 2012 | Inductive | 95.8 | 92.8 | 84.4 | 88.4 | [[Drive](https://bhpan.buaa.edu.cn/link/AA10306CBF37904DDCB835F3BE2D7B1C15)] | 15 | | PASCAL VOC 2012 | Transductive | 97.0 | 93.9 | 92.2 | 93.0 | [[Drive](https://bhpan.buaa.edu.cn/link/AAE085202961AF45CD957E9F98BB7449FB)] | 16 | | PASCAL VOC 2012 | Fully | 97.1 | 94.1 | 93.4 | 93.7 | [[Drive](https://bhpan.buaa.edu.cn/link/AAA98108D9C3DD408C82B42EC206DD95DD)] | 17 | | COCO Stuff 164K | Inductive | 63.1 | 40.9 | 41.6 | 41.2 | [[Drive](https://bhpan.buaa.edu.cn/link/AA12C2F1BBA0804EC6820A8CB160062091)] | 18 | | COCO Stuff 164K | Transductive | 69.9 | 42.0 | 60.8 | 49.7 | [[Drive](https://bhpan.buaa.edu.cn/link/AA492DE7FE832E43D299C221931127CB1D)] | 19 | | COCO Stuff 164K | Fully | 70.8 | 42.9 | 64.1 | 51.4 | [[Drive](https://bhpan.buaa.edu.cn/link/AACE6B7E6F7DED41FDA09AF4CB308F4E2A)] | 20 | -------------------------------------------------------------------------------- /project/clip_rc/text_embedding/coco_multi.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jittor/JSeg/c14696dc4fa6e822fd15b7add2d07067ecb95943/project/clip_rc/text_embedding/coco_multi.npy -------------------------------------------------------------------------------- /project/clip_rc/text_embedding/voc12_single.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jittor/JSeg/c14696dc4fa6e822fd15b7add2d07067ecb95943/project/clip_rc/text_embedding/voc12_single.npy -------------------------------------------------------------------------------- /project/clip_rc/voc12/clip_rc_fully_vit-b_512x512_40k_voc_10_16.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../datasets/zero_voc12_20_aug_512x512.py', 3 | '../../_base_/default_runtime.py' 4 | ] 5 | 6 | img_size = 512 7 | in_channels = 512 8 | out_indices = [11] 9 | 10 | region_level_bridge_size = 16 11 | base_class = [ 12 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 13 | ] 14 | novel_class = [] 15 | both_class = base_class 16 | 17 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 18 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 19 | 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor') 20 | 21 | pretrained = 'ViT-B-16.pkl' 22 | 23 | model = dict( 24 | type='CLIPRC', 25 | pretrained=pretrained, 26 | pretrained_text=pretrained, 27 | class_names=CLASSES, 28 | backbone=dict( 29 | type='CLIPVisionTransformerWithRLB', 30 | patch_size=16, 31 | width=768, 32 | output_dim=512, 33 | get_embeddings=True, 34 | drop_path_rate=0.1, 35 | layers=12, 36 | input_resolution=img_size, 37 | out_indices=out_indices, 38 | # setting of vpt 39 | num_tokens=10, 40 | prompt_dim=768, 41 | total_d_layer=11, 42 | # setting of RLB 43 | region_level_bridge_size=region_level_bridge_size), 44 | text_encoder=dict(type='CLIPTextEncoder', 45 | context_length=77, 46 | embed_dim=512, 47 | transformer_width=512, 48 | transformer_heads=8, 49 | transformer_layers=12), 50 | decode_head=dict(
type='ATMSingleHeadSeg', 52 | img_size=img_size, 53 | in_channels=in_channels, 54 | seen_idx=base_class, 55 | all_idx=both_class, 56 | channels=in_channels, 57 | num_layers=3, 58 | num_classes=len(both_class), # useless, to decode_head, 59 | num_heads=8, 60 | use_proj=False, 61 | use_stages=len(out_indices), 62 | embed_dims=in_channels), 63 | test_cfg=dict(mode='slide', 64 | crop_size=(img_size, img_size), 65 | stride=(426, 426)), 66 | base_class=base_class, 67 | novel_class=novel_class, 68 | both_class=both_class, 69 | ft_backbone=False, 70 | exclude_key='prompt', 71 | load_text_embedding='project/clip_rc/text_embedding/voc12_single.npy') 72 | 73 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 74 | custom_keys={ 75 | 'backbone': dict(lr_mult=10.0), 76 | 'text_encoder': dict(lr_mult=0.0), 77 | 'norm': dict(decay_mult=0.), 78 | 'ln': dict(decay_mult=0.), 79 | 'head': dict(lr_mult=10.), 80 | }) 81 | 82 | optimizer = dict( 83 | type='CustomAdamW', 84 | lr=0.00002, 85 | betas=(0.9, 0.999), 86 | weight_decay=0.01, 87 | ) 88 | 89 | max_iter = 40000 90 | eval_interval = 2000 91 | checkpoint_interval = 2000 92 | 93 | scheduler = dict(type='PolyLR', 94 | warmup='linear', 95 | warmup_iters=1500, 96 | warmup_ratio=1e-6, 97 | max_steps=max_iter, 98 | power=0.9, 99 | min_lr=1e-6) 100 | -------------------------------------------------------------------------------- /project/clip_rc/voc12/clip_rc_zero_vit-b_512x512_40k_voc_10_16.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../datasets/zero_voc12_20_aug_512x512.py', 3 | '../../_base_/default_runtime.py' 4 | ] 5 | 6 | img_size = 512 7 | in_channels = 512 8 | out_indices = [11] 9 | 10 | region_level_bridge_size = 16 11 | 12 | base_class = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] 13 | novel_class = [15, 16, 17, 18, 19] 14 | both_class = [ 15 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 16 | ] 17 | 18 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 19 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 20 | 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor') 21 | 22 | pretrained = 'ViT-B-16.pkl' 23 | 24 | model = dict( 25 | type='CLIPRC', 26 | pretrained=pretrained, 27 | pretrained_text=pretrained, 28 | class_names=CLASSES, 29 | backbone=dict( 30 | type='CLIPVisionTransformerWithRLB', 31 | patch_size=16, 32 | width=768, 33 | output_dim=512, 34 | get_embeddings=True, 35 | drop_path_rate=0.1, 36 | layers=12, 37 | input_resolution=img_size, 38 | out_indices=out_indices, 39 | # setting of vpt 40 | num_tokens=10, 41 | prompt_dim=768, 42 | total_d_layer=11, 43 | # setting of RLB 44 | region_level_bridge_size=region_level_bridge_size), 45 | text_encoder=dict(type='CLIPTextEncoder', 46 | context_length=77, 47 | embed_dim=512, 48 | transformer_width=512, 49 | transformer_heads=8, 50 | transformer_layers=12), 51 | decode_head=dict( 52 | type='ATMSingleHeadSeg', 53 | img_size=img_size, 54 | in_channels=in_channels, 55 | seen_idx=base_class, 56 | all_idx=both_class, 57 | channels=in_channels, 58 | num_layers=3, 59 | num_classes=len(base_class), # useless, to decode_head 60 | num_heads=8, 61 | use_proj=False, 62 | use_stages=len(out_indices), 63 | embed_dims=in_channels), 64 | test_cfg=dict(mode='slide', 65 | crop_size=(img_size, img_size), 66 | stride=(426, 426)), 67 | base_class=base_class, 68 | novel_class=novel_class, 69 | both_class=both_class, 70 | ft_backbone=False, 71 | 
exclude_key='prompt', 72 | load_text_embedding='project/clip_rc/text_embedding/voc12_single.npy') 73 | 74 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 75 | custom_keys={ 76 | 'backbone': dict(lr_mult=10.0), 77 | 'text_encoder': dict(lr_mult=0.0), 78 | 'norm': dict(decay_mult=0.), 79 | 'ln': dict(decay_mult=0.), 80 | 'head': dict(lr_mult=10.), 81 | }) 82 | 83 | optimizer = dict( 84 | type='CustomAdamW', 85 | lr=0.00002, 86 | betas=(0.9, 0.999), 87 | weight_decay=0.01, 88 | ) 89 | 90 | max_iter = 40000 91 | eval_interval = 2000 92 | checkpoint_interval = 2000 93 | 94 | scheduler = dict(type='PolyLR', 95 | warmup='linear', 96 | warmup_iters=1500, 97 | warmup_ratio=1e-6, 98 | max_steps=max_iter, 99 | power=0.9, 100 | min_lr=1e-6) 101 | -------------------------------------------------------------------------------- /project/convnext/readme.md: -------------------------------------------------------------------------------- 1 | # ConvNeXt -------------------------------------------------------------------------------- /project/convnext/upernet_convnext_base_512x512_ade20k_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='EncoderDecoder', 9 | pretrained= 10 | 'jittorhub://convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pkl', 11 | backbone=dict(type='ConvNeXt', 12 | arch='base', 13 | out_indices=[0, 1, 2, 3], 14 | drop_path_rate=0.4, 15 | layer_scale_init_value=1.0, 16 | gap_before_final_norm=False), 17 | decode_head=dict(type='UPerHead', 18 | in_channels=[128, 256, 512, 1024], 19 | in_index=[0, 1, 2, 3], 20 | pool_scales=(1, 2, 3, 6), 21 | channels=512, 22 | dropout_ratio=0.1, 23 | num_classes=150, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0)), 28 | auxiliary_head=dict(type='FCNHead', 29 | in_channels=512, 30 | in_index=2, 31 | channels=256, 32 | num_convs=1, 33 | concat_input=False, 34 | dropout_ratio=0.1, 35 | num_classes=150, 36 | align_corners=False, 37 | loss_decode=dict(type='CrossEntropyLoss', 38 | use_sigmoid=False, 39 | loss_weight=0.4)), 40 | # model training and testing settings 41 | train_cfg=dict(), 42 | test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341))) 43 | 44 | parameter_groups_generator = dict(type="LRDecayParameterGroupsGenerator", 45 | paramwise_cfg={ 46 | 'decay_rate': 0.9, 47 | 'decay_type': 'stage_wise', 48 | 'num_layers': 12 49 | }) 50 | 51 | optimizer = dict( 52 | type='CustomAdamW', 53 | lr=0.0001, 54 | betas=(0.9, 0.999), 55 | weight_decay=0.05, 56 | ) 57 | 58 | max_iter = 160000 59 | scheduler = dict(type='PolyLR', 60 | warmup='linear', 61 | warmup_iters=1500, 62 | warmup_ratio=1e-6, 63 | max_steps=max_iter, 64 | power=1.0, 65 | min_lr=0) 66 | -------------------------------------------------------------------------------- /project/danet/danet_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 
1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='DAHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | pam_channels=64, 25 | dropout_ratio=0.1, 26 | num_classes=150, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict(type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | loss_weight=1.0)), 32 | auxiliary_head=dict(type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=150, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict(type='CrossEntropyLoss', 43 | use_sigmoid=False, 44 | loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /project/danet/readme.md: -------------------------------------------------------------------------------- 1 | # DANet -------------------------------------------------------------------------------- /project/deeplabv3/deeplabv3_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='ASPPHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | dilations=(1, 12, 24, 36), 25 | dropout_ratio=0.1, 26 | num_classes=150, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict(type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | loss_weight=1.0)), 32 | auxiliary_head=dict(type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=150, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict(type='CrossEntropyLoss', 43 | use_sigmoid=False, 44 | loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /project/deeplabv3/readme.md: -------------------------------------------------------------------------------- 1 | # DeepLabV3 -------------------------------------------------------------------------------- /project/deeplabv3plus/deeplabv3plus_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | 
decode_head=dict(type='DepthwiseSeparableASPPHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | dilations=(1, 12, 24, 36), 25 | c1_in_channels=256, 26 | c1_channels=48, 27 | dropout_ratio=0.1, 28 | num_classes=150, 29 | norm_cfg=norm_cfg, 30 | align_corners=False, 31 | loss_decode=dict(type='CrossEntropyLoss', 32 | use_sigmoid=False, 33 | loss_weight=1.0)), 34 | auxiliary_head=dict(type='FCNHead', 35 | in_channels=1024, 36 | in_index=2, 37 | channels=256, 38 | num_convs=1, 39 | concat_input=False, 40 | dropout_ratio=0.1, 41 | num_classes=150, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict(type='CrossEntropyLoss', 45 | use_sigmoid=False, 46 | loss_weight=0.4)), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /project/deeplabv3plus/readme.md: -------------------------------------------------------------------------------- 1 | # DeepLabV3+ -------------------------------------------------------------------------------- /project/eanet/eanet_r50-d8_769x769_cityscapes_40k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_40k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='EAHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | align_corners=True, 27 | loss_decode=dict(type='CrossEntropyLoss', 28 | use_sigmoid=False, 29 | loss_weight=1.0, 30 | class_weight=[ 31 | 0.8373, 0.918, 0.866, 1.0345, 1.0166, 32 | 0.9969, 0.9754, 1.0489, 0.8786, 33 | 1.0023, 0.9539, 0.9843, 1.1116, 34 | 0.9037, 1.0865, 1.0955, 1.0865, 35 | 1.1529, 1.0507 36 | ]), 37 | sampler=dict(type='OHEMPixelSampler', 38 | thresh=0.7, 39 | min_kept=100000)), 40 | auxiliary_head=dict(type='FCNHead', 41 | in_channels=1024, 42 | in_index=2, 43 | channels=256, 44 | num_convs=1, 45 | concat_input=False, 46 | dropout_ratio=0.1, 47 | num_classes=19, 48 | norm_cfg=norm_cfg, 49 | align_corners=True, 50 | loss_decode=dict(type='CrossEntropyLoss', 51 | use_sigmoid=False, 52 | loss_weight=0.4)), 53 | # model training and testing settings 54 | train_cfg=dict(), 55 | test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) 56 | -------------------------------------------------------------------------------- /project/eanet/readme.md: -------------------------------------------------------------------------------- 1 | # EANet 2 | 3 | ### Cityscapes 4 | 5 | | Method | Backbone | Pretrained | Iters | mIoU(ss/ms) | Params | FLOPs | Config | Download | 6 | | :-------: | :-------------: | :-----: | :---: | :--: | :----: | :----: | :----: | :-------: | 7 | | EANet | ResNet50 | IN-1K | 40K | 79.9/81.0 | - | - | [config](eanet_r50-d8_769x769_cityscapes_40k.py) | [Jittor Hub](https://cg.cs.tsinghua.edu.cn/jittor/assets/build/checkpoints/eanet_r50-d8_769x769_cityscapes_40k.pkl) | 8 | 9 | -------------------------------------------------------------------------------- 
/project/emanet/emanet_r50-d8_512x1024_cityscapes_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/cityscapes_512x1024.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='EMAHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=256, 24 | ema_channels=512, 25 | num_bases=64, 26 | num_stages=3, 27 | momentum=0.1, 28 | dropout_ratio=0.1, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict(type='CrossEntropyLoss', 33 | use_sigmoid=False, 34 | loss_weight=1.0)), 35 | auxiliary_head=dict(type='FCNHead', 36 | in_channels=1024, 37 | in_index=2, 38 | channels=256, 39 | num_convs=1, 40 | concat_input=False, 41 | dropout_ratio=0.1, 42 | num_classes=19, 43 | norm_cfg=norm_cfg, 44 | align_corners=False, 45 | loss_decode=dict(type='CrossEntropyLoss', 46 | use_sigmoid=False, 47 | loss_weight=0.4)), 48 | # model training and testing settings 49 | train_cfg=dict(), 50 | test_cfg=dict(mode='whole')) 51 | -------------------------------------------------------------------------------- /project/emanet/readme.md: -------------------------------------------------------------------------------- 1 | # EMANet -------------------------------------------------------------------------------- /project/fcn/fcn_r50-d8_512x1024_cityscapes_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/cityscapes_512x1024.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='FCNHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | num_convs=2, 25 | concat_input=True, 26 | dropout_ratio=0.1, 27 | num_classes=19, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict(type='CrossEntropyLoss', 31 | use_sigmoid=False, 32 | loss_weight=1.0)), 33 | auxiliary_head=dict(type='FCNHead', 34 | in_channels=1024, 35 | in_index=2, 36 | channels=256, 37 | num_convs=1, 38 | concat_input=False, 39 | dropout_ratio=0.1, 40 | num_classes=19, 41 | norm_cfg=norm_cfg, 42 | align_corners=False, 43 | loss_decode=dict(type='CrossEntropyLoss', 44 | use_sigmoid=False, 45 | loss_weight=0.4)), 46 | # model training and testing settings 47 | train_cfg=dict(), 48 | test_cfg=dict(mode='whole')) 49 | -------------------------------------------------------------------------------- /project/fcn/fcn_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model 
settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='FCNHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | num_convs=2, 25 | concat_input=True, 26 | dropout_ratio=0.1, 27 | num_classes=150, 28 | norm_cfg=norm_cfg, 29 | align_corners=False, 30 | loss_decode=dict(type='CrossEntropyLoss', 31 | use_sigmoid=False, 32 | loss_weight=1.0)), 33 | auxiliary_head=dict(type='FCNHead', 34 | in_channels=1024, 35 | in_index=2, 36 | channels=256, 37 | num_convs=1, 38 | concat_input=False, 39 | dropout_ratio=0.1, 40 | num_classes=150, 41 | norm_cfg=norm_cfg, 42 | align_corners=False, 43 | loss_decode=dict(type='CrossEntropyLoss', 44 | use_sigmoid=False, 45 | loss_weight=0.4)), 46 | # model training and testing settings 47 | train_cfg=dict(), 48 | test_cfg=dict(mode='whole')) 49 | -------------------------------------------------------------------------------- /project/fcn/fcn_r50-d8_896x896_isaid_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/isaid_869x869.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | 7 | # model settings 8 | norm_cfg = dict(type='BN') 9 | model = dict( 10 | type='EncoderDecoder', 11 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 12 | backbone=dict(type='ResNetV1c', 13 | depth=50, 14 | num_stages=4, 15 | out_indices=(0, 1, 2, 3), 16 | dilations=(1, 1, 2, 4), 17 | strides=(1, 2, 1, 1), 18 | norm_cfg=norm_cfg, 19 | norm_eval=False, 20 | contract_dilation=True), 21 | decode_head=dict(type='FCNHead', 22 | in_channels=2048, 23 | in_index=3, 24 | channels=512, 25 | num_convs=2, 26 | concat_input=True, 27 | dropout_ratio=0.1, 28 | num_classes=16, 29 | norm_cfg=norm_cfg, 30 | align_corners=False, 31 | loss_decode=dict(type='CrossEntropyLoss', 32 | use_sigmoid=False, 33 | loss_weight=1.0)), 34 | auxiliary_head=dict(type='FCNHead', 35 | in_channels=1024, 36 | in_index=2, 37 | channels=256, 38 | num_convs=1, 39 | concat_input=False, 40 | dropout_ratio=0.1, 41 | num_classes=16, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict(type='CrossEntropyLoss', 45 | use_sigmoid=False, 46 | loss_weight=0.4)), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /project/fcn/readme.md: -------------------------------------------------------------------------------- 1 | # FCN -------------------------------------------------------------------------------- /project/gcnet/gcnet_r50-d8_512x512_ade20k_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_160k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | 
norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='GCHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | ratio=1 / 4., 25 | pooling_type='att', 26 | fusion_types=('channel_add', ), 27 | dropout_ratio=0.1, 28 | num_classes=150, 29 | norm_cfg=norm_cfg, 30 | align_corners=False, 31 | loss_decode=dict(type='CrossEntropyLoss', 32 | use_sigmoid=False, 33 | loss_weight=1.0)), 34 | auxiliary_head=dict(type='FCNHead', 35 | in_channels=1024, 36 | in_index=2, 37 | channels=256, 38 | num_convs=1, 39 | concat_input=False, 40 | dropout_ratio=0.1, 41 | num_classes=150, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict(type='CrossEntropyLoss', 45 | use_sigmoid=False, 46 | loss_weight=0.4)), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /project/gcnet/readme.md: -------------------------------------------------------------------------------- 1 | # GCNet -------------------------------------------------------------------------------- /project/mae/readme.md: -------------------------------------------------------------------------------- 1 | # MAE -------------------------------------------------------------------------------- /project/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py: -------------------------------------------------------------------------------- 1 | _base_ = ['../_base_/datasets/ade20k.py', '../_base_/default_runtime.py'] 2 | 3 | norm_cfg = dict(type='BN') 4 | model = dict( 5 | type='EncoderDecoder', 6 | pretrained='jittorhub://mae_pretrain_vit_base.pkl', 7 | backbone=dict(type='MAE', 8 | img_size=(512, 512), 9 | patch_size=16, 10 | in_channels=3, 11 | embed_dims=768, 12 | num_layers=12, 13 | num_heads=12, 14 | mlp_ratio=4, 15 | out_indices=(3, 5, 7, 11), 16 | attn_drop_rate=0.0, 17 | drop_path_rate=0.1, 18 | norm_cfg=dict(type='LN', eps=1e-6), 19 | act_cfg=dict(type='GELU'), 20 | norm_eval=False, 21 | init_values=1.0), 22 | neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), 23 | decode_head=dict(type='UPerHead', 24 | in_channels=[768, 768, 768, 768], 25 | in_index=[0, 1, 2, 3], 26 | pool_scales=(1, 2, 3, 6), 27 | channels=768, 28 | dropout_ratio=0.1, 29 | num_classes=150, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict(type='CrossEntropyLoss', 33 | use_sigmoid=False, 34 | loss_weight=1.0)), 35 | auxiliary_head=dict(type='FCNHead', 36 | in_channels=768, 37 | in_index=2, 38 | channels=256, 39 | num_convs=1, 40 | concat_input=False, 41 | dropout_ratio=0.1, 42 | num_classes=150, 43 | norm_cfg=norm_cfg, 44 | align_corners=False, 45 | loss_decode=dict(type='CrossEntropyLoss', 46 | use_sigmoid=False, 47 | loss_weight=0.4)), 48 | # model training and testing settings 49 | train_cfg=dict(), 50 | test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341))) 51 | 52 | parameter_groups_generator = dict(type="LRDecayParameterGroupsGenerator", 53 | paramwise_cfg=dict(num_layers=12, 54 | decay_rate=0.65)) 55 | 56 | optimizer = dict( 57 | type='CustomAdamW', 58 | lr=1e-4, 59 | betas=(0.9, 0.999), 60 | weight_decay=0.05, 61 | ) 62 | 63 | max_iter = 160000 64 | eval_interval = 8000 65 | checkpoint_interval = 8000 66 | 67 | scheduler = dict(type='PolyLR', 68 | warmup='linear', 69 | warmup_iters=1500, 70 | warmup_ratio=1e-6, 71 | max_steps=max_iter, 72 | power=1.0, 73 | min_lr=0) 74 | 
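75 | # The block below is an editorial sketch, not read by the runner: it shows
76 | # the per-layer lr scaling that a BEiT-style layer-wise decay (which
77 | # 'LRDecayParameterGroupsGenerator' with num_layers=12, decay_rate=0.65 is
78 | # assumed to implement) would assign, from the patch embedding up to the head.
79 | #
80 | #   num_layers, decay_rate = 12, 0.65
81 | #   # depth id 0 = patch embedding, 1..12 = transformer blocks, 13 = head
82 | #   scales = [decay_rate ** (num_layers + 1 - i) for i in range(num_layers + 2)]
83 | #   assert scales[13] == 1.0                    # head trains at the base lr
84 | #   assert abs(scales[0] - 0.65 ** 13) < 1e-9   # embedding lr ~= 0.0037 * base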
-------------------------------------------------------------------------------- /project/mobilenet_v2/fcn_m-v2-d8_512x512_ade20k_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | # pretrained='jittorhub://mobilenet_v2.pkl', 11 | backbone=dict(type='MobileNetV2', 12 | widen_factor=1., 13 | strides=(1, 2, 2, 1, 1, 1, 1), 14 | dilations=(1, 1, 1, 2, 2, 4, 4), 15 | out_indices=(1, 2, 4, 6)), 16 | decode_head=dict(type='FCNHead', 17 | in_channels=320, 18 | in_index=3, 19 | channels=512, 20 | num_convs=2, 21 | concat_input=True, 22 | dropout_ratio=0.1, 23 | num_classes=150, 24 | norm_cfg=norm_cfg, 25 | align_corners=False, 26 | loss_decode=dict(type='CrossEntropyLoss', 27 | use_sigmoid=False, 28 | loss_weight=1.0)), 29 | auxiliary_head=dict(type='FCNHead', 30 | in_channels=96, 31 | in_index=2, 32 | channels=256, 33 | num_convs=1, 34 | concat_input=False, 35 | dropout_ratio=0.1, 36 | num_classes=150, 37 | norm_cfg=norm_cfg, 38 | align_corners=False, 39 | loss_decode=dict(type='CrossEntropyLoss', 40 | use_sigmoid=False, 41 | loss_weight=0.4)), 42 | # model training and testing settings 43 | train_cfg=dict(), 44 | test_cfg=dict(mode='whole')) 45 | -------------------------------------------------------------------------------- /project/mobilenet_v2/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jittor/JSeg/c14696dc4fa6e822fd15b7add2d07067ecb95943/project/mobilenet_v2/readme.md -------------------------------------------------------------------------------- /project/nonlocal_net/nonlocal_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='NLHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | dropout_ratio=0.1, 25 | reduction=2, 26 | use_scale=True, 27 | mode='embedded_gaussian', 28 | num_classes=150, 29 | norm_cfg=norm_cfg, 30 | align_corners=False, 31 | loss_decode=dict(type='CrossEntropyLoss', 32 | use_sigmoid=False, 33 | loss_weight=1.0)), 34 | auxiliary_head=dict(type='FCNHead', 35 | in_channels=1024, 36 | in_index=2, 37 | channels=256, 38 | num_convs=1, 39 | concat_input=False, 40 | dropout_ratio=0.1, 41 | num_classes=150, 42 | norm_cfg=norm_cfg, 43 | align_corners=False, 44 | loss_decode=dict(type='CrossEntropyLoss', 45 | use_sigmoid=False, 46 | loss_weight=0.4)), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='whole')) 50 | -------------------------------------------------------------------------------- /project/nonlocal_net/readme.md: -------------------------------------------------------------------------------- 1 | # NonLocal Net 
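2 | 
3 | The config in this folder uses `NLHead` with `mode='embedded_gaussian'`.
4 | Following Wang et al. (2018), the non-local block computes, for every
5 | spatial position i, a response aggregated over all positions j:
6 | 
7 | y_i = (1 / C(x)) * sum_j f(x_i, x_j) g(x_j), with
8 | f(x_i, x_j) = exp(theta(x_i)^T phi(x_j)) and C(x) = sum_j f(x_i, x_j),
9 | 
10 | i.e. a softmax over pairwise embedded dot products, so each output pixel
11 | attends to the whole feature map. In the config above, `reduction=2` halves
12 | the channels used for the theta/phi/g projections, and `use_scale=True`
13 | divides the dot products by the square root of the embedding dimension
14 | before the softmax (mirroring the reference implementation).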
-------------------------------------------------------------------------------- /project/point_rend/pointrend_r50_512x512_ade20k_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_160k.py' 4 | ] 5 | 6 | norm_cfg = dict(type='BN') 7 | model = dict( 8 | type='CascadeEncoderDecoder', 9 | num_stages=2, 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 1, 1), 16 | strides=(1, 2, 2, 2), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | neck=dict(type='FPN', 21 | in_channels=[256, 512, 1024, 2048], 22 | out_channels=256, 23 | num_outs=4), 24 | decode_head=[ 25 | dict(type='FPNHead', 26 | in_channels=[256, 256, 256, 256], 27 | in_index=[0, 1, 2, 3], 28 | feature_strides=[4, 8, 16, 32], 29 | channels=128, 30 | dropout_ratio=-1, 31 | num_classes=150, 32 | norm_cfg=norm_cfg, 33 | align_corners=False, 34 | loss_decode=dict(type='CrossEntropyLoss', 35 | use_sigmoid=False, 36 | loss_weight=1.0)), 37 | dict(type='PointHead', 38 | in_channels=[256], 39 | in_index=[0], 40 | channels=256, 41 | num_fcs=3, 42 | coarse_pred_each_layer=True, 43 | dropout_ratio=-1, 44 | num_classes=150, 45 | align_corners=False, 46 | loss_decode=dict(type='CrossEntropyLoss', 47 | use_sigmoid=False, 48 | loss_weight=1.0)) 49 | ], 50 | # model training and testing settings 51 | train_cfg=dict(num_points=2048, 52 | oversample_ratio=3, 53 | importance_sample_ratio=0.75), 54 | test_cfg=dict(mode='whole', 55 | subdivision_steps=2, 56 | subdivision_num_points=8196, 57 | scale_factor=2)) 58 | 59 | scheduler = dict(type='PolyLR', 60 | warmup='linear', 61 | warmup_iters=200, 62 | warmup_ratio=1e-6, 63 | max_steps=160000, 64 | power=1.0, 65 | min_lr=0) 66 | -------------------------------------------------------------------------------- /project/point_rend/readme.md: -------------------------------------------------------------------------------- 1 | # PointRend -------------------------------------------------------------------------------- /project/pspnet/pspnet_r50-d8_512x1024_cityscapes_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/cityscapes_512x1024.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='PSPHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | pool_scales=(1, 2, 3, 6), 25 | dropout_ratio=0.1, 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict(type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | loss_weight=1.0)), 32 | auxiliary_head=dict(type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=19, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict(type='CrossEntropyLoss', 43 | use_sigmoid=False, 44 | 
loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /project/pspnet/pspnet_r50-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | # model settings 6 | norm_cfg = dict(type='BN') 7 | model = dict( 8 | type='EncoderDecoder', 9 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 10 | backbone=dict(type='ResNetV1c', 11 | depth=50, 12 | num_stages=4, 13 | out_indices=(0, 1, 2, 3), 14 | dilations=(1, 1, 2, 4), 15 | strides=(1, 2, 1, 1), 16 | norm_cfg=norm_cfg, 17 | norm_eval=False, 18 | contract_dilation=True), 19 | decode_head=dict(type='PSPHead', 20 | in_channels=2048, 21 | in_index=3, 22 | channels=512, 23 | pool_scales=(1, 2, 3, 6), 24 | dropout_ratio=0.1, 25 | num_classes=150, 26 | norm_cfg=norm_cfg, 27 | align_corners=False, 28 | loss_decode=dict(type='CrossEntropyLoss', 29 | use_sigmoid=False, 30 | loss_weight=1.0)), 31 | auxiliary_head=dict(type='FCNHead', 32 | in_channels=1024, 33 | in_index=2, 34 | channels=256, 35 | num_convs=1, 36 | concat_input=False, 37 | dropout_ratio=0.1, 38 | num_classes=150, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict(type='CrossEntropyLoss', 42 | use_sigmoid=False, 43 | loss_weight=0.4)), 44 | # model training and testing settings 45 | train_cfg=dict(), 46 | test_cfg=dict(mode='whole')) 47 | -------------------------------------------------------------------------------- /project/pspnet/pspnet_r50-d8_512x512_loveda_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/loveda.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 2, 4), 16 | strides=(1, 2, 1, 1), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='PSPHead', 21 | in_channels=2048, 22 | in_index=3, 23 | channels=512, 24 | pool_scales=(1, 2, 3, 6), 25 | dropout_ratio=0.1, 26 | num_classes=7, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict(type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | loss_weight=1.0)), 32 | auxiliary_head=dict(type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=7, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict(type='CrossEntropyLoss', 43 | use_sigmoid=False, 44 | loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | -------------------------------------------------------------------------------- /project/pspnet/readme.md: -------------------------------------------------------------------------------- 1 | # PSPNet -------------------------------------------------------------------------------- /project/resnest/pspnet_s101-d8_512x512_ade20k_80k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | 
'../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', 3 | '../_base_/schedules/schedule_80k.py' 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnest101.pkl', 11 | backbone=dict(type='ResNeSt', 12 | depth=101, 13 | stem_channels=128, 14 | radix=2, 15 | reduction_factor=4, 16 | avg_down_stride=True, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | dilations=(1, 1, 2, 4), 20 | strides=(1, 2, 1, 1), 21 | norm_cfg=norm_cfg, 22 | norm_eval=False, 23 | contract_dilation=True), 24 | decode_head=dict(type='PSPHead', 25 | in_channels=2048, 26 | in_index=3, 27 | channels=512, 28 | pool_scales=(1, 2, 3, 6), 29 | dropout_ratio=0.1, 30 | num_classes=150, 31 | norm_cfg=norm_cfg, 32 | align_corners=False, 33 | loss_decode=dict(type='CrossEntropyLoss', 34 | use_sigmoid=False, 35 | loss_weight=1.0)), 36 | auxiliary_head=dict(type='FCNHead', 37 | in_channels=1024, 38 | in_index=2, 39 | channels=256, 40 | num_convs=1, 41 | concat_input=False, 42 | dropout_ratio=0.1, 43 | num_classes=150, 44 | norm_cfg=norm_cfg, 45 | align_corners=False, 46 | loss_decode=dict(type='CrossEntropyLoss', 47 | use_sigmoid=False, 48 | loss_weight=0.4)), 49 | # model training and testing settings 50 | train_cfg=dict(), 51 | test_cfg=dict(mode='whole')) 52 | -------------------------------------------------------------------------------- /project/resnest/readme.md: -------------------------------------------------------------------------------- 1 | # ResNeSt -------------------------------------------------------------------------------- /project/segformer/b0/segformer_b0_512x512_ade_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = ['../../_base_/datasets/ade20k.py', '../../_base_/default_runtime.py'] 2 | 3 | # model settings 4 | norm_cfg = dict(type='BN') 5 | model = dict( 6 | type='EncoderDecoder', 7 | pretrained='jittorhub://mit_b0.pkl', 8 | backbone=dict(type='mit_b0'), 9 | decode_head=dict(type='SegFormerHead', 10 | in_channels=[32, 64, 160, 256], 11 | in_index=[0, 1, 2, 3], 12 | feature_strides=[4, 8, 16, 32], 13 | channels=128, 14 | dropout_ratio=0.1, 15 | num_classes=150, 16 | norm_cfg=norm_cfg, 17 | align_corners=False, 18 | decoder_params=dict(embed_dim=256), 19 | loss_decode=dict(type='CrossEntropyLoss', 20 | use_sigmoid=False, 21 | loss_weight=1.0)), 22 | # model training and testing settings 23 | train_cfg=dict(), 24 | test_cfg=dict(mode='whole')) 25 | 26 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 27 | std=[58.395, 57.12, 57.375], 28 | to_rgb=True) 29 | test_pipeline = [ 30 | dict(type='LoadImageFromFile'), 31 | dict( 32 | type='MultiScaleFlipAug', 33 | img_scale=(2048, 512), 34 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 35 | flip=False, 36 | transforms=[ 37 | dict(type='Resize', keep_ratio=True), 38 | dict(type='ResizeToMultiple', size_divisor=32), 39 | dict(type='RandomFlip'), 40 | dict(type='Normalize', **img_norm_cfg), 41 | dict(type='ImageToTensor', keys=['img']), 42 | dict(type='Collect', keys=['img']), 43 | ]) 44 | ] 45 | 46 | dataset = dict( 47 | val=dict(pipeline=test_pipeline)) 48 | 49 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 50 | custom_keys={ 51 | 'pos_block': dict(decay_mult=0.), 52 | 'norm': dict(decay_mult=0.), 53 | 'head': dict(lr_mult=10.) 
54 | }) 55 | 56 | optimizer = dict( 57 | type='CustomAdamW', 58 | lr=0.00006, 59 | betas=(0.9, 0.999), 60 | weight_decay=0.01, 61 | ) 62 | 63 | max_iter = 160000 64 | eval_interval = 8000 65 | checkpoint_interval = 8000 66 | 67 | scheduler = dict(type='PolyLR', 68 | warmup='linear', 69 | warmup_iters=1500, 70 | warmup_ratio=1e-6, 71 | max_steps=max_iter, 72 | power=1.0, 73 | min_lr=0) 74 | -------------------------------------------------------------------------------- /project/segformer/readme.md: -------------------------------------------------------------------------------- 1 | # SegFormer -------------------------------------------------------------------------------- /project/segnext/base/segnext_base_1024x1024_cityscapes_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/cityscapes_1024x1024.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_b.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.2, 16 | depths=[3, 3, 12, 3]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=512, 21 | dropout_ratio=0.1, 22 | num_classes=19, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=512), 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | # test_cfg=dict(mode='whole')) 32 | test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) 33 | 34 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 35 | std=[58.395, 57.12, 57.375], 36 | to_rgb=True) 37 | test_pipeline = [ 38 | dict(type='LoadImageFromFile'), 39 | dict( 40 | type='MultiScaleFlipAug', 41 | img_scale=(2048, 1024), 42 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='ResizeToMultiple', size_divisor=32), 47 | dict(type='RandomFlip'), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | dataset = dict( 54 | val=dict(pipeline=test_pipeline)) 55 | 56 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 57 | custom_keys={ 58 | 'pos_block': dict(decay_mult=0.), 59 | 'norm': dict(decay_mult=0.), 60 | 'head': dict(lr_mult=10.) 
61 | }) 62 | 63 | optimizer = dict( 64 | type='CustomAdamW', 65 | lr=0.00006, 66 | betas=(0.9, 0.999), 67 | weight_decay=0.01, 68 | ) 69 | 70 | max_iter = 160000 71 | eval_interval = 8000 72 | checkpoint_interval = 8000 73 | 74 | scheduler = dict(type='PolyLR', 75 | warmup='linear', 76 | warmup_iters=1500, 77 | warmup_ratio=1e-6, 78 | max_steps=max_iter, 79 | power=1.0, 80 | min_lr=0) 81 | -------------------------------------------------------------------------------- /project/segnext/base/segnext_base_512x512_ade_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/ade20k.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_b.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.2, 16 | depths=[3, 3, 12, 3]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=512, 21 | dropout_ratio=0.1, 22 | num_classes=150, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=512), 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | test_cfg=dict(mode='whole')) 32 | 33 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 34 | std=[58.395, 57.12, 57.375], 35 | to_rgb=True) 36 | test_pipeline = [ 37 | dict(type='LoadImageFromFile'), 38 | dict( 39 | type='MultiScaleFlipAug', 40 | img_scale=(2048, 512), 41 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 42 | flip=False, 43 | transforms=[ 44 | dict(type='Resize', keep_ratio=True), 45 | dict(type='ResizeToMultiple', size_divisor=32), 46 | dict(type='RandomFlip'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='ImageToTensor', keys=['img']), 49 | dict(type='Collect', keys=['img']), 50 | ]) 51 | ] 52 | dataset = dict( 53 | val=dict(pipeline=test_pipeline)) 54 | 55 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 56 | custom_keys={ 57 | 'pos_block': dict(decay_mult=0.), 58 | 'norm': dict(decay_mult=0.), 59 | 'head': dict(lr_mult=10.) 
60 | }) 61 | 62 | optimizer = dict( 63 | type='CustomAdamW', 64 | lr=0.00006, 65 | betas=(0.9, 0.999), 66 | weight_decay=0.01, 67 | ) 68 | 69 | max_iter = 160000 70 | eval_interval = 8000 71 | checkpoint_interval = 8000 72 | 73 | scheduler = dict(type='PolyLR', 74 | warmup='linear', 75 | warmup_iters=1500, 76 | warmup_ratio=1e-6, 77 | max_steps=max_iter, 78 | power=1.0, 79 | min_lr=0) 80 | -------------------------------------------------------------------------------- /project/segnext/base/segnext_base_896x896_isaid_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/isaid_869x869.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_b.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.2, 16 | depths=[3, 3, 12, 3]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=512, 21 | dropout_ratio=0.1, 22 | num_classes=16, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=512), 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | test_cfg=dict(mode='whole')) 32 | 33 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 34 | std=[58.395, 57.12, 57.375], 35 | to_rgb=True) 36 | test_pipeline = [ 37 | dict(type='LoadImageFromFile'), 38 | dict( 39 | type='MultiScaleFlipAug', 40 | img_scale=(896, 896), 41 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 42 | flip=False, 43 | transforms=[ 44 | dict(type='Resize', keep_ratio=True), 45 | dict(type='ResizeToMultiple', size_divisor=32), 46 | dict(type='RandomFlip'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='ImageToTensor', keys=['img']), 49 | dict(type='Collect', keys=['img']), 50 | ]) 51 | ] 52 | dataset = dict( 53 | val=dict(pipeline=test_pipeline)) 54 | 55 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 56 | custom_keys={ 57 | 'pos_block': dict(decay_mult=0.), 58 | 'norm': dict(decay_mult=0.), 59 | 'head': dict(lr_mult=10.) 
60 | }) 61 | 62 | optimizer = dict( 63 | type='CustomAdamW', 64 | lr=0.00006, 65 | betas=(0.9, 0.999), 66 | weight_decay=0.01, 67 | ) 68 | 69 | max_iter = 160000 70 | eval_interval = 8000 71 | checkpoint_interval = 8000 72 | 73 | scheduler = dict(type='PolyLR', 74 | warmup='linear', 75 | warmup_iters=1500, 76 | warmup_ratio=1e-6, 77 | max_steps=max_iter, 78 | power=1.0, 79 | min_lr=0) 80 | -------------------------------------------------------------------------------- /project/segnext/large/segnext_large_1024x1024_cityscapes_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/cityscapes_1024x1024.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_l.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.3, 16 | depths=[3, 5, 27, 3]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=1024, 21 | dropout_ratio=0.1, 22 | num_classes=19, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=1024), 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | # test_cfg=dict(mode='whole')) 32 | test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) 33 | 34 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 35 | std=[58.395, 57.12, 57.375], 36 | to_rgb=True) 37 | test_pipeline = [ 38 | dict(type='LoadImageFromFile'), 39 | dict( 40 | type='MultiScaleFlipAug', 41 | img_scale=(2048, 1024), 42 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='ResizeToMultiple', size_divisor=32), 47 | dict(type='RandomFlip'), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | dataset = dict( 54 | val=dict(pipeline=test_pipeline)) 55 | 56 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 57 | custom_keys={ 58 | 'pos_block': dict(decay_mult=0.), 59 | 'norm': dict(decay_mult=0.), 60 | 'head': dict(lr_mult=10.) 
61 | }) 62 | 63 | optimizer = dict( 64 | type='CustomAdamW', 65 | lr=0.00006, 66 | betas=(0.9, 0.999), 67 | weight_decay=0.01, 68 | ) 69 | 70 | max_iter = 160000 71 | eval_interval = 8000 72 | checkpoint_interval = 8000 73 | 74 | scheduler = dict(type='PolyLR', 75 | warmup='linear', 76 | warmup_iters=1500, 77 | warmup_ratio=1e-6, 78 | max_steps=max_iter, 79 | power=1.0, 80 | min_lr=0) 81 | -------------------------------------------------------------------------------- /project/segnext/large/segnext_large_512x512_ade_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/ade20k.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_l.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.3, 16 | depths=[3, 5, 27, 3]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=1024, 21 | dropout_ratio=0.1, 22 | num_classes=150, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=1024), 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | test_cfg=dict(mode='whole')) 32 | 33 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 34 | std=[58.395, 57.12, 57.375], 35 | to_rgb=True) 36 | test_pipeline = [ 37 | dict(type='LoadImageFromFile'), 38 | dict( 39 | type='MultiScaleFlipAug', 40 | img_scale=(2048, 512), 41 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 42 | flip=False, 43 | transforms=[ 44 | dict(type='Resize', keep_ratio=True), 45 | dict(type='ResizeToMultiple', size_divisor=32), 46 | dict(type='RandomFlip'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='ImageToTensor', keys=['img']), 49 | dict(type='Collect', keys=['img']), 50 | ]) 51 | ] 52 | dataset = dict( 53 | val=dict(pipeline=test_pipeline)) 54 | 55 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 56 | custom_keys={ 57 | 'pos_block': dict(decay_mult=0.), 58 | 'norm': dict(decay_mult=0.), 59 | 'head': dict(lr_mult=10.) 
60 | }) 61 | 62 | optimizer = dict( 63 | type='CustomAdamW', 64 | lr=0.00006, 65 | betas=(0.9, 0.999), 66 | weight_decay=0.01, 67 | ) 68 | 69 | max_iter = 160000 70 | eval_interval = 8000 71 | checkpoint_interval = 8000 72 | 73 | scheduler = dict(type='PolyLR', 74 | warmup='linear', 75 | warmup_iters=1500, 76 | warmup_ratio=1e-6, 77 | max_steps=max_iter, 78 | power=1.0, 79 | min_lr=0) 80 | -------------------------------------------------------------------------------- /project/segnext/large/segnext_large_896x896_isaid_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/isaid_869x869.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_l.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.3, 16 | depths=[3, 5, 27, 3]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=1024, 21 | dropout_ratio=0.1, 22 | num_classes=16, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=1024), 29 | # model training and testing settings 30 | train_cfg=dict(), 31 | test_cfg=dict(mode='whole')) 32 | 33 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 34 | std=[58.395, 57.12, 57.375], 35 | to_rgb=True) 36 | test_pipeline = [ 37 | dict(type='LoadImageFromFile'), 38 | dict( 39 | type='MultiScaleFlipAug', 40 | img_scale=(896, 896), 41 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 42 | flip=False, 43 | transforms=[ 44 | dict(type='Resize', keep_ratio=True), 45 | dict(type='ResizeToMultiple', size_divisor=32), 46 | dict(type='RandomFlip'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='ImageToTensor', keys=['img']), 49 | dict(type='Collect', keys=['img']), 50 | ]) 51 | ] 52 | dataset = dict( 53 | val=dict(pipeline=test_pipeline)) 54 | 55 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 56 | custom_keys={ 57 | 'pos_block': dict(decay_mult=0.), 58 | 'norm': dict(decay_mult=0.), 59 | 'head': dict(lr_mult=10.) 
60 | }) 61 | 62 | optimizer = dict( 63 | type='CustomAdamW', 64 | lr=0.00006, 65 | betas=(0.9, 0.999), 66 | weight_decay=0.01, 67 | ) 68 | 69 | max_iter = 160000 70 | eval_interval = 8000 71 | checkpoint_interval = 8000 72 | 73 | scheduler = dict(type='PolyLR', 74 | warmup='linear', 75 | warmup_iters=1500, 76 | warmup_ratio=1e-6, 77 | max_steps=max_iter, 78 | power=1.0, 79 | min_lr=0) 80 | -------------------------------------------------------------------------------- /project/segnext/resources/flops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jittor/JSeg/c14696dc4fa6e822fd15b7add2d07067ecb95943/project/segnext/resources/flops.png -------------------------------------------------------------------------------- /project/segnext/small/segnext_small_1024x1024_cityscapes_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/cityscapes_1024x1024.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_s.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.1, 16 | depths=[2, 2, 4, 2]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=256, 21 | dropout_ratio=0.1, 22 | num_classes=19, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=256, 29 | ham_kwargs=dict(MD_R=16)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | # test_cfg=dict(mode='whole')) 33 | test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) 34 | 35 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 36 | std=[58.395, 57.12, 57.375], 37 | to_rgb=True) 38 | test_pipeline = [ 39 | dict(type='LoadImageFromFile'), 40 | dict( 41 | type='MultiScaleFlipAug', 42 | img_scale=(2048, 1024), 43 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 44 | flip=False, 45 | transforms=[ 46 | dict(type='Resize', keep_ratio=True), 47 | dict(type='ResizeToMultiple', size_divisor=32), 48 | dict(type='RandomFlip'), 49 | dict(type='Normalize', **img_norm_cfg), 50 | dict(type='ImageToTensor', keys=['img']), 51 | dict(type='Collect', keys=['img']), 52 | ]) 53 | ] 54 | dataset = dict( 55 | val=dict(pipeline=test_pipeline)) 56 | 57 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 58 | custom_keys={ 59 | 'pos_block': dict(decay_mult=0.), 60 | 'norm': dict(decay_mult=0.), 61 | 'head': dict(lr_mult=10.) 
62 | }) 63 | 64 | optimizer = dict( 65 | type='CustomAdamW', 66 | lr=0.00006, 67 | betas=(0.9, 0.999), 68 | weight_decay=0.01, 69 | ) 70 | 71 | max_iter = 160000 72 | eval_interval = 8000 73 | checkpoint_interval = 8000 74 | 75 | scheduler = dict(type='PolyLR', 76 | warmup='linear', 77 | warmup_iters=1500, 78 | warmup_ratio=1e-6, 79 | max_steps=max_iter, 80 | power=1.0, 81 | min_lr=0) 82 | -------------------------------------------------------------------------------- /project/segnext/small/segnext_small_512x512_ade_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/ade20k.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_s.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.1, 16 | depths=[2, 2, 4, 2]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=256, 21 | dropout_ratio=0.1, 22 | num_classes=150, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=256, 29 | ham_kwargs=dict(MD_R=16)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | test_cfg=dict(mode='whole')) 33 | 34 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 35 | std=[58.395, 57.12, 57.375], 36 | to_rgb=True) 37 | test_pipeline = [ 38 | dict(type='LoadImageFromFile'), 39 | dict( 40 | type='MultiScaleFlipAug', 41 | img_scale=(2048, 512), 42 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='ResizeToMultiple', size_divisor=32), 47 | dict(type='RandomFlip'), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | dataset = dict( 54 | val=dict(pipeline=test_pipeline)) 55 | 56 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 57 | custom_keys={ 58 | 'pos_block': dict(decay_mult=0.), 59 | 'norm': dict(decay_mult=0.), 60 | 'head': dict(lr_mult=10.) 
61 | }) 62 | 63 | optimizer = dict( 64 | type='CustomAdamW', 65 | lr=0.00006, 66 | betas=(0.9, 0.999), 67 | weight_decay=0.01, 68 | ) 69 | 70 | max_iter = 160000 71 | eval_interval = 8000 72 | checkpoint_interval = 8000 73 | 74 | scheduler = dict(type='PolyLR', 75 | warmup='linear', 76 | warmup_iters=1500, 77 | warmup_ratio=1e-6, 78 | max_steps=max_iter, 79 | power=1.0, 80 | min_lr=0) 81 | -------------------------------------------------------------------------------- /project/segnext/small/segnext_small_896x896_isaid_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/isaid_869x869.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_s.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[64, 128, 320, 512], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.1, 16 | depths=[2, 2, 4, 2]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[128, 320, 512], 19 | in_index=[1, 2, 3], 20 | channels=256, 21 | dropout_ratio=0.1, 22 | num_classes=16, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=256, 29 | ham_kwargs=dict(MD_R=16)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | test_cfg=dict(mode='whole')) 33 | 34 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 35 | std=[58.395, 57.12, 57.375], 36 | to_rgb=True) 37 | test_pipeline = [ 38 | dict(type='LoadImageFromFile'), 39 | dict( 40 | type='MultiScaleFlipAug', 41 | img_scale=(896, 896), 42 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='ResizeToMultiple', size_divisor=32), 47 | dict(type='RandomFlip'), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | dataset = dict( 54 | val=dict(pipeline=test_pipeline)) 55 | 56 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 57 | custom_keys={ 58 | 'pos_block': dict(decay_mult=0.), 59 | 'norm': dict(decay_mult=0.), 60 | 'head': dict(lr_mult=10.) 
61 | }) 62 | 63 | optimizer = dict( 64 | type='CustomAdamW', 65 | lr=0.00006, 66 | betas=(0.9, 0.999), 67 | weight_decay=0.01, 68 | ) 69 | 70 | max_iter = 160000 71 | eval_interval = 8000 72 | checkpoint_interval = 8000 73 | 74 | scheduler = dict(type='PolyLR', 75 | warmup='linear', 76 | warmup_iters=1500, 77 | warmup_ratio=1e-6, 78 | max_steps=max_iter, 79 | power=1.0, 80 | min_lr=0) 81 | -------------------------------------------------------------------------------- /project/segnext/tiny/segnext_tiny_1024x1024_cityscapes_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/cityscapes_1024x1024.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_t.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[32, 64, 160, 256], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.1, 16 | depths=[3, 3, 5, 2]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[64, 160, 256], 19 | in_index=[1, 2, 3], 20 | channels=256, 21 | dropout_ratio=0.1, 22 | num_classes=19, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=256, 29 | ham_kwargs=dict(MD_R=16)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | # test_cfg=dict(mode='whole')) 33 | test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) 34 | 35 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 36 | std=[58.395, 57.12, 57.375], 37 | to_rgb=True) 38 | test_pipeline = [ 39 | dict(type='LoadImageFromFile'), 40 | dict( 41 | type='MultiScaleFlipAug', 42 | img_scale=(2048, 1024), 43 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 44 | flip=False, 45 | transforms=[ 46 | dict(type='Resize', keep_ratio=True), 47 | dict(type='ResizeToMultiple', size_divisor=32), 48 | dict(type='RandomFlip'), 49 | dict(type='Normalize', **img_norm_cfg), 50 | dict(type='ImageToTensor', keys=['img']), 51 | dict(type='Collect', keys=['img']), 52 | ]) 53 | ] 54 | dataset = dict( 55 | val=dict(pipeline=test_pipeline)) 56 | 57 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 58 | custom_keys={ 59 | 'pos_block': dict(decay_mult=0.), 60 | 'norm': dict(decay_mult=0.), 61 | 'head': dict(lr_mult=10.) 
62 | }) 63 | 64 | optimizer = dict( 65 | type='CustomAdamW', 66 | lr=0.00006, 67 | betas=(0.9, 0.999), 68 | weight_decay=0.01, 69 | ) 70 | 71 | max_iter = 160000 72 | eval_interval = 8000 73 | checkpoint_interval = 8000 74 | 75 | scheduler = dict(type='PolyLR', 76 | warmup='linear', 77 | warmup_iters=1500, 78 | warmup_ratio=1e-6, 79 | max_steps=max_iter, 80 | power=1.0, 81 | min_lr=0) 82 | -------------------------------------------------------------------------------- /project/segnext/tiny/segnext_tiny_512x512_ade_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/ade20k.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_t.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[32, 64, 160, 256], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.1, 16 | depths=[3, 3, 5, 2]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[64, 160, 256], 19 | in_index=[1, 2, 3], 20 | channels=256, 21 | dropout_ratio=0.1, 22 | num_classes=150, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=256, 29 | ham_kwargs=dict(MD_R=16)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | test_cfg=dict(mode='whole')) 33 | 34 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 35 | std=[58.395, 57.12, 57.375], 36 | to_rgb=True) 37 | test_pipeline = [ 38 | dict(type='LoadImageFromFile'), 39 | dict( 40 | type='MultiScaleFlipAug', 41 | img_scale=(2048, 512), 42 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='ResizeToMultiple', size_divisor=32), 47 | dict(type='RandomFlip'), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | dataset = dict( 54 | val=dict(pipeline=test_pipeline)) 55 | 56 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 57 | custom_keys={ 58 | 'pos_block': dict(decay_mult=0.), 59 | 'norm': dict(decay_mult=0.), 60 | 'head': dict(lr_mult=10.) 
61 | }) 62 | 63 | optimizer = dict( 64 | type='CustomAdamW', 65 | lr=0.00006, 66 | betas=(0.9, 0.999), 67 | weight_decay=0.01, 68 | ) 69 | 70 | max_iter = 160000 71 | eval_interval = 8000 72 | checkpoint_interval = 8000 73 | 74 | scheduler = dict(type='PolyLR', 75 | warmup='linear', 76 | warmup_iters=1500, 77 | warmup_ratio=1e-6, 78 | max_steps=max_iter, 79 | power=1.0, 80 | min_lr=0) 81 | -------------------------------------------------------------------------------- /project/segnext/tiny/segnext_tiny_896x896_isaid_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/isaid_869x869.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='GN', num_groups=32) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://mscan_t.pkl', 11 | backbone=dict(type='MSCAN', 12 | embed_dims=[32, 64, 160, 256], 13 | mlp_ratios=[8, 8, 4, 4], 14 | drop_rate=0.0, 15 | drop_path_rate=0.1, 16 | depths=[3, 3, 5, 2]), 17 | decode_head=dict(type='LightHamHead', 18 | in_channels=[64, 160, 256], 19 | in_index=[1, 2, 3], 20 | channels=256, 21 | dropout_ratio=0.1, 22 | num_classes=16, 23 | norm_cfg=norm_cfg, 24 | align_corners=False, 25 | loss_decode=dict(type='CrossEntropyLoss', 26 | use_sigmoid=False, 27 | loss_weight=1.0), 28 | ham_channels=256, 29 | ham_kwargs=dict(MD_R=16)), 30 | # model training and testing settings 31 | train_cfg=dict(), 32 | test_cfg=dict(mode='whole')) 33 | 34 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], 35 | std=[58.395, 57.12, 57.375], 36 | to_rgb=True) 37 | test_pipeline = [ 38 | dict(type='LoadImageFromFile'), 39 | dict( 40 | type='MultiScaleFlipAug', 41 | img_scale=(896, 896), 42 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='ResizeToMultiple', size_divisor=32), 47 | dict(type='RandomFlip'), 48 | dict(type='Normalize', **img_norm_cfg), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | dataset = dict( 54 | val=dict(pipeline=test_pipeline)) 55 | 56 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 57 | custom_keys={ 58 | 'pos_block': dict(decay_mult=0.), 59 | 'norm': dict(decay_mult=0.), 60 | 'head': dict(lr_mult=10.) 
61 | }) 62 | 63 | optimizer = dict( 64 | type='CustomAdamW', 65 | lr=0.00006, 66 | betas=(0.9, 0.999), 67 | weight_decay=0.01, 68 | ) 69 | 70 | max_iter = 160000 71 | eval_interval = 8000 72 | checkpoint_interval = 8000 73 | 74 | scheduler = dict(type='PolyLR', 75 | warmup='linear', 76 | warmup_iters=1500, 77 | warmup_ratio=1e-6, 78 | max_steps=max_iter, 79 | power=1.0, 80 | min_lr=0) 81 | -------------------------------------------------------------------------------- /project/swin/readme.md: -------------------------------------------------------------------------------- 1 | # swin -------------------------------------------------------------------------------- /project/swin/tiny/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/ade20k.py', 3 | '../../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | backbone_norm_cfg = dict(type='LN') 9 | model = dict( 10 | type='EncoderDecoder', 11 | pretrained='jittorhub://swin_tiny_patch4_window7_224.pkl', 12 | backbone=dict(type='SwinTransformer', 13 | pretrain_img_size=224, 14 | embed_dims=96, 15 | patch_size=4, 16 | window_size=7, 17 | mlp_ratio=4, 18 | depths=[2, 2, 6, 2], 19 | num_heads=[3, 6, 12, 24], 20 | strides=(4, 2, 2, 2), 21 | out_indices=(0, 1, 2, 3), 22 | qkv_bias=True, 23 | qk_scale=None, 24 | patch_norm=True, 25 | drop_rate=0., 26 | attn_drop_rate=0., 27 | drop_path_rate=0.3, 28 | use_abs_pos_embed=False, 29 | act_cfg=dict(type='GELU'), 30 | norm_cfg=backbone_norm_cfg), 31 | decode_head=dict(type='UPerHead', 32 | in_channels=[96, 192, 384, 768], 33 | in_index=[0, 1, 2, 3], 34 | pool_scales=(1, 2, 3, 6), 35 | channels=512, 36 | dropout_ratio=0.1, 37 | num_classes=150, 38 | norm_cfg=norm_cfg, 39 | align_corners=False, 40 | loss_decode=dict(type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | loss_weight=1.0)), 43 | auxiliary_head=dict(type='FCNHead', 44 | in_channels=384, 45 | in_index=2, 46 | channels=256, 47 | num_convs=1, 48 | concat_input=False, 49 | dropout_ratio=0.1, 50 | num_classes=150, 51 | norm_cfg=norm_cfg, 52 | align_corners=False, 53 | loss_decode=dict(type='CrossEntropyLoss', 54 | use_sigmoid=False, 55 | loss_weight=0.4)), 56 | # model training and testing settings 57 | train_cfg=dict(), 58 | test_cfg=dict(mode='whole')) 59 | 60 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 61 | custom_keys={ 62 | 'absolute_pos_embed': 63 | dict(decay_mult=0.), 64 | 'relative_position_bias_table': 65 | dict(decay_mult=0.), 66 | 'norm': 67 | dict(decay_mult=0.) 
68 | }) 69 | 70 | optimizer = dict( 71 | type='CustomAdamW', 72 | lr=0.00006, 73 | betas=(0.9, 0.999), 74 | weight_decay=0.01, 75 | ) 76 | 77 | max_iter = 160000 78 | eval_interval = 8000 79 | checkpoint_interval = 8000 80 | 81 | scheduler = dict(type='PolyLR', 82 | warmup='linear', 83 | warmup_iters=1500, 84 | warmup_ratio=1e-6, 85 | max_steps=max_iter, 86 | power=1.0, 87 | min_lr=0) 88 | -------------------------------------------------------------------------------- /project/upernet/readme.md: -------------------------------------------------------------------------------- 1 | # upernet -------------------------------------------------------------------------------- /project/upernet/upernet_r50_512x512_ade20k_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://resnet50_v1c-2cccc1ad.pkl', 11 | backbone=dict(type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 1, 1), 16 | strides=(1, 2, 2, 2), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | contract_dilation=True), 20 | decode_head=dict(type='UPerHead', 21 | in_channels=[256, 512, 1024, 2048], 22 | in_index=[0, 1, 2, 3], 23 | pool_scales=(1, 2, 3, 6), 24 | channels=512, 25 | dropout_ratio=0.1, 26 | num_classes=150, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | loss_decode=dict(type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | loss_weight=1.0)), 32 | auxiliary_head=dict(type='FCNHead', 33 | in_channels=1024, 34 | in_index=2, 35 | channels=256, 36 | num_convs=1, 37 | concat_input=False, 38 | dropout_ratio=0.1, 39 | num_classes=150, 40 | norm_cfg=norm_cfg, 41 | align_corners=False, 42 | loss_decode=dict(type='CrossEntropyLoss', 43 | use_sigmoid=False, 44 | loss_weight=0.4)), 45 | # model training and testing settings 46 | train_cfg=dict(), 47 | test_cfg=dict(mode='whole')) 48 | 49 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 50 | 51 | max_iter = 160000 52 | eval_interval = 8000 53 | checkpoint_interval = 8000 54 | 55 | scheduler = dict(type='PolyLR', max_steps=max_iter, power=0.9, min_lr=1e-4) 56 | -------------------------------------------------------------------------------- /project/vit/readme.md: -------------------------------------------------------------------------------- 1 | # Vision Transformer -------------------------------------------------------------------------------- /project/vit/upernet_vit-b16_ln_mln_512x512_ade20k_160k.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ade20k.py', 3 | '../_base_/default_runtime.py', 4 | ] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN') 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='jittorhub://vit_base_p16_224-80ecf9dd.pkl', 11 | backbone=dict( 12 | type='VisionTransformer', 13 | img_size=(512, 512), 14 | patch_size=16, 15 | in_channels=3, 16 | embed_dims=768, 17 | num_layers=12, 18 | num_heads=12, 19 | mlp_ratio=4, 20 | out_indices=(2, 5, 8, 11), 21 | qkv_bias=True, 22 | drop_rate=0.0, 23 | attn_drop_rate=0.0, 24 | drop_path_rate=0, 25 | with_cls_token=True, 26 | norm_cfg=dict(type='LN', eps=1e-6), 27 | act_cfg=dict(type='GELU'), 28 | norm_eval=False, 29 | final_norm=True, 30 | interpolate_mode='bicubic'), 31 | neck=dict( 32 | 
type='MultiLevelNeck', 33 | in_channels=[768, 768, 768, 768], 34 | out_channels=768, 35 | scales=[4, 2, 1, 0.5]), 36 | decode_head=dict( 37 | type='UPerHead', 38 | in_channels=[768, 768, 768, 768], 39 | in_index=[0, 1, 2, 3], 40 | pool_scales=(1, 2, 3, 6), 41 | channels=512, 42 | dropout_ratio=0.1, 43 | num_classes=150, 44 | norm_cfg=norm_cfg, 45 | align_corners=False, 46 | loss_decode=dict( 47 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 48 | auxiliary_head=dict( 49 | type='FCNHead', 50 | in_channels=768, 51 | in_index=3, 52 | channels=256, 53 | num_convs=1, 54 | concat_input=False, 55 | dropout_ratio=0.1, 56 | num_classes=150, 57 | norm_cfg=norm_cfg, 58 | align_corners=False, 59 | loss_decode=dict( 60 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 61 | # model training and testing settings 62 | train_cfg=dict(), 63 | test_cfg=dict(mode='whole')) # yapf: disable 64 | 65 | parameter_groups_generator = dict(type="CustomPrameterGroupsGenerator", 66 | custom_keys={ 67 | 'pos_embed': dict(decay_mult=0.), 68 | 'cls_token': dict(decay_mult=0.), 69 | 'norm': dict(decay_mult=0.) 70 | }) 71 | 72 | optimizer = dict( 73 | type='CustomAdamW', 74 | lr=0.0001, 75 | betas=(0.9, 0.999), 76 | weight_decay=0.05, 77 | ) 78 | 79 | max_iter = 160000 80 | eval_interval = 8000 81 | checkpoint_interval = 8000 82 | 83 | scheduler = dict(type='PolyLR', 84 | warmup='linear', 85 | warmup_iters=1500, 86 | warmup_ratio=1e-6, 87 | max_steps=max_iter, 88 | power=1.0, 89 | min_lr=0) 90 | -------------------------------------------------------------------------------- /python/jseg/__init__.py: -------------------------------------------------------------------------------- 1 | from . import models 2 | from . import runner 3 | from . import config 4 | from . import datasets 5 | from . import ops 6 | from . import utils 7 | from . import optims 8 | from . 
import sampler 9 | 10 | # version must use ' instead of " 11 | __version__ = '0.1.0.0' 12 | -------------------------------------------------------------------------------- /python/jseg/bricks/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv import build_conv_layer 2 | from .activation import build_activation_layer 3 | from .norm import build_norm_layer 4 | from .drop import build_dropout 5 | from .padding import build_padding_layer 6 | from .conv_module import ConvModule 7 | from .depthwise_separable_conv_module import DepthwiseSeparableConvModule 8 | -------------------------------------------------------------------------------- /python/jseg/bricks/activation.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | 4 | from jseg.utils.registry import ACTIVATION_LAYERS, build_from_cfg 5 | 6 | for module in [ 7 | nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.ReLU6, nn.ELU, nn.Sigmoid, nn.Tanh 8 | ]: 9 | if module.__name__ == 'relu': 10 | ACTIVATION_LAYERS.register_module(name='ReLU', module=module) 11 | elif module.__name__ == 'relu6': 12 | ACTIVATION_LAYERS.register_module(name='ReLU6', module=module) 13 | else: 14 | ACTIVATION_LAYERS.register_module(module=module) 15 | 16 | 17 | @ACTIVATION_LAYERS.register_module() 18 | class HSigmoid(nn.Module): 19 | 20 | def __init__(self, bias=3.0, divisor=6.0, min_value=0.0, max_value=1.0): 21 | super().__init__() 22 | self.bias = bias 23 | self.divisor = divisor 24 | assert self.divisor != 0 25 | self.min_value = min_value 26 | self.max_value = max_value 27 | 28 | def execute(self, x): 29 | x = (x + self.bias) / self.divisor 30 | 31 | return x.clamp_(self.min_value, self.max_value) 32 | 33 | 34 | @ACTIVATION_LAYERS.register_module() 35 | class HSwish(nn.Module): 36 | 37 | def __init__(self): 38 | super().__init__() 39 | self.act = nn.ReLU6() 40 | 41 | def execute(self, x): 42 | return x * self.act(x + 3) / 6 43 | 44 | 45 | @ACTIVATION_LAYERS.register_module(name='Clip') 46 | @ACTIVATION_LAYERS.register_module() 47 | class Clamp(nn.Module): 48 | 49 | def __init__(self, min=-1., max=1.): 50 | super().__init__() 51 | self.min = min 52 | self.max = max 53 | 54 | def execute(self, x): 55 | return jt.clamp(x, min_v=self.min, max_v=self.max) 56 | 57 | 58 | class GELU(nn.Module): 59 | 60 | def execute(self, input): 61 | return nn.gelu(input) 62 | 63 | 64 | ACTIVATION_LAYERS.register_module(module=GELU) 65 | 66 | 67 | def build_activation_layer(cfg): 68 | return build_from_cfg(cfg, ACTIVATION_LAYERS) 69 | -------------------------------------------------------------------------------- /python/jseg/bricks/conv.py: -------------------------------------------------------------------------------- 1 | import math 2 | from jittor import nn 3 | from jseg.utils.registry import CONV_LAYERS 4 | from typing import Tuple, Union 5 | 6 | CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d) 7 | CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d) 8 | CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d) 9 | CONV_LAYERS.register_module('Conv', module=nn.Conv2d) 10 | 11 | 12 | class Conv2dAdaptivePadding(nn.Conv2d): 13 | 14 | def __init__(self, 15 | in_channels: int, 16 | out_channels: int, 17 | kernel_size: Union[int, Tuple[int, int]], 18 | stride: Union[int, Tuple[int, int]] = 1, 19 | padding: Union[int, Tuple[int, int]] = 0, 20 | dilation: Union[int, Tuple[int, int]] = 1, 21 | groups: int = 1, 22 | bias: bool = True): 23 | 
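# Note: the user-supplied `padding` is intentionally discarded (0 is passed
# to the parent class below); 'same'-style padding is instead computed
# dynamically in execute() from the input size, stride and dilation,
# TensorFlow-style.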
super().__init__(in_channels, out_channels, kernel_size, stride, 0, 24 | dilation, groups, bias) 25 | 26 | def execute(self, x): 27 | img_h, img_w = x.size()[-2:] 28 | kernel_h, kernel_w = self.weight.size()[-2:] 29 | stride_h, stride_w = self.stride 30 | output_h = math.ceil(img_h / stride_h) 31 | output_w = math.ceil(img_w / stride_w) 32 | pad_h = (max((output_h - 1) * self.stride[0] + 33 | (kernel_h - 1) * self.dilation[0] + 1 - img_h, 0)) 34 | pad_w = (max((output_w - 1) * self.stride[1] + 35 | (kernel_w - 1) * self.dilation[1] + 1 - img_w, 0)) 36 | if pad_h > 0 or pad_w > 0: 37 | x = nn.pad(x, [ 38 | pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2 39 | ]) 40 | return nn.conv2d(x, self.weight, self.bias, self.stride, self.padding, 41 | self.dilation, self.groups) 42 | 43 | 44 | CONV_LAYERS.register_module('Conv2dAdaptivePadding', 45 | module=Conv2dAdaptivePadding) 46 | 47 | 48 | def build_conv_layer(cfg, *args, **kwargs): 49 | """Build convolution layer. 50 | 51 | Args: 52 | cfg (None or dict): The conv layer config, which should contain: 53 | - type (str): Layer type. 54 | - layer args: Args needed to instantiate a conv layer. 55 | args (argument list): Arguments passed to the `__init__` 56 | method of the corresponding conv layer. 57 | kwargs (keyword arguments): Keyword arguments passed to the `__init__` 58 | method of the corresponding conv layer. 59 | 60 | Returns: 61 | nn.Module: Created conv layer. 62 | """ 63 | if cfg is None: 64 | cfg_ = dict(type='Conv2d') 65 | else: 66 | if not isinstance(cfg, dict): 67 | raise TypeError('cfg must be a dict') 68 | if 'type' not in cfg: 69 | raise KeyError('the cfg dict must contain the key "type"') 70 | cfg_ = cfg.copy() 71 | 72 | layer_type = cfg_.pop('type') 73 | conv_layer = CONV_LAYERS.get(layer_type) 74 | 75 | layer = conv_layer(*args, **kwargs, **cfg_) 76 | 77 | return layer 78 | -------------------------------------------------------------------------------- /python/jseg/bricks/depthwise_separable_conv_module.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | 3 | from .conv_module import ConvModule 4 | 5 | 6 | class DepthwiseSeparableConvModule(nn.Module): 7 | def __init__(self, 8 | in_channels, 9 | out_channels, 10 | kernel_size, 11 | stride=1, 12 | padding=0, 13 | dilation=1, 14 | norm_cfg=None, 15 | act_cfg=dict(type='ReLU'), 16 | dw_norm_cfg='default', 17 | dw_act_cfg='default', 18 | pw_norm_cfg='default', 19 | pw_act_cfg='default', 20 | **kwargs): 21 | super().__init__() 22 | assert 'groups' not in kwargs, 'groups should not be specified' 23 | 24 | # if norm/activation config of depthwise/pointwise ConvModule is not 25 | # specified, use default config.
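# The string 'default' acts as a sentinel distinguishing "not given" from an
# explicit None, so each branch can be configured independently, e.g.
# (illustrative call, not taken from this repo):
#   DepthwiseSeparableConvModule(64, 128, 3, padding=1,
#                                norm_cfg=dict(type='BN'),
#                                dw_act_cfg=None)  # disable act on the depthwise branch only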
26 | dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg 27 | dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg 28 | pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg 29 | pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg 30 | 31 | # depthwise convolution 32 | self.depthwise_conv = ConvModule( 33 | in_channels, 34 | in_channels, 35 | kernel_size, 36 | stride=stride, 37 | padding=padding, 38 | dilation=dilation, 39 | groups=in_channels, 40 | norm_cfg=dw_norm_cfg, 41 | act_cfg=dw_act_cfg, 42 | **kwargs) 43 | 44 | self.pointwise_conv = ConvModule( 45 | in_channels, 46 | out_channels, 47 | 1, 48 | norm_cfg=pw_norm_cfg, 49 | act_cfg=pw_act_cfg, 50 | **kwargs) 51 | 52 | def execute(self, x): 53 | x = self.depthwise_conv(x) 54 | x = self.pointwise_conv(x) 55 | return x 56 | -------------------------------------------------------------------------------- /python/jseg/bricks/drop.py: -------------------------------------------------------------------------------- 1 | from jittor.nn import Dropout, DropPath 2 | from jseg.utils.registry import DROPOUT_LAYERS, build_from_cfg 3 | 4 | DROPOUT_LAYERS.register_module(name='Dropout', module=Dropout) 5 | DROPOUT_LAYERS.register_module(name='DropPath', module=DropPath) 6 | 7 | 8 | def build_dropout(cfg, **default_args): 9 | """Builder for drop out layers.""" 10 | return build_from_cfg(cfg, DROPOUT_LAYERS, **default_args) 11 | -------------------------------------------------------------------------------- /python/jseg/bricks/padding.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | from jseg.utils.registry import PADDING_LAYERS 3 | 4 | PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d) 5 | PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d) 6 | PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d) 7 | 8 | 9 | def build_padding_layer(cfg, *args, **kwargs): 10 | if not isinstance(cfg, dict): 11 | raise TypeError('cfg must be a dict') 12 | if 'type' not in cfg: 13 | raise KeyError('the cfg dict must contain the key "type"') 14 | 15 | cfg_ = cfg.copy() 16 | padding_type = cfg_.pop('type') 17 | padding_layer = PADDING_LAYERS.get(padding_type) 18 | 19 | layer = padding_layer(*args, **kwargs, **cfg_) 20 | 21 | return layer 22 | -------------------------------------------------------------------------------- /python/jseg/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import * 2 | -------------------------------------------------------------------------------- /python/jseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import CustomDataset 2 | from .isaid import iSAIDDataset 3 | from .ade import ADE20KDataset 4 | from .voc import PascalVOCDataset 5 | from .cityscapes import CityscapesDataset 6 | from .loveda import LoveDADataset 7 | from .isprs import ISPRSDataset 8 | from .potsdam import PotsdamDataset 9 | from .zero_voc12 import ZeroPascalVOCDataset20 10 | from .zero_coco_stuff import ZeroCOCOStuffDataset 11 | 12 | __all__ = [ 13 | 'CustomDataset', 'iSAIDDataset', 'ADE20KDataset', 'PascalVOCDataset', 14 | 'CityscapesDataset', 'LoveDADataset', 'ISPRSDataset', 'PotsdamDataset', 15 | 'ZeroPascalVOCDataset20', 'ZeroCOCOStuffDataset' 16 | ] 17 | -------------------------------------------------------------------------------- /python/jseg/datasets/isaid.py: 
-------------------------------------------------------------------------------- 1 | from jseg.utils.registry import DATASETS 2 | from .custom import CustomDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class iSAIDDataset(CustomDataset): 7 | CLASSES = ('background', 'ship', 'store_tank', 'baseball_diamond', 8 | 'tennis_court', 'basketball_court', 'Ground_Track_Field', 9 | 'Bridge', 'Large_Vehicle', 'Small_Vehicle', 'Helicopter', 10 | 'Swimming_pool', 'Roundabout', 'Soccer_ball_field', 'plane', 11 | 'Harbor') 12 | PALETTE = [[0, 0, 0], [0, 0, 63], [0, 63, 63], [0, 63, 0], [0, 63, 127], 13 | [0, 63, 191], [0, 63, 255], [0, 127, 63], [0, 127, 127], 14 | [0, 0, 127], [0, 0, 191], [0, 0, 255], [0, 191, 127], 15 | [0, 127, 191], [0, 127, 255], [0, 100, 155]] 16 | 17 | def __init__(self, **kwargs): 18 | super(iSAIDDataset, 19 | self).__init__(img_suffix='.png', 20 | seg_map_suffix='_instance_color_RGB.png', 21 | **kwargs) 22 | -------------------------------------------------------------------------------- /python/jseg/datasets/isprs.py: -------------------------------------------------------------------------------- 1 | from jseg.utils.registry import DATASETS 2 | from .custom import CustomDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class ISPRSDataset(CustomDataset): 7 | """ISPRS dataset. 8 | In segmentation map annotation for ISPRS, 0 is the ignore index. 9 | ``reduce_zero_label`` should be set to True. The ``img_suffix`` and 10 | ``seg_map_suffix`` are both fixed to '.png'. 11 | """ 12 | CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree', 13 | 'car', 'clutter') 14 | 15 | PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0], 16 | [255, 255, 0], [255, 0, 0]] 17 | 18 | def __init__(self, **kwargs): 19 | super(ISPRSDataset, self).__init__( 20 | img_suffix='.png', 21 | seg_map_suffix='.png', 22 | reduce_zero_label=True, 23 | **kwargs) 24 | -------------------------------------------------------------------------------- /python/jseg/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .compose import Compose 2 | from .formating import (Collect, DefaultFormatBundle, ImageToTensor, to_tensor) 3 | from .loading import LoadAnnotations, LoadImageFromFile 4 | from .test_time_aug import MultiScaleFlipAug 5 | from .transforms import (ResizeToMultiple, CLAHE, Normalize, Pad, 6 | PhotoMetricDistortion, RandomCrop, RandomFlip, 7 | RandomRotate, Rerange, Resize, RGB2Gray, SegRescale) 8 | from .utils import imread, imresize, imrescale, imflip, impad_to_multiple, impad, imnormalize, imrotate, clahe, bgr2hsv, hsv2bgr 9 | 10 | __all__ = [ 11 | 'Compose', 'to_tensor', 'DefaultFormatBundle', 'ImageToTensor', 12 | 'Collect', 'LoadAnnotations', 'LoadImageFromFile', 'MultiScaleFlipAug', 13 | 'ResizeToMultiple', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 14 | 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', 15 | 'CLAHE', 'Rerange', 'RGB2Gray', 'imread', 'imresize', 16 | 'imrescale', 'imflip', 'impad_to_multiple', 'impad', 'imnormalize', 17 | 'imrotate', 'clahe', 'bgr2hsv', 'hsv2bgr' 18 | ] 19 | -------------------------------------------------------------------------------- /python/jseg/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from jseg.utils.registry import build_from_cfg, TRANSFORMS 4 | 5 | 6 | @TRANSFORMS.register_module() 7 | class Compose(object): 8 | def __init__(self, transforms): 9 | assert 
isinstance(transforms, collections.abc.Sequence) 10 | self.transforms = [] 11 | for transform in transforms: 12 | if isinstance(transform, dict): 13 | transform = build_from_cfg(transform, TRANSFORMS) 14 | self.transforms.append(transform) 15 | elif callable(transform): 16 | self.transforms.append(transform) 17 | else: 18 | raise TypeError('transform must be callable or a dict') 19 | 20 | def __call__(self, data): 21 | for t in self.transforms: 22 | data = t(data) 23 | if data is None: 24 | return None 25 | return data 26 | 27 | def __repr__(self): 28 | format_string = self.__class__.__name__ + '(' 29 | for t in self.transforms: 30 | format_string += '\n' 31 | format_string += f' {t}' 32 | format_string += '\n)' 33 | return format_string 34 | -------------------------------------------------------------------------------- /python/jseg/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from jseg.utils.registry import TRANSFORMS 4 | import jittor as jt 5 | 6 | 7 | def to_tensor(data): 8 | return jt.Var(data) 9 | 10 | 11 | @TRANSFORMS.register_module() 12 | class DefaultFormatBundle(object): 13 | def __call__(self, results): 14 | 15 | if 'img' in results: 16 | img = results['img'] 17 | if len(img.shape) < 3: 18 | img = np.expand_dims(img, -1) 19 | img = np.ascontiguousarray(img.transpose(2, 0, 1)) 20 | results['img'] = to_tensor(img) 21 | if 'gt_semantic_seg' in results: 22 | # convert to long 23 | results['gt_semantic_seg'] = to_tensor( 24 | results['gt_semantic_seg'][None, ...].astype(np.int64)) 25 | return results 26 | 27 | def __repr__(self): 28 | return self.__class__.__name__ 29 | 30 | 31 | @TRANSFORMS.register_module() 32 | class ImageToTensor(object): 33 | def __init__(self, keys): 34 | self.keys = keys 35 | 36 | def __call__(self, results): 37 | for key in self.keys: 38 | img = results[key] 39 | if len(img.shape) < 3: 40 | img = np.expand_dims(img, -1) 41 | results[key] = to_tensor(img.transpose(2, 0, 1)) 42 | return results 43 | 44 | def __repr__(self): 45 | return self.__class__.__name__ + f'(keys={self.keys})' 46 | 47 | 48 | @TRANSFORMS.register_module() 49 | class Collect(object): 50 | def __init__(self, 51 | keys, 52 | meta_keys=('filename', 'ori_filename', 'ori_shape', 53 | 'img_shape', 'pad_shape', 'scale_factor', 'flip', 54 | 'flip_direction', 'img_norm_cfg')): 55 | self.keys = keys 56 | self.meta_keys = meta_keys 57 | 58 | def __call__(self, results): 59 | data = {} 60 | img_meta = {} 61 | for key in self.meta_keys: 62 | img_meta[key] = results[key] 63 | data['img_metas'] = img_meta 64 | for key in self.keys: 65 | data[key] = results[key] 66 | return data 67 | 68 | def __repr__(self): 69 | return self.__class__.__name__ + f'(keys={self.keys}, meta_keys={self.meta_keys})' 70 | -------------------------------------------------------------------------------- /python/jseg/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import numpy as np 3 | from jseg.utils.registry import TRANSFORMS 4 | from .utils import imread 5 | 6 | 7 | @TRANSFORMS.register_module() 8 | class LoadImageFromFile(object): 9 | def __init__(self, to_float32=False, color_type='color', backend='cv2'): 10 | self.to_float32 = to_float32 11 | self.color_type = color_type 12 | self.backend = backend 13 | 14 | def __call__(self, results): 15 | if results.get('img_prefix') is not None: 16 | filename = osp.join(results['img_prefix'], 17 
| results['img_info']['filename']) 18 | else: 19 | filename = results['img_info']['filename'] 20 | img = imread(filename, flag=self.color_type, backend=self.backend) 21 | 22 | if self.to_float32: 23 | img = img.astype(np.float32) 24 | 25 | results['filename'] = filename 26 | results['ori_filename'] = results['img_info']['filename'] 27 | results['img'] = img 28 | results['img_shape'] = img.shape 29 | results['ori_shape'] = img.shape 30 | # Set initial values for default meta_keys 31 | results['pad_shape'] = img.shape 32 | results['scale_factor'] = 1.0 33 | num_channels = 1 if len(img.shape) < 3 else img.shape[2] 34 | results['img_norm_cfg'] = dict(mean=np.zeros(num_channels, 35 | dtype=np.float32), 36 | std=np.ones(num_channels, 37 | dtype=np.float32), 38 | to_rgb=False) 39 | return results 40 | 41 | def __repr__(self): 42 | repr_str = self.__class__.__name__ 43 | repr_str += f'(to_float32={self.to_float32},' 44 | repr_str += f"color_type='{self.color_type}'," 45 | repr_str += f"backend='{self.backend}')" 46 | return repr_str 47 | 48 | 49 | @TRANSFORMS.register_module() 50 | class LoadAnnotations(object): 51 | def __init__(self, reduce_zero_label=False, backend='pillow'): 52 | self.reduce_zero_label = reduce_zero_label 53 | self.backend = backend 54 | 55 | def __call__(self, results): 56 | if results.get('seg_prefix', None) is not None: 57 | filename = osp.join(results['seg_prefix'], 58 | results['ann_info']['seg_map']) 59 | else: 60 | filename = results['ann_info']['seg_map'] 61 | gt_semantic_seg = imread( 62 | filename, flag='unchanged', 63 | backend=self.backend).squeeze().astype(np.uint8) 64 | # modify if custom classes 65 | if results.get('label_map', None) is not None: 66 | gt_semantic_seg_copy = gt_semantic_seg.copy() 67 | for old_id, new_id in results['label_map'].items(): 68 | gt_semantic_seg[gt_semantic_seg_copy == old_id] = new_id 69 | # reduce zero_label 70 | if self.reduce_zero_label: 71 | # avoid using underflow conversion 72 | gt_semantic_seg[gt_semantic_seg == 0] = 255 73 | gt_semantic_seg = gt_semantic_seg - 1 74 | gt_semantic_seg[gt_semantic_seg == 254] = 255 75 | results['gt_semantic_seg'] = gt_semantic_seg 76 | results['seg_fields'].append('gt_semantic_seg') 77 | return results 78 | 79 | def __repr__(self): 80 | repr_str = self.__class__.__name__ 81 | repr_str += f'(reduce_zero_label={self.reduce_zero_label},' 82 | repr_str += f"backend='{self.backend}')" 83 | return repr_str 84 | -------------------------------------------------------------------------------- /python/jseg/datasets/potsdam.py: -------------------------------------------------------------------------------- 1 | from jseg.utils.registry import DATASETS 2 | from .custom import CustomDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class PotsdamDataset(CustomDataset): 7 | """ISPRS Potsdam dataset. 8 | In segmentation map annotation for Potsdam dataset, 0 is the ignore index. 9 | ``reduce_zero_label`` should be set to True. The ``img_suffix`` and 10 | ``seg_map_suffix`` are both fixed to '.png'. 
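With ``reduce_zero_label=True`` the loader remaps raw label 0 to 255 (the
ignore index) and shifts every other label down by one, so the six classes
above train with ids 0-5 (see ``LoadAnnotations`` earlier in this package).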
11 | """ 12 | CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree', 13 | 'car', 'clutter') 14 | 15 | PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0], 16 | [255, 255, 0], [255, 0, 0]] 17 | 18 | def __init__(self, **kwargs): 19 | super(PotsdamDataset, self).__init__( 20 | img_suffix='.png', 21 | seg_map_suffix='.png', 22 | reduce_zero_label=True, 23 | **kwargs) 24 | -------------------------------------------------------------------------------- /python/jseg/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from jseg.utils.registry import DATASETS 4 | from .custom import CustomDataset 5 | 6 | 7 | @DATASETS.register_module() 8 | class PascalVOCDataset(CustomDataset): 9 | CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 10 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 11 | 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 12 | 'train', 'tvmonitor') 13 | 14 | PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], 15 | [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], 16 | [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], 17 | [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], 18 | [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] 19 | 20 | def __init__(self, split, **kwargs): 21 | super(PascalVOCDataset, self).__init__(img_suffix='.jpg', 22 | seg_map_suffix='.png', 23 | split=split, 24 | **kwargs) 25 | assert osp.exists(self.img_dir) and self.split is not None 26 | -------------------------------------------------------------------------------- /python/jseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * 2 | from .decode_heads import * 3 | from .losses import * 4 | from .necks import * 5 | from .segmentors import * 6 | -------------------------------------------------------------------------------- /python/jseg/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .mix_transformer import * 2 | from .resnet import * 3 | from .mscan import MSCAN 4 | from .swin import SwinTransformer 5 | from .resnext import ResNeXt 6 | from .resnest import ResNeSt 7 | from .convnext import ConvNeXt 8 | from .vit import VisionTransformer 9 | from .beit import BEiT 10 | from .mae import MAE 11 | from .mobilenet_v2 import MobileNetV2 12 | from .clip_encoder_rlb import CLIPVisionTransformerWithRLB 13 | from .clip_text_encoder import CLIPTextEncoder 14 | -------------------------------------------------------------------------------- /python/jseg/models/backbones/clip_text_encoder.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | from jseg.utils.registry import BACKBONES 4 | 5 | from jseg.ops.cliprc_ops import Transformer, LayerNorm 6 | 7 | 8 | @BACKBONES.register_module() 9 | class CLIPTextEncoder(nn.Module): 10 | 11 | def __init__(self, 12 | context_length=77, 13 | vocab_size=49408, 14 | transformer_width=512, 15 | transformer_heads=8, 16 | transformer_layers=12, 17 | embed_dim=1024, 18 | pretrained=None, 19 | **kwargs): 20 | super().__init__() 21 | 22 | self.pretrained = pretrained 23 | 24 | self.context_length = context_length 25 | self.transformer = Transformer(width=transformer_width, 26 | layers=transformer_layers, 27 | heads=transformer_heads, 28 | 
attn_mask=self.build_attention_mask()) 29 | 30 | self.vocab_size = vocab_size 31 | self.token_embedding = nn.Embedding(vocab_size, transformer_width) 32 | self.positional_embedding = jt.empty( 33 | (self.context_length, transformer_width)) 34 | self.ln_final = LayerNorm(transformer_width) 35 | self.text_projection = jt.empty((transformer_width, embed_dim)) 36 | 37 | def init_weights(self, pretrained=None): 38 | pretrained = pretrained or self.pretrained 39 | if isinstance(pretrained, str): 40 | checkpoint = jt.load(pretrained) 41 | 42 | state_dict = {} 43 | 44 | for k in checkpoint.keys(): 45 | if k.startswith('transformer.'): 46 | state_dict[k] = checkpoint[k] 47 | 48 | if k == 'positional_embedding' or k == 'text_projection' or k.startswith( 49 | 'token_embedding') or k.startswith('ln_final'): 50 | if k == 'positional_embedding' and checkpoint[k].size( 51 | 0) > self.context_length: 52 | checkpoint[k] = checkpoint[k][:self.context_length] 53 | print('positional_embedding is truncated from 77 to', 54 | self.context_length) 55 | state_dict[k] = checkpoint[k] 56 | 57 | u, w = self.load_state_dict(state_dict, False) 58 | print(u, w, 'are misaligned params in text encoder') 59 | 60 | def build_attention_mask(self): 61 | # lazily create the causal attention mask over the text tokens 62 | # pytorch uses additive attention mask; fill with -inf 63 | mask = jt.empty((self.context_length, self.context_length)) 64 | mask.fill_(float("-inf")) 65 | mask = jt.triu_(mask, 1) # zero out the lower diagonal 66 | return mask 67 | 68 | def execute(self, text): 69 | x = self.token_embedding(text) 70 | x = x + self.positional_embedding 71 | x = x.permute(1, 0, 2) 72 | x = self.transformer(x) 73 | x = x.permute(1, 0, 2) 74 | x = self.ln_final(x) 75 | x = x[jt.arange(x.shape[0]), 76 | text.argmax(dim=-1)] @ self.text_projection 77 | return x 78 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .segformer_head import SegFormerHead 2 | from .fcn_head import FCNHead 3 | from .psp_head import PSPHead 4 | from .ham_head import LightHamHead 5 | from .uper_head import UPerHead 6 | from .ea_head import EAHead 7 | from .cc_head import CCHead 8 | from .da_head import DAHead 9 | from .aspp_head import ASPPHead 10 | from .sep_aspp_head import DepthwiseSeparableASPPHead 11 | from .point_head import PointHead 12 | from .fpn_head import FPNHead 13 | from .nl_head import NLHead 14 | # from .lraspp_head import LRASPPHead 15 | from .gc_head import GCHead 16 | from .ema_head import EMAHead 17 | from .ann_head import ANNHead 18 | from .cliprc_head import ATMSingleHeadSeg -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/cascade_decode_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | from .decode_head import BaseDecodeHead 4 | 5 | 6 | class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta): 7 | def __init__(self, *args, **kwargs): 8 | super(BaseCascadeDecodeHead, self).__init__(*args, **kwargs) 9 | 10 | @abstractmethod 11 | def execute(self, inputs, prev_output): 12 | pass 13 | 14 | def execute_train(self, inputs, prev_output, img_metas, gt_semantic_seg, 15 | train_cfg): 16 | seg_logits = self.execute(inputs, prev_output) 17 | losses = self.losses(seg_logits, gt_semantic_seg) 18 | 
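# losses() is presumably inherited from BaseDecodeHead; only execute(),
# which additionally consumes the previous stage's output, is
# cascade-specific.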
19 | return losses 20 | 21 | def execute_test(self, inputs, prev_output, img_metas, test_cfg): 22 | return self.execute(inputs, prev_output) 23 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/cc_head.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from .fcn_head import FCNHead 3 | from jseg.utils.registry import HEADS 4 | 5 | try: 6 | from jseg.ops import CrissCrossAttention 7 | except ModuleNotFoundError: 8 | CrissCrossAttention = None 9 | 10 | 11 | @HEADS.register_module() 12 | class CCHead(FCNHead): 13 | def __init__(self, recurrence=2, **kwargs): 14 | if CrissCrossAttention is None: 15 | raise RuntimeError('CrissCrossAttention is unavailable: ' 16 | 'it could not be imported from jseg.ops') 17 | super(CCHead, self).__init__(num_convs=2, **kwargs) 18 | self.recurrence = recurrence 19 | self.cca = CrissCrossAttention(self.channels) 20 | 21 | def execute(self, inputs): 22 | """Forward function.""" 23 | x = self._transform_inputs(inputs) 24 | output = self.convs[0](x) 25 | for _ in range(self.recurrence): 26 | output = self.cca(output) 27 | output = self.convs[1](output) 28 | if self.concat_input: 29 | output = self.conv_cat(jt.concat([x, output], dim=1)) 30 | output = self.cls_seg(output) 31 | return output 32 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/ea_head.py: -------------------------------------------------------------------------------- 1 | from .decode_head import BaseDecodeHead 2 | from jseg.utils.registry import HEADS 3 | from jseg.ops import External_attention 4 | 5 | 6 | @HEADS.register_module() 7 | class EAHead(BaseDecodeHead): 8 | 9 | def __init__(self, **kwargs): 10 | super(EAHead, self).__init__(**kwargs) 11 | self.ea = External_attention(self.in_channels, self.channels) 12 | 13 | def execute(self, inputs): 14 | x = self._transform_inputs(inputs) 15 | x = self.ea(x) 16 | output = self.cls_seg(x) 17 | return output 18 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/fcn_head.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | from jseg.bricks import ConvModule 4 | 5 | from jseg.utils.registry import HEADS 6 | from .decode_head import BaseDecodeHead 7 | 8 | 9 | @HEADS.register_module() 10 | class FCNHead(BaseDecodeHead): 11 | def __init__(self, 12 | num_convs=2, 13 | kernel_size=3, 14 | concat_input=True, 15 | dilation=1, 16 | **kwargs): 17 | assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int) 18 | self.num_convs = num_convs 19 | self.concat_input = concat_input 20 | self.kernel_size = kernel_size 21 | super(FCNHead, self).__init__(**kwargs) 22 | if num_convs == 0: 23 | assert self.in_channels == self.channels 24 | 25 | conv_padding = (kernel_size // 2) * dilation 26 | convs = [] 27 | convs.append( 28 | ConvModule( 29 | self.in_channels, 30 | self.channels, 31 | kernel_size=kernel_size, 32 | padding=conv_padding, 33 | dilation=dilation, 34 | conv_cfg=self.conv_cfg, 35 | norm_cfg=self.norm_cfg, 36 | act_cfg=self.act_cfg)) 37 | for i in range(num_convs - 1): 38 | convs.append( 39 | ConvModule( 40 | self.channels, 41 | self.channels, 42 | kernel_size=kernel_size, 43 | padding=conv_padding, 44 | dilation=dilation, 45 | conv_cfg=self.conv_cfg, 46 | norm_cfg=self.norm_cfg, 47 | act_cfg=self.act_cfg)) 48 | if num_convs == 0: 
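# with num_convs == 0 the head reduces to the bare classifier in cls_seg,
# hence the assert above that in_channels == channels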
self.convs = nn.Identity() 50 | else: 51 | self.convs = nn.Sequential(*convs) 52 | if self.concat_input: 53 | self.conv_cat = ConvModule( 54 | self.in_channels + self.channels, 55 | self.channels, 56 | kernel_size=kernel_size, 57 | padding=kernel_size // 2, 58 | conv_cfg=self.conv_cfg, 59 | norm_cfg=self.norm_cfg, 60 | act_cfg=self.act_cfg) 61 | 62 | def _execute_feature(self, inputs): 63 | x = self._transform_inputs(inputs) 64 | feats = self.convs(x) 65 | if self.concat_input: 66 | feats = self.conv_cat(jt.concat([x, feats], dim=1)) 67 | return feats 68 | 69 | def execute(self, inputs): 70 | output = self._execute_feature(inputs) 71 | output = self.cls_seg(output) 72 | return output 73 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/fpn_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from jittor import nn 3 | from jseg.bricks import ConvModule 4 | 5 | from jseg.ops import Upsample, resize 6 | from jseg.utils.registry import HEADS 7 | from .decode_head import BaseDecodeHead 8 | 9 | 10 | @HEADS.register_module() 11 | class FPNHead(BaseDecodeHead): 12 | 13 | def __init__(self, feature_strides, **kwargs): 14 | super(FPNHead, self).__init__(input_transform='multiple_select', 15 | **kwargs) 16 | assert len(feature_strides) == len(self.in_channels) 17 | assert min(feature_strides) == feature_strides[0] 18 | self.feature_strides = feature_strides 19 | 20 | self.scale_heads = nn.ModuleList() 21 | for i in range(len(feature_strides)): 22 | head_length = max( 23 | 1, 24 | int(np.log2(feature_strides[i]) - np.log2(feature_strides[0]))) 25 | scale_head = [] 26 | for k in range(head_length): 27 | scale_head.append( 28 | ConvModule( 29 | self.in_channels[i] if k == 0 else self.channels, 30 | self.channels, 31 | 3, 32 | padding=1, 33 | conv_cfg=self.conv_cfg, 34 | norm_cfg=self.norm_cfg, 35 | act_cfg=self.act_cfg)) 36 | if feature_strides[i] != feature_strides[0]: 37 | scale_head.append( 38 | Upsample(scale_factor=2, 39 | mode='bilinear', 40 | align_corners=self.align_corners)) 41 | self.scale_heads.append(nn.Sequential(*scale_head)) 42 | 43 | def execute(self, inputs): 44 | 45 | x = self._transform_inputs(inputs) 46 | 47 | output = self.scale_heads[0](x[0]) 48 | for i in range(1, len(self.feature_strides)): 49 | # non inplace 50 | output = output + resize(self.scale_heads[i](x[i]), 51 | size=output.shape[2:], 52 | mode='bilinear', 53 | align_corners=self.align_corners) 54 | 55 | output = self.cls_seg(output) 56 | return output 57 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/gc_head.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jseg.ops import ContextBlock 3 | 4 | from jseg.utils.registry import HEADS 5 | from .fcn_head import FCNHead 6 | 7 | 8 | @HEADS.register_module() 9 | class GCHead(FCNHead): 10 | 11 | def __init__(self, 12 | ratio=1 / 4., 13 | pooling_type='att', 14 | fusion_types=('channel_add', ), 15 | **kwargs): 16 | super(GCHead, self).__init__(num_convs=2, **kwargs) 17 | self.ratio = ratio 18 | self.pooling_type = pooling_type 19 | self.fusion_types = fusion_types 20 | self.gc_block = ContextBlock(in_channels=self.channels, 21 | ratio=self.ratio, 22 | pooling_type=self.pooling_type, 23 | fusion_types=self.fusion_types) 24 | 25 | def execute(self, inputs): 26 | x = self._transform_inputs(inputs) 27 | output = 
self.convs[0](x) 28 | output = self.gc_block(output) 29 | output = self.convs[1](output) 30 | if self.concat_input: 31 | output = self.conv_cat(jt.concat([x, output], dim=1)) 32 | output = self.cls_seg(output) 33 | return output 34 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/nl_head.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jseg.ops.non_local import NonLocal2d 3 | 4 | from jseg.utils.registry import HEADS 5 | from .fcn_head import FCNHead 6 | 7 | 8 | @HEADS.register_module() 9 | class NLHead(FCNHead): 10 | def __init__(self, 11 | reduction=2, 12 | use_scale=True, 13 | mode='embedded_gaussian', 14 | **kwargs): 15 | super(NLHead, self).__init__(num_convs=2, **kwargs) 16 | self.reduction = reduction 17 | self.use_scale = use_scale 18 | self.mode = mode 19 | self.nl_block = NonLocal2d(in_channels=self.channels, 20 | reduction=self.reduction, 21 | use_scale=self.use_scale, 22 | conv_cfg=self.conv_cfg, 23 | norm_cfg=self.norm_cfg, 24 | mode=self.mode) 25 | 26 | def execute(self, inputs): 27 | """execute function.""" 28 | x = self._transform_inputs(inputs) 29 | output = self.convs[0](x) 30 | output = self.nl_block(output) 31 | output = self.convs[1](output) 32 | if self.concat_input: 33 | output = self.conv_cat(jt.concat([x, output], dim=1)) 34 | output = self.cls_seg(output) 35 | return output 36 | -------------------------------------------------------------------------------- /python/jseg/models/decode_heads/psp_head.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | from jseg.bricks import ConvModule 4 | 5 | from jseg.ops import resize 6 | from jseg.utils.registry import HEADS 7 | from .decode_head import BaseDecodeHead 8 | 9 | 10 | class PPM(nn.ModuleList): 11 | 12 | def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, 13 | act_cfg, align_corners, **kwargs): 14 | super(PPM, self).__init__() 15 | self.pool_scales = pool_scales 16 | self.align_corners = align_corners 17 | self.in_channels = in_channels 18 | self.channels = channels 19 | self.conv_cfg = conv_cfg 20 | self.norm_cfg = norm_cfg 21 | self.act_cfg = act_cfg 22 | for pool_scale in pool_scales: 23 | self.append( 24 | nn.Sequential( 25 | nn.AdaptiveAvgPool2d(pool_scale), 26 | ConvModule(self.in_channels, 27 | self.channels, 28 | 1, 29 | conv_cfg=self.conv_cfg, 30 | norm_cfg=self.norm_cfg, 31 | act_cfg=self.act_cfg, 32 | **kwargs))) 33 | 34 | def execute(self, x): 35 | ppm_outs = [] 36 | for ppm in self: 37 | ppm_out = ppm(x) 38 | upsampled_ppm_out = resize(ppm_out, 39 | size=x.size()[2:], 40 | mode='bilinear', 41 | align_corners=self.align_corners) 42 | ppm_outs.append(upsampled_ppm_out) 43 | return ppm_outs 44 | 45 | 46 | @HEADS.register_module() 47 | class PSPHead(BaseDecodeHead): 48 | 49 | def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): 50 | super(PSPHead, self).__init__(**kwargs) 51 | assert isinstance(pool_scales, (list, tuple)) 52 | self.pool_scales = pool_scales 53 | self.psp_modules = PPM(self.pool_scales, 54 | self.in_channels, 55 | self.channels, 56 | conv_cfg=self.conv_cfg, 57 | norm_cfg=self.norm_cfg, 58 | act_cfg=self.act_cfg, 59 | align_corners=self.align_corners) 60 | self.bottleneck = ConvModule(self.in_channels + 61 | len(pool_scales) * self.channels, 62 | self.channels, 63 | 3, 64 | padding=1, 65 | conv_cfg=self.conv_cfg, 66 | norm_cfg=self.norm_cfg, 67 | 
act_cfg=self.act_cfg) 68 | 69 | def execute(self, inputs): 70 | x = self._transform_inputs(inputs) 71 | psp_outs = [x] 72 | psp_outs.extend(self.psp_modules(x)) 73 | psp_outs = jt.concat(psp_outs, dim=1) 74 | output = self.bottleneck(psp_outs) 75 | output = self.cls_seg(output) 76 | return output 77 | -------------------------------------------------------------------------------- /python/jseg/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .cross_entropy_loss import (CrossEntropyLoss, cross_entropy) 3 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 4 | 5 | __all__ = [ 6 | 'accuracy', 'Accuracy', 'cross_entropy', 'CrossEntropyLoss', 'reduce_loss', 7 | 'weight_reduce_loss', 'weighted_loss' 8 | ] 9 | -------------------------------------------------------------------------------- /python/jseg/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | import jittor as jt 3 | 4 | eps = jt.Var(1.1920928955078125e-07) 5 | 6 | 7 | def accuracy(pred, target, topk=1, thresh=None, ignore_index=None): 8 | assert isinstance(topk, (int, tuple)) 9 | if isinstance(topk, int): 10 | topk = (topk, ) 11 | return_single = True 12 | else: 13 | return_single = False 14 | 15 | maxk = max(topk) 16 | if pred.size(0) == 0: 17 | accu = [jt.Var(0.) for i in range(len(topk))] 18 | return accu[0] if return_single else accu 19 | assert pred.ndim == target.ndim + 1 20 | assert pred.size(0) == target.size(0) 21 | assert maxk <= pred.size(1), \ 22 | f'maxk {maxk} exceeds pred dimension {pred.size(1)}' 23 | pred_value, pred_label = pred.topk(maxk, dim=1) 24 | # transpose to shape (maxk, N, ...) 25 | pred_label = pred_label.transpose(0, 1) 26 | correct = pred_label == (target.unsqueeze(0).expand_as(pred_label)) 27 | if thresh is not None: 28 | # Only prediction values larger than thresh are counted as correct 29 | correct = correct & (pred_value > thresh).t() 30 | if ignore_index is not None: 31 | correct = correct[:, target != ignore_index] 32 | res = [] 33 | for k in topk: 34 | # Avoid causing ZeroDivisionError when all pixels 35 | # of an image are ignored 36 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdims=True) + eps 37 | if ignore_index is not None: 38 | total_num = target[target != ignore_index].numel() + eps 39 | else: 40 | total_num = target.numel() + eps 41 | res.append(correct_k.multiply(100.0 / total_num)) 42 | return res[0] if return_single else res 43 | 44 | 45 | class Accuracy(nn.Module): 46 | 47 | def __init__(self, topk=(1, ), thresh=None, ignore_index=None): 48 | super().__init__() 49 | self.topk = topk 50 | self.thresh = thresh 51 | self.ignore_index = ignore_index 52 | 53 | def execute(self, pred, target): 54 | return accuracy(pred, target, self.topk, self.thresh, 55 | self.ignore_index) 56 | -------------------------------------------------------------------------------- /python/jseg/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import warnings 3 | 4 | 5 | def get_enum(reduction: str) -> int: 6 | if reduction == 'none': 7 | ret = 0 8 | elif reduction == 'mean': 9 | ret = 1 10 | elif reduction == 'elementwise_mean': 11 | warnings.warn( 12 | "reduction='elementwise_mean' is deprecated, please use reduction='mean' instead." 
13 | ) 14 | ret = 1 15 | elif reduction == 'sum': 16 | ret = 2 17 | else: 18 | ret = -1 # TODO: remove once JIT exceptions support control flow 19 | raise ValueError( 20 | "{} is not a valid value for reduction".format(reduction)) 21 | return ret 22 | 23 | 24 | def reduce_loss(loss, reduction): 25 | reduction_enum = get_enum(reduction) 26 | # none: 0, elementwise_mean:1, sum: 2 27 | if reduction_enum == 0: 28 | return loss 29 | elif reduction_enum == 1: 30 | return loss.mean() 31 | elif reduction_enum == 2: 32 | return loss.sum() 33 | 34 | 35 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 36 | # if weight is specified, apply element-wise weight 37 | if weight is not None: 38 | assert weight.ndim == loss.ndim 39 | if weight.ndim > 1: 40 | assert weight.size(1) == 1 or weight.size(1) == loss.size(1) 41 | loss = loss * weight 42 | 43 | # if avg_factor is not specified, just reduce the loss 44 | if avg_factor is None: 45 | loss = reduce_loss(loss, reduction) 46 | else: 47 | # if reduction is mean, then average the loss by avg_factor 48 | if reduction == 'mean': 49 | loss = loss.sum() / avg_factor 50 | # if reduction is 'none', then do nothing, otherwise raise an error 51 | elif reduction != 'none': 52 | raise ValueError('avg_factor can not be used with reduction="sum"') 53 | return loss 54 | 55 | 56 | def weighted_loss(loss_func): 57 | @functools.wraps(loss_func) 58 | def wrapper(pred, 59 | target, 60 | weight=None, 61 | reduction='mean', 62 | avg_factor=None, 63 | **kwargs): 64 | # get element-wise loss 65 | loss = loss_func(pred, target, **kwargs) 66 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 67 | return loss 68 | 69 | return wrapper 70 | -------------------------------------------------------------------------------- /python/jseg/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .multilevel_neck import MultiLevelNeck 2 | from .featurepyramid import Feature2Pyramid 3 | from .fpn import FPN -------------------------------------------------------------------------------- /python/jseg/models/necks/featurepyramid.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | from jseg.bricks import build_norm_layer 3 | 4 | from jseg.utils.registry import NECKS 5 | 6 | 7 | @NECKS.register_module() 8 | class Feature2Pyramid(nn.Module): 9 | def __init__(self, 10 | embed_dim, 11 | rescales=[4, 2, 1, 0.5], 12 | norm_cfg=dict(type='BN')): 13 | super(Feature2Pyramid, self).__init__() 14 | self.rescales = rescales 15 | self.upsample_4x = None 16 | for k in self.rescales: 17 | if k == 4: 18 | self.upsample_4x = nn.Sequential( 19 | nn.ConvTranspose2d( 20 | embed_dim, embed_dim, kernel_size=2, stride=2), 21 | build_norm_layer(norm_cfg, embed_dim)[1], 22 | nn.GELU(), 23 | nn.ConvTranspose2d( 24 | embed_dim, embed_dim, kernel_size=2, stride=2), 25 | ) 26 | elif k == 2: 27 | self.upsample_2x = nn.Sequential( 28 | nn.ConvTranspose2d( 29 | embed_dim, embed_dim, kernel_size=2, stride=2)) 30 | elif k == 1: 31 | self.identity = nn.Identity() 32 | elif k == 0.5: 33 | self.downsample_2x = nn.MaxPool2d(kernel_size=2, stride=2) 34 | elif k == 0.25: 35 | self.downsample_4x = nn.MaxPool2d(kernel_size=4, stride=4) 36 | else: 37 | raise KeyError(f'invalid {k} for feature2pyramid') 38 | 39 | def execute(self, inputs): 40 | assert len(inputs) == len(self.rescales) 41 | outputs = [] 42 | if self.upsample_4x is not None: 43 | ops = [ 44 | self.upsample_4x, 
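# this branch assumes rescales == [4, 2, 1, 0.5]; the else branch below
# assumes [2, 1, 0.5, 0.25] -- the ops must line up with the input order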
self.upsample_2x, self.identity, 45 | self.downsample_2x 46 | ] 47 | else: 48 | ops = [ 49 | self.upsample_2x, self.identity, self.downsample_2x, 50 | self.downsample_4x 51 | ] 52 | for i in range(len(inputs)): 53 | outputs.append(ops[i](inputs[i])) 54 | return tuple(outputs) 55 | -------------------------------------------------------------------------------- /python/jseg/models/necks/multilevel_neck.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | from jseg.utils.weight_init import xavier_init 3 | from jseg.ops import resize 4 | from jseg.utils.registry import NECKS 5 | import collections 6 | 7 | 8 | @NECKS.register_module() 9 | class MultiLevelNeck(nn.Module): 10 | """MultiLevelNeck. 11 | 12 | A neck structure connecting a ViT backbone and decode heads. 13 | 14 | Args: 15 | in_channels (List[int]): Number of input channels per scale. 16 | out_channels (int): Number of output channels (used at each scale). 17 | scales (List[float]): Scale factors for each input feature map. 18 | Default: [0.5, 1, 2, 4] 19 | Note: the lateral and output convs are plain ``nn.Conv`` layers; 20 | normalization and activation layers are not configurable in this 21 | implementation. 22 | """ 23 | 24 | def __init__(self, in_channels, out_channels, scales=[0.5, 1, 2, 4]): 25 | super(MultiLevelNeck, self).__init__() 26 | assert isinstance(in_channels, list) 27 | self.in_channels = in_channels 28 | self.out_channels = out_channels 29 | self.scales = scales 30 | self.num_outs = len(scales) 31 | self.lateral_convs = nn.ModuleList() 32 | self.convs = nn.ModuleList() 33 | for in_channel in in_channels: 34 | 35 | self.lateral_convs.append( 36 | nn.Sequential( 37 | collections.OrderedDict([ 38 | ('conv', nn.Conv(in_channel, out_channels, 1)), 39 | ]))) 40 | 41 | for _ in range(self.num_outs): 42 | self.convs.append( 43 | nn.Sequential( 44 | collections.OrderedDict([('conv', 45 | nn.Conv(out_channels, 46 | out_channels, 47 | 3, 48 | padding=1))]))) 49 | 50 | # default init_weights for conv(msra) and norm in ConvModule 51 | def init_weights(self): 52 | for m in self.modules(): 53 | if isinstance(m, nn.Conv2d): 54 | xavier_init(m, distribution='uniform') 55 | 56 | def execute(self, inputs): 57 | assert len(inputs) == len(self.in_channels) 58 | inputs = [ 59 | lateral_conv(inputs[i]) 60 | for i, lateral_conv in enumerate(self.lateral_convs) 61 | ] 62 | # for len(inputs) not equal to self.num_outs 63 | if len(inputs) == 1: 64 | inputs = [inputs[0] for _ in range(self.num_outs)] 65 | outs = [] 66 | for i in range(self.num_outs): 67 | x_resize = resize(inputs[i], 68 | scale_factor=self.scales[i], 69 | mode='bilinear') 70 | outs.append(self.convs[i](x_resize)) 71 | return tuple(outs) 72 | -------------------------------------------------------------------------------- /python/jseg/models/segmentors/__init__.py: -------------------------------------------------------------------------------- 1 | from .encoder_decoder import EncoderDecoder 2 | from .cascade_encoder_decoder import CascadeEncoderDecoder 3 | from .clip_rc import CLIPRC 4 | 5 | __all__ = ['EncoderDecoder', 'CascadeEncoderDecoder', 'CLIPRC'] 6 | -------------------------------------------------------------------------------- /python/jseg/models/segmentors/cascade_encoder_decoder.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | 3 | from jseg.utils.general import add_prefix 4 | from jseg.ops import resize 5 | from jseg.utils.registry import 
MODELS, build_from_cfg, HEADS 6 | from .encoder_decoder import EncoderDecoder 7 | 8 | 9 | @MODELS.register_module() 10 | class CascadeEncoderDecoder(EncoderDecoder): 11 | 12 | def __init__(self, 13 | num_stages, 14 | backbone, 15 | decode_head, 16 | neck=None, 17 | auxiliary_head=None, 18 | train_cfg=None, 19 | test_cfg=None, 20 | pretrained=None): 21 | self.num_stages = num_stages 22 | super(CascadeEncoderDecoder, 23 | self).__init__(backbone=backbone, 24 | decode_head=decode_head, 25 | neck=neck, 26 | auxiliary_head=auxiliary_head, 27 | train_cfg=train_cfg, 28 | test_cfg=test_cfg, 29 | pretrained=pretrained) 30 | 31 | def _init_decode_head(self, decode_head): 32 | """Initialize ``decode_head``""" 33 | assert isinstance(decode_head, list) 34 | assert len(decode_head) == self.num_stages 35 | self.decode_head = nn.ModuleList() 36 | for i in range(self.num_stages): 37 | self.decode_head.append(build_from_cfg(decode_head[i], HEADS)) 38 | self.align_corners = self.decode_head[-1].align_corners 39 | self.num_classes = self.decode_head[-1].num_classes 40 | self.out_channels = self.decode_head[-1].out_channels 41 | 42 | def encode_decode(self, img, img_metas): 43 | """Encode images with backbone and decode into a semantic segmentation 44 | map of the same size as input.""" 45 | x = self.extract_feat(img) 46 | out = self.decode_head[0].execute_test(x, img_metas, self.test_cfg) 47 | for i in range(1, self.num_stages): 48 | out = self.decode_head[i].execute_test(x, out, img_metas, 49 | self.test_cfg) 50 | out = resize(input=out, 51 | size=img.shape[2:], 52 | mode='bilinear', 53 | align_corners=self.align_corners) 54 | return out 55 | 56 | def _decode_head_execute_train(self, x, img_metas, gt_semantic_seg): 57 | """Run execute function and calculate loss for decode head in 58 | training.""" 59 | losses = dict() 60 | 61 | loss_decode = self.decode_head[0].execute_train( 62 | x, img_metas, gt_semantic_seg, self.train_cfg) 63 | 64 | losses.update(add_prefix(loss_decode, 'decode_0')) 65 | 66 | for i in range(1, self.num_stages): 67 | # execute test again, maybe unnecessary for most methods. 
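# Each stage is supervised against the ground truth while consuming the
# previous stage's inference-mode prediction, so training mirrors the
# cascaded test-time behaviour stage by stage.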
68 | if i == 1: 69 | prev_outputs = self.decode_head[0].execute_test( 70 | x, img_metas, self.test_cfg) 71 | else: 72 | prev_outputs = self.decode_head[i - 1].execute_test( 73 | x, prev_outputs, img_metas, self.test_cfg) 74 | loss_decode = self.decode_head[i].execute_train( 75 | x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg) 76 | losses.update(add_prefix(loss_decode, f'decode_{i}')) 77 | 78 | return losses 79 | -------------------------------------------------------------------------------- /python/jseg/models/utils/se_layer.py: -------------------------------------------------------------------------------- 1 | from jittor import nn 2 | from jseg.bricks import ConvModule 3 | 4 | from jseg.utils.helpers import make_divisible 5 | from jseg.utils.general import is_tuple_of 6 | 7 | 8 | class SELayer(nn.Module): 9 | 10 | def __init__(self, 11 | channels, 12 | ratio=16, 13 | conv_cfg=None, 14 | act_cfg=(dict(type='ReLU'), 15 | dict(type='HSigmoid', bias=3.0, divisor=6.0))): 16 | super(SELayer, self).__init__() 17 | if isinstance(act_cfg, dict): 18 | act_cfg = (act_cfg, act_cfg) 19 | assert len(act_cfg) == 2 20 | assert is_tuple_of(act_cfg, dict) 21 | self.global_avgpool = nn.AdaptiveAvgPool2d(1) 22 | self.conv1 = ConvModule(in_channels=channels, 23 | out_channels=make_divisible( 24 | channels // ratio, 8), 25 | kernel_size=1, 26 | stride=1, 27 | conv_cfg=conv_cfg, 28 | act_cfg=act_cfg[0]) 29 | self.conv2 = ConvModule(in_channels=make_divisible( 30 | channels // ratio, 8), 31 | out_channels=channels, 32 | kernel_size=1, 33 | stride=1, 34 | conv_cfg=conv_cfg, 35 | act_cfg=act_cfg[1]) 36 | 37 | def execute(self, x): 38 | out = self.global_avgpool(x) 39 | out = self.conv1(out) 40 | out = self.conv2(out) 41 | return x * out 42 | -------------------------------------------------------------------------------- /python/jseg/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .wrappers import Upsample, resize 2 | from .external_attention import External_attention 3 | from .cc_attention import CrissCrossAttention 4 | from .scale import Scale 5 | from .self_attention_block import SelfAttentionBlock 6 | from .multi_head_attention import MultiheadAttention 7 | from .context_block import ContextBlock 8 | 9 | __all__ = [ 10 | 'Upsample', 'resize', 'External_attention', 'CrissCrossAttention', 'Scale', 11 | 'SelfAttentionBlock', 'MultiheadAttention', 'ContextBlock' 12 | ] 13 | -------------------------------------------------------------------------------- /python/jseg/ops/cc_attention.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | 4 | from .scale import Scale 5 | 6 | 7 | def NEG_INF_DIAG(n): 8 | return jt.diag(jt.Var(float('-inf')).repeat(n), 0) 9 | 10 | 11 | class CrissCrossAttention(nn.Module): 12 | def __init__(self, in_channels): 13 | super().__init__() 14 | self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1) 15 | self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1) 16 | self.value_conv = nn.Conv2d(in_channels, in_channels, 1) 17 | self.gamma = Scale(0.) 
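# gamma starts at zero, so the block is an identity mapping at
# initialization; the attention term is blended in as gamma is learned
# (see `out = self.gamma(out) + x` in execute)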
18 | self.in_channels = in_channels 19 | 20 | def execute(self, x): 21 | B, C, H, W = x.size() 22 | query = self.query_conv(x) 23 | key = self.key_conv(x) 24 | value = self.value_conv(x) 25 | energy_H = jt.linalg.einsum('bchw,bciw->bwhi', query, 26 | key) + NEG_INF_DIAG(H) 27 | energy_H = energy_H.transpose(1, 2) 28 | energy_W = jt.linalg.einsum('bchw,bchj->bhwj', query, key) 29 | attn = nn.softmax(jt.concat([energy_H, energy_W], dim=-1), 30 | dim=-1) # [B,H,W,(H+W)] 31 | out = jt.linalg.einsum('bciw,bhwi->bchw', value, attn[..., :H]) 32 | out += jt.linalg.einsum('bchj,bhwj->bchw', value, attn[..., H:]) 33 | 34 | out = self.gamma(out) + x 35 | 36 | return out 37 | 38 | def __repr__(self): 39 | s = self.__class__.__name__ 40 | s += f'(in_channels={self.in_channels})' 41 | return s 42 | -------------------------------------------------------------------------------- /python/jseg/ops/external_attention.py: -------------------------------------------------------------------------------- 1 | from jittor import Module, nn 2 | from jseg.bricks import ConvModule 3 | 4 | 5 | class External_attention(Module): 6 | ''' 7 | Arguments: 8 | in_channels (int): input channels. channels (int): working width. k (int): number of external memory units. 9 | ''' 10 | def __init__(self, in_channels, channels, k=256): 11 | super(External_attention, self).__init__() 12 | 13 | self.in_channels = in_channels 14 | self.channels = channels 15 | self.k = k 16 | 17 | self.conv1 = ConvModule(self.in_channels, self.channels, 1) 18 | 19 | self.linear_0 = ConvModule(self.channels, self.k, 1) 20 | 21 | self.linear_1 = ConvModule(self.k, self.channels, 1) 22 | 23 | self.conv2 = ConvModule(self.channels, self.channels, 1) 24 | 25 | def execute(self, x): 26 | x = self.conv1(x) 27 | idn = x 28 | b, c, h, w = x.size() 29 | x = self.linear_0(x) # b, k, h, w 30 | x = x.view(b, self.k, h * w) # b * k * n 31 | 32 | x = nn.softmax(x, dim=-1) # b, k, n 33 | x = x / (1e-9 + x.sum(dim=1, keepdims=True)) # b, k, n 34 | 35 | x = x.view(b, self.k, h, w) 36 | x = self.linear_1(x) # b, c, h, w 37 | 38 | x = x + idn 39 | x = self.conv2(x) 40 | return x 41 | -------------------------------------------------------------------------------- /python/jseg/ops/scale.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | 4 | 5 | class Scale(nn.Module): 6 | """A learnable scale parameter. 7 | 8 | This layer scales the input by a learnable factor. It multiplies a 9 | learnable scale parameter of shape (1,) with input of any shape. 10 | 11 | Args: 12 | scale (float): Initial value of scale factor. 
Default: 1.0 13 | """ 14 | 15 | def __init__(self, scale=1.0): 16 | super().__init__() 17 | self.scale = jt.Var(scale) 18 | 19 | def execute(self, x): 20 | return x * self.scale 21 | -------------------------------------------------------------------------------- /python/jseg/ops/wrappers.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from jittor import nn 3 | from jittor import Function 4 | 5 | 6 | # TODO Save memory 7 | class Resize(Function): 8 | 9 | def execute(self, input, size, scale_factor, mode, align_corners): 10 | self.input_size = input.shape[2:] 11 | self.scale_factor = scale_factor 12 | self.mode = mode 13 | self.align_corners = align_corners 14 | return nn.interpolate(input, size, scale_factor, mode, align_corners) 15 | 16 | def grad(self, grad_output): 17 | return nn.interpolate(grad_output, self.input_size, self.scale_factor, 18 | self.mode, self.align_corners) 19 | 20 | 21 | interpolate = Resize.apply 22 | 23 | 24 | def resize(input, 25 | size=None, 26 | scale_factor=None, 27 | mode='nearest', 28 | align_corners=None, 29 | warning=True): 30 | if warning: 31 | if size is not None and align_corners: 32 | input_h, input_w = tuple(int(x) for x in input.shape[2:]) 33 | output_h, output_w = tuple(int(x) for x in size) 34 | if output_h > input_h or output_w > input_w: 35 | if ((output_h > 1 and output_w > 1 and input_h > 1 36 | and input_w > 1) and (output_h - 1) % (input_h - 1) 37 | and (output_w - 1) % (input_w - 1)): 38 | warnings.warn( 39 | f'When align_corners={align_corners}, ' 40 | 'the output would be more aligned if ' 41 | f'input size {(input_h, input_w)} is `x+1` and ' 42 | f'out size {(output_h, output_w)} is `nx+1`') 43 | if size is not None: 44 | size = tuple(int(x) for x in size) 45 | return interpolate(input, size, scale_factor, mode, align_corners) 46 | 47 | 48 | class Upsample(nn.Module): 49 | 50 | def __init__(self, 51 | size=None, 52 | scale_factor=None, 53 | mode='nearest', 54 | align_corners=None): 55 | super(Upsample, self).__init__() 56 | self.size = size 57 | if isinstance(scale_factor, tuple): 58 | self.scale_factor = tuple(float(factor) for factor in scale_factor) 59 | else: 60 | self.scale_factor = float(scale_factor) if scale_factor else None 61 | self.mode = mode 62 | self.align_corners = align_corners 63 | 64 | def execute(self, x): 65 | if not self.size: 66 | size = [int(t * self.scale_factor) for t in x.shape[-2:]] 67 | else: 68 | size = self.size 69 | return resize(x, size, None, self.mode, self.align_corners) 70 | -------------------------------------------------------------------------------- /python/jseg/optims/__init__.py: -------------------------------------------------------------------------------- 1 | from .lr_scheduler import * 2 | from .optimizer import * 3 | from .prameter_groups_generator import * 4 | from .lr_decay_parameter_groups_generator import * -------------------------------------------------------------------------------- /python/jseg/optims/prameter_groups_generator.py: -------------------------------------------------------------------------------- 1 | from jseg.utils.registry import MODELS 2 | 3 | 4 | @MODELS.register_module() 5 | def CustomPrameterGroupsGenerator(named_params, model, custom_keys={}, logger=None): 6 | def get_custom_parameter_groups(name): 7 | for ck in custom_keys.keys(): 8 | if ck in name: 9 | return custom_keys[ck] 10 | return None 11 | 12 | normal_group_list = [] 13 | custom_group_list = [] 14 | 15 | for p in named_params: 16 | name, param = 
p 17 | custom_group = get_custom_parameter_groups(name) 18 | if custom_group is not None: 19 | tmp = {} 20 | tmp['params'] = [param] 21 | for i in custom_group.keys(): 22 | tmp[i] = custom_group.get(i) 23 | custom_group_list.append(tmp) 24 | continue 25 | normal_group_list.append({'params': [param]}) 26 | return normal_group_list + custom_group_list 27 | -------------------------------------------------------------------------------- /python/jseg/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .runner import Runner 2 | -------------------------------------------------------------------------------- /python/jseg/sampler/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_pixel_sampler import BasePixelSampler 2 | from .ohem_pixel_sampler import OHEMPixelSampler 3 | 4 | __all__ = ['BasePixelSampler', 'OHEMPixelSampler'] 5 | -------------------------------------------------------------------------------- /python/jseg/sampler/base_pixel_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BasePixelSampler(metaclass=ABCMeta): 5 | """Base class of pixel sampler.""" 6 | 7 | def __init__(self, **kwargs): 8 | pass 9 | 10 | @abstractmethod 11 | def sample(self, seg_logit, seg_label): 12 | """Placeholder for sample function.""" 13 | -------------------------------------------------------------------------------- /python/jseg/sampler/ohem_pixel_sampler.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn 3 | 4 | from jseg.utils.registry import PIXEL_SAMPLERS 5 | from .base_pixel_sampler import BasePixelSampler 6 | 7 | 8 | @PIXEL_SAMPLERS.register_module() 9 | class OHEMPixelSampler(BasePixelSampler): 10 | def __init__(self, context, thresh=None, min_kept=100000): 11 | super(OHEMPixelSampler, self).__init__() 12 | self.context = context 13 | assert min_kept > 1 14 | self.thresh = thresh 15 | self.min_kept = min_kept 16 | 17 | def sample(self, seg_logit, seg_label): 18 | with jt.no_grad(): 19 | assert seg_logit.shape[2:] == seg_label.shape[2:] 20 | assert seg_label.shape[1] == 1 21 | seg_label = seg_label.squeeze(1).long() 22 | batch_kept = self.min_kept * seg_label.size(0) 23 | valid_mask = seg_label != self.context.ignore_index 24 | seg_weight = jt.zeros(seg_label.size()).astype(seg_logit.dtype) 25 | valid_seg_weight = seg_weight[valid_mask] 26 | if self.thresh is not None: 27 | seg_prob = nn.softmax(seg_logit, dim=1) 28 | 29 | tmp_seg_label = seg_label.clone().unsqueeze(1) 30 | tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0 31 | seg_prob = jt.gather(seg_prob, 1, tmp_seg_label).squeeze(1) 32 | sort_indices, sort_prob = seg_prob[valid_mask].argsort() 33 | 34 | if sort_prob.numel() > 0: 35 | min_threshold = sort_prob[min(batch_kept, 36 | sort_prob.numel() - 1)] 37 | else: 38 | min_threshold = 0.0 39 | threshold = max(min_threshold, self.thresh) 40 | valid_seg_weight[seg_prob[valid_mask] < threshold] = 1. 
41 | else: 42 | if not isinstance(self.context.loss_decode, nn.ModuleList): 43 | losses_decode = [self.context.loss_decode] 44 | else: 45 | losses_decode = self.context.loss_decode 46 | losses = 0.0 47 | for loss_module in losses_decode: 48 | losses += loss_module( 49 | seg_logit, 50 | seg_label, 51 | weight=None, 52 | ignore_index=self.context.ignore_index, 53 | reduction_override='none') 54 | 55 | sort_indices, _ = losses[valid_mask].argsort(descending=True) 56 | valid_seg_weight[sort_indices[:batch_kept]] = 1. 57 | 58 | seg_weight[valid_mask] = valid_seg_weight 59 | 60 | return seg_weight 61 | -------------------------------------------------------------------------------- /python/jseg/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logger import * 2 | from .registry import Registry, build_from_cfg -------------------------------------------------------------------------------- /python/jseg/utils/helpers.py: -------------------------------------------------------------------------------- 1 | from itertools import repeat 2 | import collections.abc 3 | 4 | 5 | def _ntuple(n): 6 | def parse(x): 7 | if isinstance(x, collections.abc.Iterable): 8 | return x 9 | return tuple(repeat(x, n)) 10 | 11 | return parse 12 | 13 | 14 | to_1tuple = _ntuple(1) 15 | to_2tuple = _ntuple(2) 16 | to_3tuple = _ntuple(3) 17 | to_4tuple = _ntuple(4) 18 | to_ntuple = _ntuple 19 | 20 | 21 | def make_divisible(v, divisor=8, min_value=None, round_limit=.9): 22 | min_value = min_value or divisor 23 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 24 | # Make sure that rounding down does not remove more than 10% of v. 25 | if new_v < round_limit * v: 26 | new_v += divisor 27 | return new_v 28 | -------------------------------------------------------------------------------- /python/jseg/utils/inference.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import jittor as jt 4 | from jseg.runner import Runner 5 | from jseg.config import init_cfg, update_cfg, get_cfg 6 | from jseg.datasets.pipelines import Compose 7 | 8 | 9 | class InferenceSegmentor: 10 | def __init__(self, config_file, checkpoint_file, save_dir): 11 | init_cfg(config_file) 12 | if len(checkpoint_file) > 0: 13 | update_cfg(resume_path=checkpoint_file) 14 | 15 | self.runner = Runner() 16 | self.runner.model.eval() 17 | self.transforms = Compose(get_cfg().test_pipeline[1:]) 18 | self.palette = self.runner.val_dataset.PALETTE 19 | self.runner.model.CLASSES = self.runner.val_dataset.CLASSES 20 | self.runner.model.PALETTE = self.runner.val_dataset.PALETTE 21 | self.save_dir = save_dir 22 | 23 | def load_img(self, results): 24 | if isinstance(results['img'], str): 25 | results['filename'] = results['img'] 26 | results['ori_filename'] = results['img'] 27 | img = cv2.imread(results['img']) 28 | else: 29 | results['filename'] = None 30 | results['ori_filename'] = None 31 | img = results['img']  # already a decoded array, so skip cv2.imread 32 | results['img'] = img 33 | results['img_shape'] = img.shape 34 | results['ori_shape'] = img.shape 35 | return results 36 | 37 | @jt.no_grad() 38 | @jt.single_process_scope() 39 | def infer(self, img): 40 | data = dict(img=img) 41 | data = self.transforms(self.load_img(data)) 42 | data['img'][0] = data['img'][0].unsqueeze(0) 43 | results = self.runner.model(**data, return_loss=False, rescale=True) 44 | results = self.runner.model.show_result(img, results, out_file=os.path.join(self.save_dir, os.path.basename(img))) 45 | return results 46 |
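InferenceSegmentor above bundles config loading, the test pipeline, and result rendering into one object. A hedged usage sketch: the config file is one of the project configs in this repository, while the checkpoint and image paths are placeholders and assume the script runs from the repo root:

from jseg.utils.inference import InferenceSegmentor

segmentor = InferenceSegmentor(
    config_file='project/pspnet/pspnet_r50-d8_512x512_ade20k_80k.py',
    checkpoint_file='work_dirs/pspnet/ckpt.pkl',  # placeholder checkpoint path
    save_dir='outputs')
# runs the test pipeline on the image and writes the colorized
# prediction to outputs/<basename of the input image>
segmentor.infer('demo/demo.png')  # placeholder image path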
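The helpers in utils/helpers.py are pure arithmetic, so their behavior can be pinned down with a few checked examples (values computed directly from the code above, shown as assertions):

from jseg.utils.helpers import make_divisible, to_2tuple

assert to_2tuple(7) == (7, 7)    # scalars are repeated; iterables pass through
assert make_divisible(30) == 32  # rounded to the nearest multiple of 8
assert make_divisible(3) == 8    # clamped up to min_value (defaults to divisor)
assert make_divisible(27) == 32  # 24 would lose more than 10% of 27, so bump by divisor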
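OHEMPixelSampler above performs online hard example mining: with `thresh` set, it keeps every pixel whose predicted ground-truth probability falls below max(thresh, the (min_kept * N)-th lowest probability in the batch); with `thresh=None`, it keeps the min_kept * N highest-loss pixels. A self-contained toy run of the threshold branch; `DummyHead` is a hypothetical stand-in, since the sampler only reads `ignore_index` and `loss_decode` from its context:

import jittor as jt
from jseg.sampler import OHEMPixelSampler

class DummyHead:
    ignore_index = 255
    loss_decode = None  # only consulted when thresh is None

logits = jt.randn(2, 19, 64, 64)                  # (N, C, H, W)
labels = jt.randint(0, 19, shape=(2, 1, 64, 64))  # (N, 1, H, W)

sampler = OHEMPixelSampler(context=DummyHead(), thresh=0.7, min_kept=1000)
weight = sampler.sample(logits, labels)  # 1. on kept (hard) pixels, 0. elsewhere
print(weight.shape, int(weight.sum().item()))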
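CustomPrameterGroupsGenerator in optims/prameter_groups_generator.py (the spelling follows the file name) splits parameters into a default group plus one group per parameter whose name contains a key of `custom_keys`, copying that key's overrides into the group. A hedged sketch with a hypothetical two-part model; the exact optimizer wiring used by real configs may differ:

from jittor import nn
from jseg.optims.prameter_groups_generator import CustomPrameterGroupsGenerator

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = nn.Conv2d(3, 8, 3)
        self.head = nn.Conv2d(8, 2, 1)

model = Toy()
groups = CustomPrameterGroupsGenerator(
    named_params=model.named_parameters(),
    model=model,
    custom_keys={'head': dict(lr=0.01)})
# backbone params land in plain {'params': [...]} groups; any param whose
# name contains 'head' gets its own group with lr=0.01 attached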
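Finally, the `resize`/`Upsample` pair in ops/wrappers.py earlier wraps Jittor interpolation: `resize` is the warning-aware functional form routed through the custom `Resize` Function (whose backward interpolates gradients straight back to the saved input size), and `Upsample` is its module form. A minimal sketch of how the two relate; the shapes here are illustrative, not from the repository:

import jittor as jt
from jseg.ops.wrappers import resize, Upsample

x = jt.randn(1, 3, 32, 32)
# functional form: explicit output size
y = resize(x, size=(64, 64), mode='bilinear', align_corners=False)
# module form: the same output size expressed as a scale factor
up = Upsample(scale_factor=2.0, mode='bilinear', align_corners=False)
assert y.shape == up(x).shape  # both (1, 3, 64, 64)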
-------------------------------------------------------------------------------- /python/jseg/utils/logger.py: -------------------------------------------------------------------------------- 1 | from jseg.utils.general import build_file, current_time 2 | from .registry import HOOKS, build_from_cfg 3 | import time 4 | import os 5 | from tensorboardX import SummaryWriter 6 | from jseg.config import get_cfg 7 | 8 | 9 | @HOOKS.register_module() 10 | class TextLogger: 11 | def __init__(self, work_dir): 12 | save_file = build_file( 13 | work_dir, 14 | prefix="textlog/log_" + 15 | time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime()) + ".txt") 16 | self.log_file = open(save_file, "a") 17 | 18 | def log(self, data): 19 | msg = ",".join([f"{k}:{d}" for k, d in data.items()]) 20 | msg = current_time() + ' ' + msg + "\n" 21 | self.log_file.write(msg) 22 | self.log_file.flush() 23 | 24 | 25 | @HOOKS.register_module() 26 | class TensorboardLogger: 27 | def __init__(self, work_dir): 28 | self.cfg = get_cfg() 29 | tensorboard_dir = os.path.join(work_dir, "tensorboard") 30 | self.writer = SummaryWriter(tensorboard_dir, flush_secs=10) 31 | 32 | def log(self, data): 33 | if "iter" in data.keys(): 34 | step = data["iter"] 35 | for k, d in data.items(): 36 | if k in ["iter", "epoch", "batch_idx", "times", "batch_size"]: 37 | continue 38 | if isinstance(d, str): 39 | continue 40 | self.writer.add_scalar(k, d, global_step=step) 41 | 42 | 43 | @HOOKS.register_module() 44 | class RunLogger: 45 | def __init__(self, work_dir, loggers=["TextLogger", "TensorboardLogger"]): 46 | self.loggers = [ 47 | build_from_cfg(log, HOOKS, work_dir=work_dir) for log in loggers 48 | ] 49 | 50 | def log(self, data, **kwargs): 51 | data.update(kwargs) 52 | data = { 53 | k: d.item() if hasattr(d, "item") else d 54 | for k, d in data.items() 55 | } 56 | for logger in self.loggers: 57 | logger.log(data) 58 | self.print_log(data) 59 | 60 | def get_time(self, s): 61 | s = int(s) 62 | days = s // 60 // 60 // 24 63 | hours = s // 60 // 60 % 24 64 | minutes = s // 60 % 60 65 | seconds = s % 60 66 | return f' [{days}D:{hours}H:{minutes}M:{seconds}S] ' 67 | 68 | def print_log(self, msg): 69 | if isinstance(msg, dict): 70 | msgs = [] 71 | for k, d in msg.items(): 72 | if (k == "remain_time"): 73 | msgs.append(f" {k}:{self.get_time(d)}") 74 | else: 75 | msgs.append(f" {k}:{d:.7f}" 76 | if isinstance(d, float) else f" {k}:{d}") 77 | msg = ",".join(msgs) 78 | print(current_time(), msg) 79 | -------------------------------------------------------------------------------- /python/jseg/utils/registry.py: -------------------------------------------------------------------------------- 1 | class Registry: 2 | def __init__(self): 3 | self._modules = {} 4 | 5 | def register_module(self, name=None, module=None): 6 | def _register_module(module): 7 | key = name 8 | if key is None: 9 | key = module.__name__ 10 | assert key not in self._modules, f"{key} is already registered." 11 | self._modules[key] = module 12 | return module 13 | 14 | if module is not None: 15 | return _register_module(module) 16 | 17 | return _register_module 18 | 19 | def get(self, name): 20 | assert name in self._modules, f"{name} is not registered." 
21 | return self._modules[name] 22 | 23 | 24 | def build_from_cfg(cfg, registry, **kwargs): 25 | if isinstance(cfg, str): 26 | return registry.get(cfg)(**kwargs) 27 | elif isinstance(cfg, dict): 28 | args = cfg.copy() 29 | args.update(kwargs) 30 | obj_type = args.pop('type') 31 | obj_cls = registry.get(obj_type) 32 | try: 33 | module = obj_cls(**args) 34 | except TypeError as e: 35 | if "
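Taken together, `Registry` and `build_from_cfg` are jseg's config-driven construction mechanism: a string resolves directly to the registered class or function, while a dict must carry a `type` key whose remaining items become constructor kwargs. A minimal sketch; the `DEMO` registry and `Constant` class are illustrative, not part of jseg:

from jseg.utils.registry import Registry, build_from_cfg

DEMO = Registry()

@DEMO.register_module()
class Constant:
    def __init__(self, value=1.0):
        self.value = value

# dict form: 'type' selects the class, the rest become kwargs
a = build_from_cfg(dict(type='Constant', value=2.0), DEMO)
# string form: extra kwargs are passed through directly
b = build_from_cfg('Constant', DEMO, value=2.0)
assert a.value == b.value == 2.0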