├── .gitignore
├── LICENSE
├── README.md
├── ablation_ratio.sh
├── assests
│   ├── improve.png
│   └── overview.png
├── checkpoints
│   └── backbones
│       ├── mit
│       │   └── place mit-b0.pth here
│       └── mobilenet_
│           └── place mobilenetv3.pth here
├── configs
│   ├── BiSeNetV2
│   │   ├── camvid.yaml
│   │   ├── uavid2020.yaml
│   │   └── vaihingen.yaml
│   ├── Deeplabv3plus
│   │   ├── camvid_mbv3l.yaml
│   │   ├── uavid2020_mbv3l.yaml
│   │   └── vaihingen_mbv3l.yaml
│   ├── PIDNet
│   │   ├── camvid_S.yaml
│   │   ├── camvid_S_focal.yaml
│   │   ├── camvid_S_ohem.yaml
│   │   ├── uavid2020_S.yaml
│   │   ├── vaihingen_S.yaml
│   │   ├── vaihingen_S_focal.yaml
│   │   └── vaihingen_S_ohem.yaml
│   ├── SegFormer
│   │   ├── camvid_mitb0.yaml
│   │   ├── camvid_mitb0_focal.yaml
│   │   ├── camvid_mitb0_ohem.yaml
│   │   ├── uavid2020_mitb0.yaml
│   │   ├── vaihingen_mitb0.yaml
│   │   ├── vaihingen_mitb0_focal.yaml
│   │   ├── vaihingen_mitb0_ohem.yaml
│   │   └── vaihingen_mitb3.yaml
│   ├── TopFormer
│   │   ├── camvid_B.yaml
│   │   ├── camvid_B_focal.yaml
│   │   ├── camvid_B_ohem.yaml
│   │   ├── uavid2020_B.yaml
│   │   ├── vaihingen_B.yaml
│   │   ├── vaihingen_B_focal.yaml
│   │   └── vaihingen_B_ohem.yaml
│   ├── UperNet
│   │   ├── camvid_mbv3l.yaml
│   │   ├── uavid2020_mbv3l.yaml
│   │   └── vaihingen_mbv3l.yaml
│   ├── deeplabv3plus
│   │   ├── camvid_mbv3l.yaml
│   │   ├── uavid2020_mbv3l.yaml
│   │   └── vaihingen_mbv3l.yaml
│   └── segformer
│       ├── camvid_mitb0.yaml
│       ├── uavid2020_mitb0.yaml
│       ├── vaihingen_mitb0.yaml
│       └── vaihingen_mitb3.yaml
├── semseg
│   ├── __init__.py
│   ├── augmentations.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── ade20k.py
│   │   ├── aeroscapes.py
│   │   ├── atr.py
│   │   ├── camvid.py
│   │   ├── celebamaskhq.py
│   │   ├── cihp.py
│   │   ├── cityscapes.py
│   │   ├── cocostuff.py
│   │   ├── facesynthetics.py
│   │   ├── helen.py
│   │   ├── htht2022.py
│   │   ├── ibugmask.py
│   │   ├── isaid.py
│   │   ├── lapa.py
│   │   ├── lip.py
│   │   ├── mapillary.py
│   │   ├── mhpv1.py
│   │   ├── mhpv2.py
│   │   ├── pascalcontext.py
│   │   ├── suim.py
│   │   ├── sunrgbd.py
│   │   ├── uavid2020.py
│   │   ├── udd6.py
│   │   └── vaihingen.py
│   ├── losses.py
│   ├── metrics.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── backbones
│   │   │   ├── __init__.py
│   │   │   ├── convnext.py
│   │   │   ├── micronet.py
│   │   │   ├── mit.py
│   │   │   ├── mobilenetv2.py
│   │   │   ├── mobilenetv3.py
│   │   │   ├── mobilenetv3_.py
│   │   │   ├── poolformer.py
│   │   │   ├── pvt.py
│   │   │   ├── resnet.py
│   │   │   ├── resnetd.py
│   │   │   ├── rest.py
│   │   │   ├── topformer.py
│   │   │   └── uniformer.py
│   │   ├── base.py
│   │   ├── bisenetv1.py
│   │   ├── bisenetv2.py
│   │   ├── bisenetv2_ss.py
│   │   ├── ccnet.py
│   │   ├── custom_cnn.py
│   │   ├── custom_vit.py
│   │   ├── ddrnet.py
│   │   ├── ddrnet_official.py
│   │   ├── deeplabv3plus.py
│   │   ├── fast_scnn.py
│   │   ├── fchardnet.py
│   │   ├── heads
│   │   │   ├── __init__.py
│   │   │   ├── condnet.py
│   │   │   ├── fapn.py
│   │   │   ├── fcn.py
│   │   │   ├── fpn.py
│   │   │   ├── lawin.py
│   │   │   ├── segformer.py
│   │   │   ├── sfnet.py
│   │   │   └── upernet.py
│   │   ├── lawin.py
│   │   ├── layers
│   │   │   ├── __init__.py
│   │   │   ├── common.py
│   │   │   └── initialize.py
│   │   ├── modules
│   │   │   ├── __init__.py
│   │   │   ├── attention.py
│   │   │   ├── auxiliary.py
│   │   │   ├── cc_attention.py
│   │   │   ├── dfem.py
│   │   │   ├── ppm.py
│   │   │   ├── psa.py
│   │   │   └── sos.py
│   │   ├── pidnet.py
│   │   ├── pspnet.py
│   │   ├── segformer.py
│   │   ├── sfnet.py
│   │   ├── sosnet.py
│   │   ├── sosnet_ablation.py
│   │   ├── topformer.py
│   │   └── upernet.py
│   ├── optimizers.py
│   ├── schedulers.py
│   └── utils
│       ├── __init__.py
│       ├── utils.py
│       └── visualize.py
├── setup.py
└── tools
    ├── benchmark.py
    ├── convert_datasets
    │   ├── convert_camvid.py
    │   ├── convert_uavid.py
    │   └── convert_vaihingen.py
    ├── export.py
    ├── export_small_objects.py
    ├── feature_visualization.py
    ├── infer.py
    ├── infer_single.py
    ├── submit
    │   └── uavid_submit.py
    ├── train.py
    ├── train_sosnet.py
    └── val.py
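The two "place ... here" placeholders under checkpoints/backbones/ mark where the ImageNet-pretrained backbone weights have to be dropped before training; the configs further down in this dump point at checkpoints/backbones/mit/mit_b0.pth and checkpoints/backbones/mobilenet_/mobilenetv3_large.pth. A small stand-alone sketch, not part of the repository, that verifies those files are in place:

    # check_backbones.py -- illustrative sketch only, not part of the repository.
    # The two paths below are the PRETRAINED paths used by the SegFormer and
    # Deeplabv3plus configs shown later in this dump; extend the list as needed.
    from pathlib import Path

    EXPECTED_WEIGHTS = [
        "checkpoints/backbones/mit/mit_b0.pth",
        "checkpoints/backbones/mobilenet_/mobilenetv3_large.pth",
    ]

    def check_backbone_weights(repo_root: str = ".") -> bool:
        """Return True if every expected backbone checkpoint exists."""
        ok = True
        for rel in EXPECTED_WEIGHTS:
            path = Path(repo_root) / rel
            if not path.is_file():
                print(f"missing backbone checkpoint: {path}")
                ok = False
        return ok

    if __name__ == "__main__":
        check_backbone_weights()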
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 sithu3

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/ablation_ratio.sh:
--------------------------------------------------------------------------------

python tools/benchmark.py --model-name DeeplabV3Plus --backbone-name MobileNetV3-large
python tools/benchmark.py --model-name SOSNet --backbone-name MobileNetV3-large
python tools/benchmark.py --model-name SegFormer --backbone-name MiT-B0


python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.0
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.1
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.2
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.3
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.4
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.5
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.6
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.7
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.8
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.9
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 1.0
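Every command above selects its hyper-parameters via --cfg, which points at one of the YAML files under configs/ (reproduced below). As a minimal, hypothetical sketch of reading such a file with PyYAML — not the loader used by the repo's tools/train_sosnet.py — and assuming the file follows the same schema as the configs shown in this dump:

    # read_cfg.py -- illustrative sketch only, not part of the repository.
    # Assumes the config uses the schema of the YAML files reproduced below
    # (DEVICE, SAVE_DIR, MODEL, DATASET, TRAIN, LOSS, OPTIMIZER, SCHEDULER, EVAL, TEST).
    import yaml

    def load_config(path: str) -> dict:
        """Parse one of the training configs into a plain nested dict."""
        with open(path, "r") as f:
            return yaml.safe_load(f)

    if __name__ == "__main__":
        # The config used by ablation_ratio.sh above.
        cfg = load_config("configs/UperNet/vaihingen_mbv3l.yaml")
        print("model     :", cfg["MODEL"]["NAME"], cfg["MODEL"]["BACKBONE"])
        print("train size:", cfg["TRAIN"]["IMAGE_SIZE"], "batch:", cfg["TRAIN"]["BATCH_SIZE"])
        print("loss      :", cfg["LOSS"]["NAME"], "lr:", cfg["OPTIMIZER"]["LR"])
        print("save dir  :", cfg["SAVE_DIR"])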
--------------------------------------------------------------------------------
/assests/improve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuLiu/SOSNet/0e4832eeb76daeebd4a0e31a750e7fce86b7b8ac/assests/improve.png
--------------------------------------------------------------------------------
/assests/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuLiu/SOSNet/0e4832eeb76daeebd4a0e31a750e7fce86b7b8ac/assests/overview.png
--------------------------------------------------------------------------------
/checkpoints/backbones/mit/place mit-b0.pth here:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuLiu/SOSNet/0e4832eeb76daeebd4a0e31a750e7fce86b7b8ac/checkpoints/backbones/mit/place mit-b0.pth here
--------------------------------------------------------------------------------
/checkpoints/backbones/mobilenet_/place mobilenetv3.pth here:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuLiu/SOSNet/0e4832eeb76daeebd4a0e31a750e7fce86b7b8ac/checkpoints/backbones/mobilenet_/place mobilenetv3.pth here
--------------------------------------------------------------------------------
/configs/BiSeNetV2/camvid.yaml:
--------------------------------------------------------------------------------
DEVICE          : cuda:0                                  # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...)
SAVE_DIR        : 'output_ablation/Bisenetv2/camvid'      # output folder name used for saving the model, logs and inference results

MODEL:
  NAME          : BiSeNetv2                                       # name of the model you are using
  BACKBONE      :                                                 # model variant
  PRETRAINED    : checkpoints/backbones/bisenetv2/bisenetv2.pth   # backbone model's weight

DATASET:
  NAME          : CamVid          # dataset name to be trained with (camvid, cityscapes, ade20k)
  ROOT          : 'data/CamVid'   # dataset root path
  IGNORE_LABEL  : 11
  H_FLIP        : true
  V_FLIP        : false

TRAIN:
  IMAGE_SIZE    : [352, 480]      # training image size in (h, w)
  BATCH_SIZE    : 32              # batch size used to train
  EPOCHS        : 968             # number of epochs to train
  EVAL_INTERVAL : 50              # evaluation interval during training
  AMP           : false           # use AMP in training
  DDP           : false           # use DDP training
  MAX_INERITER  : 999999          # max iteration for each epoch

LOSS:
  NAME          : CrossEntropy    # loss function name (ohemce, ce, dice)
  CLS_WEIGHTS   : false           # use class weights in loss calculation

OPTIMIZER:
  NAME          : adamw           # optimizer name
  LR            : 0.001           # initial learning rate used in optimizer
  WEIGHT_DECAY  : 0.01            # decay rate used in optimizer

SCHEDULER:
  NAME          : warmuppolylr    # scheduler name
  POWER         : 0.9             # scheduler power
  WARMUP        : 10              # warmup epochs used in scheduler
  WARMUP_RATIO  : 0.1             # warmup ratio


EVAL:
  MODEL_PATH    : 'output/sosnet_resnet18_camvid.pth'     # trained model file path
  IMAGE_SIZE    : [352, 480]                              # evaluation image size in (h, w)
  MSF:
    ENABLE      : false                                   # multi-scale and flip evaluation
    FLIP        : true                                    # use flip in evaluation
    SCALES      : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]       # scales used in MSF evaluation
  TRAIN_SET     : false

TEST:
  MODEL_PATH    : 'output/sosnet_resnet18_camvid.pth'     # trained model file path
  FILE          : 'assests/camvid'                        # filename or foldername
  IMAGE_SIZE    : [352, 480]                              # inference image size in (h, w)
  OVERLAY       : true                                    # save the overlay result (image_alpha+label_alpha)
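The SCHEDULER block above (warmuppolylr with POWER 0.9, WARMUP 10 epochs, WARMUP_RATIO 0.1) combines a linear warmup with a polynomial decay of the learning rate. A common formulation of such a schedule is sketched below for illustration only; the actual implementation lives in semseg/schedulers.py and may differ in detail:

    # warmup_poly_sketch.py -- illustrative only; not the repo's semseg/schedulers.py.
    def warmup_poly_lr(base_lr, it, max_iter, warmup_iter, warmup_ratio=0.1, power=0.9):
        """One common warmup + polynomial-decay learning-rate schedule."""
        if it < warmup_iter:
            # Linear ramp from warmup_ratio * base_lr up to base_lr.
            alpha = it / max(1, warmup_iter)
            return base_lr * (warmup_ratio + (1.0 - warmup_ratio) * alpha)
        # Polynomial decay towards zero over the remaining iterations.
        progress = (it - warmup_iter) / max(1, max_iter - warmup_iter)
        return base_lr * (1.0 - progress) ** power

    if __name__ == "__main__":
        # LR = 0.001, POWER = 0.9, WARMUP_RATIO = 0.1 as in the config above.
        for step in (0, 50, 100, 5000, 9999):
            print(step, warmup_poly_lr(1e-3, step, max_iter=10000, warmup_iter=100))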
--------------------------------------------------------------------------------
/configs/BiSeNetV2/uavid2020.yaml:
--------------------------------------------------------------------------------
DEVICE          : cuda                                       # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...)
SAVE_DIR        : 'output_ablation/Bisenetv2/uavid2020'      # output folder name used for saving the model, logs and inference results

MODEL:
  NAME          : BiSeNetv2                                       # name of the model you are using
  BACKBONE      :                                                 # model variant
  PRETRAINED    : checkpoints/backbones/bisenetv2/bisenetv2.pth   # backbone model's weight

DATASET:
  NAME          : UAVid2020          # dataset name to be trained
  ROOT          : 'data/UAVid2020'   # dataset root path
  IGNORE_LABEL  : 255
  H_FLIP        : true
  V_FLIP        : false

TRAIN:
  IMAGE_SIZE    : [1024, 1920]    # training image size in (h, w)
  BATCH_SIZE    : 4               # batch size used to train
  EPOCHS        : 100             # number of epochs to train
  EVAL_INTERVAL : 10              # evaluation interval during training
  AMP           : false           # use AMP in training
  DDP           : false           # use DDP training
  MAX_INERITER  : 999999          # max iteration for each epoch

LOSS:
  NAME          : CrossEntropy    # loss function name (ohemce, ce, dice)
  CLS_WEIGHTS   : false           # use class weights in loss calculation

OPTIMIZER:
  NAME          : adamw           # optimizer name
  LR            : 0.001           # initial learning rate used in optimizer
  WEIGHT_DECAY  : 0.01            # decay rate used in optimizer

SCHEDULER:
  NAME          : warmuppolylr    # scheduler name
  POWER         : 0.9             # scheduler power
  WARMUP        : 5               # warmup epochs used in scheduler
  WARMUP_RATIO  : 0.01            # warmup ratio


EVAL:
  MODEL_PATH    : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth'    # trained model file path
  IMAGE_SIZE    : [2160, 3840]                            # evaluation image size in (h, w)
  MSF:
    ENABLE      : false                                   # multi-scale and flip evaluation
    FLIP        : true                                    # use flip in evaluation
    SCALES      : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]       # scales used in MSF evaluation
  TRAIN_SET     : false

TEST:
  MODEL_PATH    : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth'    # trained model file path
  FILE          : 'data/UAVid2020/img_dir/test'           # filename or foldername
  IMAGE_SIZE    : [2160, 3840]                            # inference image size in (h, w)
  OVERLAY       : false                                   # save the overlay result (image_alpha+label_alpha)
--------------------------------------------------------------------------------
/configs/BiSeNetV2/vaihingen.yaml:
--------------------------------------------------------------------------------
1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...)
2 | SAVE_DIR : 'output_ablation/Bisenetv2/Vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : BiSeNetv2 # name of the model you are using 6 | BACKBONE : # model variant 7 | PRETRAINED : checkpoints/backbones/bisenetv2/bisenetv2.pth # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/Deeplabv3plus/camvid_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/deeplabv3plus/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : DeeplabV3Plus # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/Deeplabv3plus/uavid2020_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:4 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/deeplabv3plus/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : DeeplabV3Plus # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1024, 1920] # training image size in (h, w) 18 | BATCH_SIZE : 4 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [2160, 3840] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [2160, 3840] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/Deeplabv3plus/vaihingen_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/deeplabv3plus/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : DeeplabV3Plus # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : true # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/deeplabv3plus_vaihingen.pth' # trained model file path 43 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/deeplabv3plus_vaihingen.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/camvid_S.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/camvid_S_focal.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : Focal # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/camvid_S_ohem.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/uavid2020_S.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1024, 1920] # training image size in (h, w) 18 | BATCH_SIZE : 4 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [2160, 3840] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [2160, 3840] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/vaihingen_S.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/vaihingen_S_focal.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : Focal # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/vaihingen_S_ohem.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/camvid_mitb0.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/camvid_mitb0_focal.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : Focal # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/camvid_mitb0_ohem.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/uavid2020_mitb0.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:5 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1000, 1800] # training image size in (h, w) 18 | BATCH_SIZE : 2 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [1080, 1920] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [1728, 3072] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/vaihingen_mitb0.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : true # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/vaihingen_mitb0_focal.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : Focal # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : true # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/vaihingen_mitb0_ohem.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : true # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/vaihingen_mitb3.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B3 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 8 # batch size used to train 19 | EPOCHS : 2000 # number of epochs to train 20 | EVAL_INTERVAL : 40 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | 24 | LOSS: 25 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 26 | CLS_WEIGHTS : true # use class weights in loss calculation 27 | 28 | OPTIMIZER: 29 | NAME : adamw # optimizer name 30 | LR : 0.001 # initial learning rate used in optimizer 31 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 32 | 33 | SCHEDULER: 34 | NAME : warmuppolylr # scheduler name 35 | POWER : 0.9 # scheduler power 36 | WARMUP : 10 # warmup epochs used in scheduler 37 | WARMUP_RATIO : 0.1 # warmup ratio 38 | 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SegFormer_MiT-B3_Vaihingen.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | 48 | 49 | TEST: 50 | MODEL_PATH : 'checkpoints/pretrained/mit/mit_b3_vaihingen.pth' # trained model file path 51 | FILE : 'assests/cityscapes' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/camvid_B.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/TopFormer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/camvid_B_focal.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/TopFormer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : Focal # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/camvid_B_ohem.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/TopFormer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/uavid2020_B.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
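# Note: TRAIN.IMAGE_SIZE below uses a reduced 1024x1920 (h, w) input, while EVAL and TEST run on the
# full 2160x3840 UAVid frames configured further down in this file.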
2 | SAVE_DIR : 'output_ablation/TopFormer/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1024, 1920] # training image size in (h, w) 18 | BATCH_SIZE : 4 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [2160, 3840] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [2160, 3840] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/vaihingen_B.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
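# Note on IGNORE_LABEL below: with the six standard ISPRS Vaihingen categories, index 5 is the last one
# (typically clutter/background); pixels with that label are skipped in the loss and used as the fill
# value for padded/augmented regions. This assumes semseg/datasets/vaihingen.py keeps the standard class order.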
2 | SAVE_DIR : 'output_ablation/TopFormer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/vaihingen_B_focal.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/TopFormer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : Focal # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/vaihingen_B_ohem.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/TopFormer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/UperNet/camvid_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:2 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/UperNet/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : UperNet # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/UperNet/uavid2020_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:1 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/UperNet/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : UperNet # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1024, 1920] # training image size in (h, w) 18 | BATCH_SIZE : 4 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [2160, 3840] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [2160, 3840] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/UperNet/vaihingen_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:7 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/UperNet/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : UperNet # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/deeplabv3plus/camvid_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/deeplabv3plus/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : DeeplabV3Plus # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/deeplabv3plus/uavid2020_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:4 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/deeplabv3plus/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : DeeplabV3Plus # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1024, 1920] # training image size in (h, w) 18 | BATCH_SIZE : 4 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [2160, 3840] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [2160, 3840] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/deeplabv3plus/vaihingen_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/deeplabv3plus/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : DeeplabV3Plus # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : true # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/deeplabv3plus_vaihingen.pth' # trained model file path 43 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/deeplabv3plus_vaihingen.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/segformer/camvid_mitb0.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/segformer/uavid2020_mitb0.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:5 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1000, 1800] # training image size in (h, w) 18 | BATCH_SIZE : 2 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [1080, 1920] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [1728, 3072] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/segformer/vaihingen_mitb0.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : true # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/segformer/vaihingen_mitb3.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B3 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 8 # batch size used to train 19 | EPOCHS : 2000 # number of epochs to train 20 | EVAL_INTERVAL : 40 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | 24 | LOSS: 25 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 26 | CLS_WEIGHTS : true # use class weights in loss calculation 27 | 28 | OPTIMIZER: 29 | NAME : adamw # optimizer name 30 | LR : 0.001 # initial learning rate used in optimizer 31 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 32 | 33 | SCHEDULER: 34 | NAME : warmuppolylr # scheduler name 35 | POWER : 0.9 # scheduler power 36 | WARMUP : 10 # warmup epochs used in scheduler 37 | WARMUP_RATIO : 0.1 # warmup ratio 38 | 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SegFormer_MiT-B3_Vaihingen.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | 48 | 49 | TEST: 50 | MODEL_PATH : 'checkpoints/pretrained/mit/mit_b3_vaihingen.pth' # trained model file path 51 | FILE : 'assests/cityscapes' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /semseg/__init__.py: -------------------------------------------------------------------------------- 1 | from tabulate import tabulate 2 | from semseg import models 3 | from semseg import datasets 4 | from semseg.models import backbones, heads 5 | 6 | 7 | def show_models(): 8 | model_names = models.__all__ 9 | numbers = list(range(1, len(model_names)+1)) 10 | print(tabulate({'No.': numbers, 'Model Names': model_names}, headers='keys')) 11 | 12 | 13 | def show_backbones(): 14 | backbone_names = backbones.__all__ 15 | variants = [] 16 | for name in backbone_names: 17 | try: 18 | variants.append(list(eval(f"backbones.{name.lower()}_settings").keys())) 19 | except: 20 | variants.append('-') 21 | print(tabulate({'Backbone Names': backbone_names, 'Variants': variants}, headers='keys')) 22 | 23 | 24 | def show_heads(): 25 | head_names = heads.__all__ 26 | numbers = list(range(1, len(head_names)+1)) 27 | print(tabulate({'No.': numbers, 'Heads': head_names}, headers='keys')) 28 | 29 | 30 | def show_datasets(): 31 | dataset_names = datasets.__all__ 32 | numbers = list(range(1, len(dataset_names)+1)) 33 | print(tabulate({'No.': numbers, 'Datasets': dataset_names}, headers='keys')) 34 | -------------------------------------------------------------------------------- /semseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .ade20k import ADE20K 2 | from .camvid import CamVid 3 | from .cityscapes import CityScapes 4 | from .pascalcontext 
import PASCALContext 5 | from .cocostuff import COCOStuff 6 | from .sunrgbd import SunRGBD 7 | from .mapillary import MapillaryVistas 8 | from .mhpv1 import MHPv1 9 | from .mhpv2 import MHPv2 10 | from .lip import LIP 11 | from .cihp import CIHP, CCIHP 12 | from .atr import ATR 13 | from .suim import SUIM 14 | from .helen import HELEN 15 | from .lapa import LaPa 16 | from .ibugmask import iBugMask 17 | from .celebamaskhq import CelebAMaskHQ 18 | from .facesynthetics import FaceSynthetics 19 | # ################################# 20 | from .vaihingen import Vaihingen 21 | from .uavid2020 import UAVid2020 22 | from .isaid import ISAID 23 | from .udd6 import UDD6 24 | from .htht2022 import HTHT2022Coarse 25 | from .aeroscapes import Aeroscapes 26 | # ################################# 27 | 28 | 29 | __all__ = [ 30 | 'CamVid', 31 | 'CityScapes', 32 | 'ADE20K', 33 | 'MHPv1', 34 | 'MHPv2', 35 | 'LIP', 36 | 'CIHP', 37 | 'CCIHP', 38 | 'ATR', 39 | 'PASCALContext', 40 | 'COCOStuff', 41 | 'SUIM', 42 | 'SunRGBD', 43 | 'MapillaryVistas', 44 | 'HELEN', 45 | 'LaPa', 46 | 'iBugMask', 47 | 'CelebAMaskHQ', 48 | 'FaceSynthetics', 49 | # ################# 50 | 'Vaihingen', 51 | 'UAVid2020', 52 | 'ISAID', 53 | 'UDD6', 54 | 'HTHT2022Coarse', 55 | 'Aeroscapes', 56 | # ################# 57 | ] 58 | -------------------------------------------------------------------------------- /semseg/datasets/atr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class ATR(Dataset): 10 | """Single Person Fashion Dataset 11 | https://openaccess.thecvf.com/content_iccv_2015/papers/Liang_Human_Parsing_With_ICCV_2015_paper.pdf 12 | 13 | https://github.com/lemondan/HumanParsing-Dataset 14 | num_classes: 17+background 15 | 16000 train images 16 | 700 val images 17 | 1000 test images with labels 18 | """ 19 | CLASSES = ['background', 'hat', 'hair', 'sunglass', 'upper-clothes', 'skirt', 'pants', 'dress', 'belt', 'left-shoe', 'right-shoe', 'face', 'left-leg', 'right-leg', 'left-arm', 'right-arm', 'bag', 'scarf'] 20 | PALETTE = torch.tensor([[0, 0, 0], [127, 0, 0], [254, 0, 0], [0, 84, 0], [169, 0, 50], [254, 84, 0], [255, 0, 84], [0, 118, 220], [84, 84, 0], [0, 84, 84], [84, 50, 0], [51, 85, 127], [0, 127, 0], [0, 0, 254], [50, 169, 220], [0, 254, 254], [84, 254, 169], [169, 254, 84]]) 21 | 22 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 23 | super().__init__() 24 | assert split in ['train', 'val', 'test'] 25 | self.transform = transform 26 | self.n_classes = len(self.CLASSES) 27 | self.ignore_label = 255 28 | 29 | img_path = Path(root) / 'humanparsing' / 'JPEGImages' 30 | self.files = list(img_path.glob('*.jpg')) 31 | if split == 'train': 32 | self.files = self.files[:16000] 33 | elif split == 'val': 34 | self.files = self.files[16000:16700] 35 | else: 36 | self.files = self.files[16700:17700] 37 | 38 | if not self.files: 39 | raise Exception(f"No images found in {img_path}") 40 | print(f"Found {len(self.files)} {split} images.") 41 | 42 | def __len__(self) -> int: 43 | return len(self.files) 44 | 45 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 46 | img_path = str(self.files[index]) 47 | lbl_path = str(self.files[index]).replace('JPEGImages', 'SegmentationClassAug').replace('.jpg', '.png') 48 | 49 | image = io.read_image(img_path) 50 | label = io.read_image(lbl_path) 
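        # image is a uint8 CxHxW tensor; label is a 1xHxW mask read from SegmentationClassAug.
        # The squeeze()/long() below produces the HxW int64 target expected by the loss.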
51 | 52 | if self.transform: 53 | image, label = self.transform(image, label) 54 | return image, label.squeeze().long() 55 | 56 | 57 | if __name__ == '__main__': 58 | from semseg.utils.visualize import visualize_dataset_sample 59 | visualize_dataset_sample(ATR, '/home/sithu/datasets/LIP/ATR') -------------------------------------------------------------------------------- /semseg/datasets/celebamaskhq.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | from torchvision import transforms as T 8 | 9 | 10 | class CelebAMaskHQ(Dataset): 11 | CLASSES = [ 12 | 'background', 'skin', 'nose', 'eye_g', 'l_eye', 'r_eye', 'l_brow', 'r_brow', 'l_ear', 13 | 'r_ear', 'mouth', 'u_lip', 'l_lip', 'hair', 'hat', 'ear_r', 'neck_l', 'neck', 'cloth' 14 | ] 15 | PALETTE = torch.tensor([ 16 | [0, 0, 0], [204, 0, 0], [76, 153, 0], [204, 204, 0], [51, 51, 255], [204, 0, 204], [0, 255, 255], [255, 204, 204], [102, 51, 0], [255, 0, 0], 17 | [102, 204, 0], [255, 255, 0], [0, 0, 153], [0, 0, 204], [255, 51, 153], [0, 204, 204], [0, 51, 0], [255, 153, 51], [0, 204, 0] 18 | ]) 19 | 20 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 21 | super().__init__() 22 | assert split in ['train', 'val', 'test'] 23 | self.root = Path(root) 24 | self.transform = transform 25 | self.n_classes = len(self.CLASSES) 26 | self.ignore_label = 255 27 | self.resize = T.Resize((512, 512)) 28 | 29 | with open(self.root / f'{split}_list.txt') as f: 30 | self.files = f.read().splitlines() 31 | 32 | if not self.files: 33 | raise Exception(f"No images found in {root}") 34 | print(f"Found {len(self.files)} {split} images.") 35 | 36 | def __len__(self) -> int: 37 | return len(self.files) 38 | 39 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 40 | img_path = self.root / 'CelebA-HQ-img' / f"{self.files[index]}.jpg" 41 | lbl_path = self.root / 'CelebAMask-HQ-label' / f"{self.files[index]}.png" 42 | image = io.read_image(str(img_path)) 43 | image = self.resize(image) 44 | label = io.read_image(str(lbl_path)) 45 | 46 | if self.transform: 47 | image, label = self.transform(image, label) 48 | return image, label.squeeze().long() 49 | 50 | 51 | if __name__ == '__main__': 52 | from semseg.utils.visualize import visualize_dataset_sample 53 | visualize_dataset_sample(CelebAMaskHQ, '/home/sithu/datasets/CelebAMask-HQ') -------------------------------------------------------------------------------- /semseg/datasets/cihp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class CIHP(Dataset): 10 | """This has Best Human Parsing Labels 11 | num_classes: 19+background 12 | 28280 train images 13 | 5000 val images 14 | """ 15 | CLASSES = ['background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes', 'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt', 'face', 'left-arm', 'right-arm', 'left-leg', 'right-leg', 'left-shoe', 'right-shoe'] 16 | PALETTE = torch.tensor([[120, 120, 120], [127, 0, 0], [254, 0, 0], [0, 84, 0], [169, 0, 50], [254, 84, 0], [255, 0, 84], [0, 118, 220], [84, 84, 0], [0, 84, 84], [84, 50, 0], [51, 85, 127], [0, 127, 0], [0, 0, 254], [50, 169, 220], [0, 254, 
254], [84, 254, 169], [169, 254, 84], [254, 254, 0], [254, 169, 0]]) 17 | 18 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 19 | super().__init__() 20 | assert split in ['train', 'val'] 21 | split = 'Training' if split == 'train' else 'Validation' 22 | self.transform = transform 23 | self.n_classes = len(self.CLASSES) 24 | self.ignore_label = 255 25 | 26 | img_path = Path(root) / 'instance-level_human_parsing' / split / 'Images' 27 | self.files = list(img_path.glob('*.jpg')) 28 | 29 | if not self.files: 30 | raise Exception(f"No images found in {img_path}") 31 | print(f"Found {len(self.files)} {split} images.") 32 | 33 | def __len__(self) -> int: 34 | return len(self.files) 35 | 36 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 37 | img_path = str(self.files[index]) 38 | lbl_path = str(self.files[index]).replace('Images', 'Category_ids').replace('.jpg', '.png') 39 | 40 | image = io.read_image(img_path) 41 | label = io.read_image(lbl_path) 42 | 43 | if self.transform: 44 | image, label = self.transform(image, label) 45 | return image, label.squeeze().long() 46 | 47 | 48 | class CCIHP(CIHP): 49 | CLASSES = ['background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes', 'facemask', 'coat', 'socks', 'pants', 'torso-skin', 'scarf', 'skirt', 'face', 'left-arm', 'right-arm', 'left-leg', 'right-leg', 'left-shoe', 'right-shoe', 'bag', 'others'] 50 | PALETTE = torch.tensor([[120, 120, 120], [127, 0, 0], [254, 0, 0], [0, 84, 0], [169, 0, 50], [254, 84, 0], [255, 0, 84], [0, 118, 220], [84, 84, 0], [0, 84, 84], [84, 50, 0], [51, 85, 127], [0, 127, 0], [0, 0, 254], [50, 169, 220], [0, 254, 254], [84, 254, 169], [169, 254, 84], [254, 254, 0], [254, 169, 0], [102, 254, 0], [182, 255, 0]]) 51 | 52 | 53 | if __name__ == '__main__': 54 | import sys 55 | sys.path.insert(0, '.') 56 | from semseg.utils.visualize import visualize_dataset_sample 57 | visualize_dataset_sample(CCIHP, 'C:\\Users\\sithu\\Documents\\Datasets\\LIP\\CIHP') -------------------------------------------------------------------------------- /semseg/datasets/facesynthetics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class FaceSynthetics(Dataset): 10 | CLASSES = ['background', 'skin', 'nose', 'r-eye', 'l-eye', 'r-brow', 'l-brow', 'r-ear', 'l-ear', 'i-mouth', 't-lip', 'b-lip', 'neck', 'hair', 'beard', 'clothing', 'glasses', 'headwear', 'facewear'] 11 | PALETTE = torch.tensor([ 12 | [0, 0, 0], [204, 0, 0], [76, 153, 0], [204, 204, 0], [51, 51, 255], [204, 0, 204], [0, 255, 255], [255, 204, 204], [102, 51, 0], [255, 0, 0], 13 | [102, 204, 0], [255, 255, 0], [0, 0, 153], [0, 0, 204], [255, 51, 153], [0, 204, 204], [0, 51, 0], [255, 153, 51], [0, 204, 0] 14 | ]) 15 | 16 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 17 | super().__init__() 18 | assert split in ['train', 'val', 'test'] 19 | if split == 'train': 20 | split = 'dataset_100000' 21 | elif split == 'val': 22 | split = 'dataset_1000' 23 | else: 24 | split = 'dataset_100' 25 | 26 | self.transform = transform 27 | self.n_classes = len(self.CLASSES) 28 | self.ignore_label = 255 29 | 30 | img_path = Path(root) / split 31 | images = img_path.glob('*.png') 32 | self.files = [path for path in images if '_seg' not in path.name] 33 | 34 | if not self.files: raise Exception(f"No images 
found in {root}") 35 | print(f"Found {len(self.files)} {split} images.") 36 | 37 | def __len__(self) -> int: 38 | return len(self.files) 39 | 40 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 41 | img_path = str(self.files[index]) 42 | lbl_path = str(self.files[index]).replace('.png', '_seg.png') 43 | image = io.read_image(str(img_path)) 44 | label = io.read_image(str(lbl_path)) 45 | 46 | if self.transform: 47 | image, label = self.transform(image, label) 48 | return image, label.squeeze().long() 49 | 50 | 51 | if __name__ == '__main__': 52 | import sys 53 | sys.path.insert(0, '.') 54 | from semseg.utils.visualize import visualize_dataset_sample 55 | visualize_dataset_sample(FaceSynthetics, 'C:\\Users\\sithu\\Documents\\Datasets') -------------------------------------------------------------------------------- /semseg/datasets/helen.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class HELEN(Dataset): 10 | CLASSES = ['background', 'skin', 'l-brow', 'r-brow', 'l-eye', 'r-eye', 'nose', 'u-lip', 'i-mouth', 'l-lip', 'hair'] 11 | PALETTE = torch.tensor([[0, 0 ,0], [127, 0, 0], [254, 0, 0], [0, 84, 0], [169, 0, 50], [254, 84, 0], [255, 0, 84], [0, 118, 220], [84, 84, 0], [0, 84, 84], [84, 50, 0]]) 12 | 13 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 14 | super().__init__() 15 | assert split in ['train', 'val', 'test'] 16 | self.transform = transform 17 | self.n_classes = len(self.CLASSES) 18 | self.ignore_label = 255 19 | 20 | self.files = self.get_files(root, split) 21 | if not self.files: raise Exception(f"No images found in {root}") 22 | print(f"Found {len(self.files)} {split} images.") 23 | 24 | def get_files(self, root: str, split: str): 25 | root = Path(root) 26 | if split == 'train': 27 | split = 'exemplars' 28 | elif split == 'val': 29 | split = 'tuning' 30 | else: 31 | split = 'testing' 32 | with open(root / f'{split}.txt') as f: 33 | lines = f.read().splitlines() 34 | 35 | split_names = [line.split(',')[-1].strip() for line in lines if line != ''] 36 | files = (root / 'images').glob("*.jpg") 37 | files = list(filter(lambda x: x.stem in split_names, files)) 38 | return files 39 | 40 | def __len__(self) -> int: 41 | return len(self.files) 42 | 43 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 44 | img_path = str(self.files[index]) 45 | lbl_path = str(self.files[index]).split('.')[0].replace('images', 'labels') 46 | image = io.read_image(img_path) 47 | label = self.encode(lbl_path) 48 | 49 | if self.transform: 50 | image, label = self.transform(image, label) 51 | return image, label.squeeze().long() 52 | 53 | def encode(self, label_path: str) -> Tensor: 54 | mask_paths = sorted(list(Path(label_path).glob('*.png'))) 55 | for i, mask_path in enumerate(mask_paths): 56 | mask = io.read_image(str(mask_path)).squeeze() 57 | if i == 0: 58 | label = torch.zeros(self.n_classes, *mask.shape) 59 | label[i, ...] 
= mask 60 | label = label.argmax(dim=0).unsqueeze(0) 61 | return label 62 | 63 | 64 | if __name__ == '__main__': 65 | from semseg.utils.visualize import visualize_dataset_sample 66 | visualize_dataset_sample(HELEN, '/home/sithu/datasets/SmithCVPR2013_dataset_resized') -------------------------------------------------------------------------------- /semseg/datasets/ibugmask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class iBugMask(Dataset): 10 | CLASSES = ['background', 'skin', 'l-brow', 'r-brow', 'l-eye', 'r-eye', 'nose', 'u-lip', 'i-mouth', 'l-lip', 'hair'] 11 | PALETTE = torch.tensor([[0, 0, 0], [255, 255, 0], [139, 76, 57], [139, 54, 38], [0, 205, 0], [0, 138, 0], [154, 50, 205], [72, 118, 255], [255, 165, 0], [0, 0, 139], [255, 0, 0]]) 12 | 13 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 14 | super().__init__() 15 | assert split in ['train', 'val', 'test'] 16 | split = 'train' if split == 'train' else 'test' 17 | self.transform = transform 18 | self.n_classes = len(self.CLASSES) 19 | self.ignore_label = 255 20 | 21 | img_path = Path(root) / split 22 | self.files = list(img_path.glob('*.jpg')) 23 | 24 | if not self.files: raise Exception(f"No images found in {root}") 25 | print(f"Found {len(self.files)} {split} images.") 26 | 27 | def __len__(self) -> int: 28 | return len(self.files) 29 | 30 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 31 | img_path = str(self.files[index]) 32 | lbl_path = str(self.files[index]).replace('.jpg', '.png') 33 | image = io.read_image(str(img_path)) 34 | label = io.read_image(str(lbl_path)) 35 | 36 | if self.transform: 37 | image, label = self.transform(image, label) 38 | return image, label.squeeze().long() 39 | 40 | 41 | if __name__ == '__main__': 42 | from semseg.utils.visualize import visualize_dataset_sample 43 | visualize_dataset_sample(iBugMask, '/home/sithu/datasets/ibugmask_release') -------------------------------------------------------------------------------- /semseg/datasets/isaid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | 4 | import cv2 5 | import torch 6 | import logging 7 | from torch import Tensor 8 | from torch.utils.data import Dataset 9 | from torchvision import io 10 | from pathlib import Path 11 | from typing import Tuple 12 | from glob import glob 13 | 14 | 15 | class ISAID(Dataset): 16 | """ 17 | num_classes: 16, ignore index is 255 (impervious_surface). 
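    Expected directory layout (inferred from the loader code below; paths shown are examples):
        <root>/img_dir/{train,val}/*.png
        <root>/ann_dir/{train,val}/*_instance_color_RGB.png

    Minimal usage sketch (mirrors the __main__ block at the bottom of this file; the root path is an example):
        ds = ISAID('data/iSAID2', split='train')   # preload=False keeps images on disk until indexed
        image, label = ds[0]                       # image: uint8 CxHxW tensor, label: HxW int64 tensor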
18 | """ 19 | CLASSES = ['background', 'ship', 'store_tank', 'baseball_diamond', 'tennis_court', 20 | 'basketball_court', 'Ground_Track_Field', 'Bridge', 'Large_Vehicle', 'Small_Vehicle', 21 | 'Helicopter', 'Swimming_pool', 'Roundabout', 'Soccer_ball_field', 'plane', 22 | 'Harbor'] 23 | 24 | PALETTE = torch.tensor([[0, 0, 0], [0, 0, 63], [0, 63, 63], [0, 63, 0], [0, 63, 127], 25 | [0, 63, 191], [0, 63, 255], [0, 127, 63], [0, 127, 127], [0, 0, 127], 26 | [0, 0, 191], [0, 0, 255], [0, 191, 127], [0, 127, 191], [0, 127, 255], 27 | [0, 100, 155]]) 28 | 29 | SMALL_OBJECT = [1, 2, 3, 7, 8, 9, 10, 11, 12, 14, 15] 30 | 31 | def __init__(self, root: str, split: str = 'train', transform=None, preload=False) -> None: 32 | super().__init__() 33 | assert split in ['train', 'val'] 34 | self.split = split 35 | self.transform = transform 36 | self.n_classes = len(self.CLASSES) 37 | self.ignore_label = 255 38 | self.preload = preload 39 | self.pairs = [] 40 | 41 | imgs = glob(osp.join(root, 'img_dir', self.split) + '/*.png') 42 | imgs.sort() 43 | for img_path in imgs: 44 | lbl_path = img_path.replace('img_dir', 'ann_dir').replace('.png', '_instance_color_RGB.png') 45 | data_pair = [ 46 | io.read_image(img_path) if self.preload else img_path, 47 | io.read_image(lbl_path) if self.preload else lbl_path, 48 | ] 49 | self.pairs.append(data_pair) 50 | 51 | assert len(self.pairs) > 0, f"No images found in {root}" 52 | logging.info(f"Found {len(self.pairs)} {split} images.") 53 | 54 | def __len__(self) -> int: 55 | return len(self.pairs) 56 | 57 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 58 | image, label = self.pairs[index] 59 | if not self.preload: 60 | image = io.read_image(image) 61 | label = io.read_image(label) 62 | 63 | if self.transform: 64 | image, label = self.transform(image, label) 65 | return image, torch.squeeze(label.long()) 66 | 67 | 68 | if __name__ == '__main__': 69 | # from semseg.utils.visualize import visualize_dataset_sample 70 | # visualize_dataset_sample(ISAID, '../../data/iSAID2') 71 | 72 | 73 | from torch.utils.data import DataLoader 74 | import numpy as np 75 | 76 | 77 | train_dataset = ISAID('../../data/iSAID2', split='train') 78 | val_dataset = ISAID('../../data/iSAID2', split='val') 79 | print(f'train size={len(train_dataset)}, val size={len(val_dataset)}') 80 | 81 | train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=False) 82 | for _img, _lbl in train_dataloader: 83 | print(_img.cpu().numpy().shape, _lbl.cpu().numpy().shape, np.unique(_lbl.cpu().numpy())) 84 | cc = _lbl.cpu().numpy().squeeze().astype(np.uint8) 85 | cv2.imshow('img', _img.cpu().numpy().squeeze().transpose((1,2,0))) 86 | # cv2.imshow('lbl', np.array([cc, cc, cc]).transpose((1, 2, 0)).astype(np.uint8)) 87 | cv2.imshow('lbl', cc) 88 | cv2.waitKey(0) 89 | -------------------------------------------------------------------------------- /semseg/datasets/lapa.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class LaPa(Dataset): 10 | CLASSES = ['background', 'skin', 'l-brow', 'r-brow', 'l-eye', 'r-eye', 'nose', 'u-lip', 'i-mouth', 'l-lip', 'hair'] 11 | PALETTE = torch.tensor([[0, 0, 0], [0, 153, 255], [102, 255, 153], [0, 204, 153], [255, 255, 102], [255, 255, 204], [255, 153, 0], [255, 102, 255], [102, 0, 51], [255, 204, 255], [255, 0, 102]]) 12 | 13 | def 
__init__(self, root: str, split: str = 'train', transform = None) -> None: 14 | super().__init__() 15 | assert split in ['train', 'val', 'test'] 16 | self.transform = transform 17 | self.n_classes = len(self.CLASSES) 18 | self.ignore_label = 255 19 | 20 | img_path = Path(root) / split / 'images' 21 | self.files = list(img_path.glob('*.jpg')) 22 | 23 | if not self.files: raise Exception(f"No images found in {root}") 24 | print(f"Found {len(self.files)} {split} images.") 25 | 26 | def __len__(self) -> int: 27 | return len(self.files) 28 | 29 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 30 | img_path = str(self.files[index]) 31 | lbl_path = str(self.files[index]).replace('images', 'labels').replace('.jpg', '.png') 32 | image = io.read_image(str(img_path)) 33 | label = io.read_image(str(lbl_path)) 34 | 35 | if self.transform: 36 | image, label = self.transform(image, label) 37 | return image, label.squeeze().long() 38 | 39 | 40 | if __name__ == '__main__': 41 | from semseg.utils.visualize import visualize_dataset_sample 42 | visualize_dataset_sample(LaPa, '/home/sithu/datasets/LaPa') -------------------------------------------------------------------------------- /semseg/datasets/lip.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class LIP(Dataset): 10 | """ 11 | num_classes: 19+background 12 | 30462 train images 13 | 10000 val images 14 | """ 15 | CLASSES = ['background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes', 'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt', 'face', 'left-arm', 'right-arm', 'left-leg', 'right-leg', 'left-shoe', 'right-shoe'] 16 | PALETTE = torch.tensor([[0, 0, 0], [127, 0, 0], [254, 0, 0], [0, 84, 0], [169, 0, 50], [254, 84, 0], [255, 0, 84], [0, 118, 220], [84, 84, 0], [0, 84, 84], [84, 50, 0], [51, 85, 127], [0, 127, 0], [0, 0, 254], [50, 169, 220], [0, 254, 254], [84, 254, 169], [169, 254, 84], [254, 254, 0], [254, 169, 0]]) 17 | 18 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 19 | super().__init__() 20 | assert split in ['train', 'val'] 21 | self.split = split 22 | self.transform = transform 23 | self.n_classes = len(self.CLASSES) 24 | self.ignore_label = 255 25 | 26 | img_path = Path(root) / 'TrainVal_images' / f'{split}_images' 27 | self.files = list(img_path.glob('*.jpg')) 28 | 29 | if not self.files: 30 | raise Exception(f"No images found in {img_path}") 31 | print(f"Found {len(self.files)} {split} images.") 32 | 33 | def __len__(self) -> int: 34 | return len(self.files) 35 | 36 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 37 | img_path = str(self.files[index]) 38 | lbl_path = str(self.files[index]).replace('TrainVal_images', 'TrainVal_parsing_annotations').replace(f'{self.split}_images', f'{self.split}_segmentations').replace('.jpg', '.png') 39 | 40 | image = io.read_image(img_path) 41 | label = io.read_image(lbl_path) 42 | 43 | if self.transform: 44 | image, label = self.transform(image, label) 45 | return image, label.squeeze().long() 46 | 47 | 48 | if __name__ == '__main__': 49 | from semseg.utils.visualize import visualize_dataset_sample 50 | visualize_dataset_sample(LIP, '/home/sithu/datasets/LIP/LIP') -------------------------------------------------------------------------------- /semseg/datasets/mapillary.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class MapillaryVistas(Dataset): 10 | CLASSES = [ 11 | 'Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier', 'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking', 'Pedestrian Area', 'Rail Track', 'Road', 'Service Lane', 'Sidewalk', 'Bridge', 'Building', 'Tunnel', 'Person', 'Bicyclist', 'Motorcyclist', 'Other Rider', 'Lane Marking - Crosswalk', 'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow', 'Terrain', 'Vegetation', 'Water', 'Banner', 12 | 'Bench', 'Bike Rack', 'Billboard', 'Catch Basin', 'CCTV Camera', 'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', 'Phone Booth', 'Pothole', 'Street Light', 'Pole', 'Traffic Sign Frame', 'Utility Pole', 'Traffic Light', 'Traffic Sign (Back)', 'Traffic Sign (Front)', 'Trash Can', 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan', 'Motorcycle', 'On Rails', 'Other Vehicle', 'Trailer', 'Truck', 'Wheeled Slow', 'Car Mount', 'Ego Vehicle' 13 | ] 14 | PALETTE = torch.tensor([ 15 | [165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153], [180, 165, 180], [90, 120, 150], [102, 102, 156], [128, 64, 255], [140, 140, 200], [170, 170, 170], [250, 170, 160], [96, 96, 96], [230, 150, 140], [128, 64, 128], [110, 110, 110], [244, 35, 232], [150, 100, 100], [70, 70, 70], [150, 120, 90], [220, 20, 60], [255, 0, 0], [255, 0, 100], [255, 0, 200], [200, 128, 128], [255, 255, 255], [64, 170, 64], [230, 160, 50], [70, 130, 180], [190, 255, 255], [152, 251, 152], [107, 142, 35], 16 | [0, 170, 30], [255, 255, 128], [250, 0, 30], [100, 140, 180], [220, 220, 220], [220, 128, 128], [222, 40, 40], [100, 170, 30], [40, 40, 40], [33, 33, 33], [100, 128, 160], [142, 0, 0], [70, 100, 150], [210, 170, 100], [153, 153, 153], [128, 128, 128], [0, 0, 80], [250, 170, 30], [192, 192, 192], [220, 220, 0], [140, 140, 20], [119, 11, 32], [150, 0, 255], [0, 60, 100], [0, 0, 142], [0, 0, 90], [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], [0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, 10] 17 | ]) 18 | 19 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 20 | super().__init__() 21 | assert split in ['train', 'val'] 22 | split = 'training' if split == 'train' else 'validation' 23 | self.transform = transform 24 | self.n_classes = len(self.CLASSES) 25 | self.ignore_label = 65 26 | 27 | img_path = Path(root) / split / 'images' 28 | self.files = list(img_path.glob("*.jpg")) 29 | 30 | if not self.files: 31 | raise Exception(f"No images found in {img_path}") 32 | print(f"Found {len(self.files)} {split} images.") 33 | 34 | def __len__(self) -> int: 35 | return len(self.files) 36 | 37 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 38 | img_path = str(self.files[index]) 39 | lbl_path = str(self.files[index]).replace('images', 'labels').replace('.jpg', '.png') 40 | 41 | image = io.read_image(img_path, io.ImageReadMode.RGB) 42 | label = io.read_image(lbl_path) 43 | 44 | if self.transform: 45 | image, label = self.transform(image, label) 46 | return image, label.squeeze().long() 47 | 48 | 49 | if __name__ == '__main__': 50 | from semseg.utils.visualize import visualize_dataset_sample 51 | visualize_dataset_sample(MapillaryVistas, '/home/sithu/datasets/Mapillary') -------------------------------------------------------------------------------- 
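The short usage sketch below is an editorial illustration, not a file from the repository: it shows how the dataset classes dumped above (MapillaryVistas is used as the example) can be consumed with a plain PyTorch DataLoader. The dataset root path and the minimal to_float transform are assumptions standing in for the repository's own augmentation pipeline (semseg/augmentations.py), which is not shown in this part of the dump.
# Illustrative usage sketch (assumed paths and transform; not a repository file).
import torch
from torch.utils.data import DataLoader
from semseg.datasets.mapillary import MapillaryVistas


def to_float(image: torch.Tensor, label: torch.Tensor):
    # Stand-in transform: scale uint8 images to [0, 1]; labels pass through unchanged.
    return image.float() / 255.0, label


if __name__ == '__main__':
    # '/path/to/Mapillary' is a placeholder for a local copy of the dataset.
    dataset = MapillaryVistas('/path/to/Mapillary', split='train', transform=to_float)
    # batch_size=1 because raw images differ in size until a resize/crop transform is applied.
    loader = DataLoader(dataset, batch_size=1, shuffle=True)
    image, label = next(iter(loader))
    print(image.shape, label.shape)  # e.g. [1, 3, H, W] and [1, H, W]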
/semseg/datasets/suim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | from PIL import Image 8 | from torchvision.transforms import functional as TF 9 | 10 | 11 | class SUIM(Dataset): 12 | CLASSES = ['water', 'human divers', 'aquatic plants and sea-grass', 'wrecks and ruins', 'robots (AUVs/ROVs/instruments)', 'reefs and invertebrates', 'fish and vertebrates', 'sea-floor and rocks'] 13 | PALETTE = torch.tensor([[0, 0, 0], [0, 0, 255], [0, 255, 0], [0, 255, 255], [255, 0, 0], [255, 0, 255], [255, 255, 0], [255, 255, 255]]) 14 | 15 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 16 | super().__init__() 17 | assert split in ['train', 'val'] 18 | self.split = 'train_val' if split == 'train' else 'TEST' 19 | self.transform = transform 20 | self.n_classes = len(self.CLASSES) 21 | self.ignore_label = 255 22 | 23 | img_path = Path(root) / self.split / 'images' 24 | self.files = list(img_path.glob("*.jpg")) 25 | 26 | if not self.files: 27 | raise Exception(f"No images found in {img_path}") 28 | print(f"Found {len(self.files)} {split} images.") 29 | 30 | def __len__(self) -> int: 31 | return len(self.files) 32 | 33 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 34 | img_path = str(self.files[index]) 35 | lbl_path = str(self.files[index]).replace('images', 'masks').replace('.jpg', '.bmp') 36 | 37 | image = io.read_image(img_path) 38 | label = TF.pil_to_tensor(Image.open(lbl_path).convert('RGB')) 39 | 40 | if self.transform: 41 | image, label = self.transform(image, label) 42 | return image, self.encode(label).long() 43 | 44 | def encode(self, label: Tensor) -> Tensor: 45 | label = label.permute(1, 2, 0) 46 | mask = torch.zeros(label.shape[:-1]) 47 | 48 | for index, color in enumerate(self.PALETTE): 49 | bool_mask = torch.eq(label, color) 50 | class_map = torch.all(bool_mask, dim=-1) 51 | mask[class_map] = index 52 | return mask 53 | 54 | 55 | if __name__ == '__main__': 56 | from semseg.utils.visualize import visualize_dataset_sample 57 | visualize_dataset_sample(SUIM, '/home/sithu/datasets/SUIM') -------------------------------------------------------------------------------- /semseg/datasets/sunrgbd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch import Tensor 4 | from torch.utils.data import Dataset 5 | from torchvision import io 6 | from scipy import io as sio 7 | from pathlib import Path 8 | from typing import Tuple 9 | 10 | 11 | class SunRGBD(Dataset): 12 | """ 13 | num_classes: 37 14 | """ 15 | CLASSES = [ 16 | 'wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 'bookshelf', 'picture', 'counter', 'blinds', 'desk', 'shelves', 'curtain', 'dresser', 'pillow', 'mirror', 17 | 'floor mat', 'clothes', 'ceiling', 'books', 'fridge', 'tv', 'paper', 'towel', 'shower curtain', 'box', 'whiteboard', 'person', 'night stand', 'toilet', 'sink', 'lamp', 'bathtub', 'bag' 18 | ] 19 | 20 | PALETTE = torch.tensor([ 21 | (119, 119, 119), (244, 243, 131), (137, 28, 157), (150, 255, 255), (54, 114, 113), (0, 0, 176), (255, 69, 0), (87, 112, 255), (0, 163, 33), 22 | (255, 150, 255), (255, 180, 10), (101, 70, 86), (38, 230, 0), (255, 120, 70), (117, 41, 121), (150, 255, 0), (132, 0, 255), (24, 209, 255), 23 | (191, 130, 35), (219, 200, 109), 
(154, 62, 86), (255, 190, 190), (255, 0, 255), (152, 163, 55), (192, 79, 212), (230, 230, 230), (53, 130, 64), 24 | (155, 249, 152), (87, 64, 34), (214, 209, 175), (170, 0, 59), (255, 0, 0), (193, 195, 234), (70, 72, 115), (255, 255, 0), (52, 57, 131), (12, 83, 45) 25 | ]) 26 | 27 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 28 | super().__init__() 29 | assert split in ['alltrain', 'train', 'val', 'test'] 30 | self.transform = transform 31 | self.n_classes = len(self.CLASSES) 32 | self.ignore_label = -1 33 | self.files, self.labels = self.get_data(root, split) 34 | print(f"Found {len(self.files)} {split} images.") 35 | 36 | def get_data(self, root: str, split: str): 37 | root = Path(root) 38 | files, labels = [], [] 39 | split_path = root / 'SUNRGBDtoolbox' / 'traintestSUNRGBD' / 'allsplit.mat' 40 | split_mat = sio.loadmat(split_path, squeeze_me=True, struct_as_record=False) 41 | if split == 'train': 42 | file_lists = split_mat['trainvalsplit'].train 43 | elif split == 'val': 44 | file_lists = split_mat['trainvalsplit'].val 45 | elif split == 'test': 46 | file_lists = split_mat['alltest'] 47 | else: 48 | file_lists = split_mat['alltrain'] 49 | 50 | for fl in file_lists: 51 | real_fl = root / fl.split('/n/fs/sun3d/data/')[-1] 52 | files.append(str(list((real_fl / 'image').glob('*.jpg'))[0])) 53 | labels.append(real_fl / 'seg.mat') 54 | 55 | assert len(files) == len(labels) 56 | return files, labels 57 | 58 | def __len__(self) -> int: 59 | return len(self.files) 60 | 61 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 62 | image = io.read_image(self.files[index], io.ImageReadMode.RGB) 63 | label = sio.loadmat(self.labels[index], squeeze_me=True, struct_as_record=False)['seglabel'] 64 | label = torch.from_numpy(label.astype(np.uint8)).unsqueeze(0) 65 | 66 | if self.transform: 67 | image, label = self.transform(image, label) 68 | return image, self.encode(label.squeeze()).long() - 1 # subtract -1 to remove void class 69 | 70 | def encode(self, label: Tensor) -> Tensor: 71 | label[label > self.n_classes] = 0 72 | return label 73 | 74 | 75 | if __name__ == '__main__': 76 | from semseg.utils.visualize import visualize_dataset_sample 77 | visualize_dataset_sample(SunRGBD, '/home/sithu/datasets/sunrgbd') -------------------------------------------------------------------------------- /semseg/datasets/uavid2020.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : uavid2020.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/4/30 下午8:02 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | import os 10 | import os.path as osp 11 | import torch 12 | import logging 13 | from torch import Tensor 14 | from torch.utils.data import Dataset 15 | from torchvision import io 16 | from pathlib import Path 17 | from typing import Tuple 18 | from glob import glob 19 | 20 | 21 | class UAVid2020(Dataset): 22 | """UAVid2020 dataset. 23 | 24 | In segmentation map annotation for UAVid2020, 0 stands for background, which is 25 | included in 8 categories. ``reduce_zero_label`` is fixed to False. The 26 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to '.png', too. 27 | In UAVid2020, 200 images for training, 70 images for validating, and 150 images for testing. 
28 | The 8 classes and corresponding label color (R,G,B) are as follows: 29 | 'label name' 'R,G,B' 'label id' 30 | Background clutter (0,0,0) 0 31 | Building (128,0,0) 1 32 | Road (128,64,128) 2 33 | Static car (192,0,192) 3 34 | Tree (0,128,0) 4 35 | Low vegetation (128,128,0) 5 36 | Human (64,64,0) 6 37 | Moving car (64,0,128) 7 38 | 39 | """ 40 | 41 | CLASSES = ('Background clutter', 'Building', 'Road', 'Static car', 42 | 'Tree', 'Low vegetation', 'Human', 'Moving car') 43 | 44 | PALETTE = torch.tensor([[0, 0, 0], [128, 0, 0], [128, 64, 128], [192, 0, 192], 45 | [0, 128, 0], [128, 128, 0], [64, 64, 0], [64, 0, 128]]) 46 | 47 | SMALL_OBJECT = [3, 6, 7] 48 | 49 | def __init__(self, root: str, split: str = 'train', transform=None, preload=False, **kwargs) -> None: 50 | super().__init__() 51 | assert split in ['train', 'val', 'test'] 52 | # assert split in ['train', 'val', 'test'] 53 | self.split = split 54 | self.transform = transform 55 | self.n_classes = len(self.CLASSES) 56 | self.ignore_label = 255 57 | self.preload = preload 58 | self.pairs = [] 59 | 60 | imgs = glob(osp.join(root, 'img_dir', self.split) + '/*.png') 61 | for img_path in imgs: 62 | lbl_path = img_path.replace('img_dir', 'ann_dir') 63 | data_pair = [ 64 | io.read_image(img_path) if self.preload else img_path, 65 | io.read_image(lbl_path)[-1:] if self.preload else lbl_path, 66 | ] 67 | self.pairs.append(data_pair) 68 | 69 | assert len(self.pairs) > 0, f"No images found in {root}" 70 | logging.info(f"Found {len(self.pairs)} {split} images.") 71 | 72 | def __len__(self) -> int: 73 | return len(self.pairs) 74 | 75 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 76 | image, label = self.pairs[index] 77 | if not self.preload: 78 | image = io.read_image(image) 79 | label = io.read_image(label)[-1:] 80 | 81 | if self.transform: 82 | image, label = self.transform(image, label) 83 | return image, torch.squeeze(label.long()) 84 | 85 | 86 | if __name__ == '__main__': 87 | _dataset = UAVid2020('../../data/UAVid2020_mm', 'train', preload=False) 88 | for _i, _l in _dataset: 89 | print(_i.size(), _l.size()) 90 | -------------------------------------------------------------------------------- /semseg/datasets/udd6.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : uavid2020.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/4/30 下午8:02 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | import os 10 | import os.path as osp 11 | import torch 12 | import logging 13 | from torch import Tensor 14 | from torch.utils.data import Dataset 15 | from torchvision import io 16 | from pathlib import Path 17 | from typing import Tuple 18 | from glob import glob 19 | 20 | 21 | class UDD6(Dataset): 22 | """UDD6 dataset. 
23 | 24 | 'label name' 'R,G,B' 'label id' 25 | Other (0,0,0) 0 26 | Facade (102,102,156) 1 27 | Road (128,64,128) 2 28 | Vegetation (107,142,35) 3 29 | Vehicle (0,0,142) 4 30 | Roof (70,70,70) 5 31 | 32 | """ 33 | 34 | CLASSES = ('Other', 'Facade', 'Road', 'Vegetation', 'Vehicle', 'Roof') 35 | 36 | PALETTE = torch.tensor([[0, 0, 0], [102, 102, 156], [128, 64, 128], [107, 142, 35], 37 | [0, 0, 142], [70, 70, 70]]) 38 | 39 | SMALL_OBJECT = [4] 40 | 41 | def __init__(self, root: str, split: str = 'train', transform=None, preload=False) -> None: 42 | super().__init__() 43 | assert split in ['train', 'val'] 44 | # assert split in ['train', 'val', 'test'] 45 | self.split = split 46 | self.transform = transform 47 | self.n_classes = len(self.CLASSES) 48 | self.ignore_label = 255 49 | self.preload = preload 50 | self.pairs = [] 51 | # r=osp.join(root, 'img_dir', self.split) + '/*.png' 52 | imgs = glob(osp.join(root, 'img_dir', self.split) + '/*.png') 53 | for img_path in imgs: 54 | lbl_path = img_path.replace('img_dir', 'ann_dir') 55 | data_pair = [ 56 | io.read_image(img_path) if self.preload else img_path, 57 | io.read_image(lbl_path)[-1:] if self.preload else lbl_path, 58 | ] 59 | self.pairs.append(data_pair) 60 | 61 | assert len(self.pairs) > 0, f"No images found in {root}" 62 | logging.info(f"Found {len(self.pairs)} {split} images.") 63 | 64 | def __len__(self) -> int: 65 | return len(self.pairs) 66 | 67 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 68 | image, label = self.pairs[index] 69 | if not self.preload: 70 | image = io.read_image(image) 71 | label = io.read_image(label)[-1:] 72 | 73 | if self.transform: 74 | image, label = self.transform(image, label) 75 | return image, torch.squeeze(label.long()) 76 | 77 | 78 | if __name__ == '__main__': 79 | _dataset = UDD6('../../data/UDD6', 'train', preload=False) 80 | for _i, _l in _dataset: 81 | break 82 | -------------------------------------------------------------------------------- /semseg/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from typing import Tuple 4 | 5 | 6 | class Metrics: 7 | 8 | def __init__(self, num_classes: int, ignore_label: int, device) -> None: 9 | self.ignore_label = ignore_label 10 | self.num_classes = num_classes 11 | self.hist = torch.zeros(num_classes, num_classes).cuda() 12 | 13 | def update(self, pred: Tensor, target: Tensor) -> None: 14 | pred = pred.argmax(dim=1) 15 | keep = target != self.ignore_label 16 | self.hist += torch.bincount(target[keep] * self.num_classes + pred[keep], 17 | minlength=self.num_classes ** 2).view(self.num_classes, self.num_classes) 18 | 19 | def compute_iou(self) -> Tuple[Tensor, Tensor]: 20 | ious = self.hist.diag() / (self.hist.sum(0) + self.hist.sum(1) - self.hist.diag()) 21 | if self.ignore_label < self.num_classes: 22 | ious[self.ignore_label] = torch.tensor(float('nan')) 23 | miou = ious[~ious.isnan()].mean().item() 24 | ious *= 100 25 | miou *= 100 26 | return ious.cpu().numpy().round(2).tolist(), round(miou, 2) 27 | 28 | def compute_f1(self) -> Tuple[Tensor, Tensor]: 29 | f1 = 2 * self.hist.diag() / (self.hist.sum(0) + self.hist.sum(1)) 30 | if self.ignore_label < self.num_classes: 31 | f1[self.ignore_label] = torch.tensor(float('nan')) 32 | mf1 = f1[~f1.isnan()].mean().item() 33 | f1 *= 100 34 | mf1 *= 100 35 | return f1.cpu().numpy().round(2).tolist(), round(mf1, 2) 36 | 37 | def compute_pixel_acc(self) -> Tuple[Tensor, Tensor]: 38 | acc = self.hist.diag() / 
self.hist.sum(1) 39 | if self.ignore_label < self.num_classes: 40 | acc[self.ignore_label] = torch.tensor(float('nan')) 41 | macc = acc[~acc.isnan()].mean().item() 42 | acc *= 100 43 | macc *= 100 44 | return acc.cpu().numpy().round(2).tolist(), round(macc, 2) 45 | 46 | def compute_oa(self): 47 | oa = self.hist.diag().sum() / self.hist.sum() 48 | return oa.item() 49 | -------------------------------------------------------------------------------- /semseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .segformer import SegFormer 2 | # from .ddrnet import DDRNet 3 | from .ddrnet_official import DDRNet 4 | from .fchardnet import FCHarDNet 5 | from .sfnet import SFNet 6 | from .bisenetv1 import BiSeNetv1 7 | from .bisenetv2 import BiSeNetv2 8 | from .lawin import Lawin 9 | 10 | # added models 11 | from .deeplabv3plus import DeeplabV3Plus 12 | from .pspnet import PSPNet 13 | from .upernet import UperNet 14 | # from .sosnet_ablation import SOSNetBaseline, SOSNetSB, SOSNetDFEMABL 15 | from .fast_scnn import FastSCNN 16 | from .ccnet import CCNet 17 | from .topformer import TopFormer 18 | from .pidnet import PIDNet 19 | 20 | 21 | __all__ = [ 22 | 'SegFormer', 23 | 'Lawin', 24 | 'SFNet', 25 | 'BiSeNetv1', 26 | 'TopFormer', 27 | 'PSPNet', 28 | 'DeeplabV3Plus', 29 | 'UperNet', 30 | 'CCNet', 31 | # Standalone Models 32 | 'FastSCNN', 33 | 'DDRNet', 34 | 'FCHarDNet', 35 | 'BiSeNetv2', 36 | 'PIDNet', 37 | ] 38 | -------------------------------------------------------------------------------- /semseg/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet, resnet_settings 2 | from .resnetd import ResNetD, resnetd_settings 3 | from .micronet import MicroNet, micronet_settings 4 | from .mobilenetv2 import MobileNetV2, mobilenetv2_settings 5 | from .mobilenetv3_ import MobileNetV3, mobilenetv3_settings 6 | 7 | from .mit import MiT, mit_settings 8 | from .pvt import PVTv2, pvtv2_settings 9 | from .rest import ResT, rest_settings 10 | from .poolformer import PoolFormer, poolformer_settings 11 | from .convnext import ConvNeXt, convnext_settings 12 | from .topformer import TokenPyramidTransformer, topformer_cfgs 13 | 14 | 15 | __all__ = [ 16 | 'ResNet', 17 | 'ResNetD', 18 | 'MicroNet', 19 | 'MobileNetV2', 20 | 'MobileNetV3', 21 | 22 | 'MiT', 23 | 'PVTv2', 24 | 'ResT', 25 | 'PoolFormer', 26 | 'ConvNeXt', 27 | 'TokenPyramidTransformer', 28 | ] -------------------------------------------------------------------------------- /semseg/models/backbones/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | 4 | 5 | class ConvModule(nn.Sequential): 6 | def __init__(self, c1, c2, k, s=1, p=0, d=1, g=1): 7 | super().__init__( 8 | nn.Conv2d(c1, c2, k, s, p, d, g, bias=False), 9 | nn.BatchNorm2d(c2), 10 | nn.ReLU6(True) 11 | ) 12 | 13 | 14 | class InvertedResidual(nn.Module): 15 | def __init__(self, c1, c2, s, expand_ratio): 16 | super().__init__() 17 | ch = int(round(c1 * expand_ratio)) 18 | self.use_res_connect = s == 1 and c1 == c2 19 | 20 | layers = [] 21 | 22 | if expand_ratio != 1: 23 | layers.append(ConvModule(c1, ch, 1)) 24 | 25 | layers.extend([ 26 | ConvModule(ch, ch, 3, s, 1, g=ch), 27 | nn.Conv2d(ch, c2, 1, bias=False), 28 | nn.BatchNorm2d(c2) 29 | ]) 30 | 31 | self.conv = nn.Sequential(*layers) 32 | 33 | def forward(self, x: Tensor) -> Tensor: 34 | if 
self.use_res_connect: 35 | return x + self.conv(x) 36 | else: 37 | return self.conv(x) 38 | 39 | 40 | mobilenetv2_settings = { 41 | '1.0': [] 42 | } 43 | 44 | 45 | class MobileNetV2(nn.Module): 46 | def __init__(self, variant: str = None): 47 | super().__init__() 48 | self.out_indices = [3, 6, 13, 17] 49 | self.channels = [24, 32, 96, 320] 50 | input_channel = 32 51 | 52 | inverted_residual_setting = [ 53 | # t, c, n, s 54 | [1, 16, 1, 1], 55 | [6, 24, 2, 2], 56 | [6, 32, 3, 2], 57 | [6, 64, 4, 2], 58 | [6, 96, 3, 1], 59 | [6, 160, 3, 2], 60 | [6, 320, 1, 1], 61 | ] 62 | 63 | self.features = nn.ModuleList([ConvModule(3, input_channel, 3, 2, 1)]) 64 | 65 | for t, c, n, s in inverted_residual_setting: 66 | output_channel = c 67 | for i in range(n): 68 | stride = s if i == 0 else 1 69 | self.features.append(InvertedResidual(input_channel, output_channel, stride, t)) 70 | input_channel = output_channel 71 | 72 | def forward(self, x: Tensor) -> Tensor: 73 | outs = [] 74 | for i, m in enumerate(self.features): 75 | x = m(x) 76 | if i in self.out_indices: 77 | outs.append(x) 78 | return outs 79 | 80 | 81 | if __name__ == '__main__': 82 | model = MobileNetV2() 83 | # model.load_state_dict(torch.load('checkpoints/backbones/mobilenet_v2.pth', map_location='cpu'), strict=False) 84 | model.eval() 85 | x = torch.randn(1, 3, 224, 224) 86 | outs = model(x) 87 | for y in outs: 88 | print(y.shape) 89 | 90 | from fvcore.nn import flop_count_table, FlopCountAnalysis 91 | flops = FlopCountAnalysis(model, x) 92 | print(flops.total() / 1e6) 93 | # print(flop_count_table(flops)) -------------------------------------------------------------------------------- /semseg/models/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | from torch import nn 4 | from semseg.models.backbones import * 5 | from semseg.models.layers import trunc_normal_ 6 | 7 | 8 | class BaseModel(nn.Module): 9 | def __init__(self, backbone: str = 'MiT-B0', num_classes: int = 19) -> None: 10 | super().__init__() 11 | self.backbone = None 12 | self.num_classes = num_classes 13 | if backbone is not None and backbone != 'None': 14 | backbone, variant = backbone.split('-') 15 | self.backbone = eval(backbone)(variant) 16 | 17 | def _init_weights(self, m: nn.Module) -> None: 18 | if isinstance(m, nn.Linear): 19 | trunc_normal_(m.weight, std=.02) 20 | if m.bias is not None: 21 | nn.init.zeros_(m.bias) 22 | elif isinstance(m, nn.Conv2d): 23 | fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 24 | fan_out //= m.groups 25 | m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) 26 | if m.bias is not None: 27 | nn.init.zeros_(m.bias) 28 | elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)): 29 | nn.init.ones_(m.weight) 30 | nn.init.zeros_(m.bias) 31 | 32 | def init_pretrained(self, pretrained: str = None) -> None: 33 | if pretrained and isinstance(self.backbone, nn.Module): 34 | self.backbone.load_state_dict(torch.load(pretrained, map_location='cpu'), strict=False) 35 | 36 | def freeze_backbone(self): 37 | if isinstance(self.backbone, nn.Module): 38 | for p in self.backbone.parameters(): 39 | p.requires_grad = False 40 | 41 | def unfreeze_backbone(self): 42 | if isinstance(self.backbone, nn.Module): 43 | for p in self.backbone.parameters(): 44 | p.requires_grad = True 45 | -------------------------------------------------------------------------------- /semseg/models/custom_cnn.py: -------------------------------------------------------------------------------- 1 | import
torch 2 | from torch import Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.base import BaseModel 5 | from semseg.models.heads import UPerHead 6 | 7 | 8 | class CustomCNN(BaseModel): 9 | def __init__(self, backbone: str = 'ResNet-50', num_classes: int = 19): 10 | super().__init__(backbone, num_classes) 11 | self.decode_head = UPerHead(self.backbone.channels, 256, num_classes) 12 | self.apply(self._init_weights) 13 | 14 | def forward(self, x: Tensor) -> Tensor: 15 | y = self.backbone(x) 16 | y = self.decode_head(y) # 4x reduction in image size 17 | y = F.interpolate(y, size=x.shape[2:], mode='bilinear', align_corners=False) # to original image shape 18 | return y 19 | 20 | 21 | if __name__ == '__main__': 22 | model = CustomCNN('ResNet-50', 19) 23 | # model.init_pretrained('checkpoints/backbones/resnet/resnet18.pth') 24 | from semseg.utils.utils import count_parameters 25 | print(f'model params cnt: {count_parameters(model)}MB') 26 | x = torch.randn(2, 3, 224, 224) 27 | y = model(x) 28 | print(y.shape) -------------------------------------------------------------------------------- /semseg/models/custom_vit.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.base import BaseModel 5 | from semseg.models.heads import UPerHead 6 | 7 | 8 | class CustomVIT(BaseModel): 9 | def __init__(self, backbone: str = 'ResT-S', num_classes: int = 19) -> None: 10 | super().__init__(backbone, num_classes) 11 | self.decode_head = UPerHead(self.backbone.channels, 128, num_classes) 12 | self.apply(self._init_weights) 13 | 14 | def forward(self, x: Tensor) -> Tensor: 15 | y = self.backbone(x) 16 | y = self.decode_head(y) # 4x reduction in image size 17 | y = F.interpolate(y, size=x.shape[2:], mode='bilinear', align_corners=False) # to original image shape 18 | return y 19 | 20 | 21 | if __name__ == '__main__': 22 | model = CustomVIT('ResT-S', 19) 23 | model.init_pretrained('checkpoints/backbones/rest/rest_small.pth') 24 | x = torch.zeros(2, 3, 512, 512) 25 | y = model(x) 26 | print(y.shape) 27 | 28 | 29 | -------------------------------------------------------------------------------- /semseg/models/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .upernet import UPerHead 2 | from .segformer import SegFormerHead 3 | from .sfnet import SFHead 4 | from .fpn import FPNHead 5 | from .fapn import FaPNHead 6 | from .fcn import FCNHead 7 | from .condnet import CondHead 8 | from .lawin import LawinHead 9 | 10 | __all__ = ['UPerHead', 'SegFormerHead', 'SFHead', 'FPNHead', 'FaPNHead', 'FCNHead', 'CondHead', 'LawinHead'] -------------------------------------------------------------------------------- /semseg/models/heads/condnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.layers import ConvModule 5 | 6 | 7 | class CondHead(nn.Module): 8 | def __init__(self, in_channel: int = 2048, channel: int = 512, num_classes: int = 19): 9 | super().__init__() 10 | self.num_classes = num_classes 11 | self.weight_num = channel * num_classes 12 | self.bias_num = num_classes 13 | 14 | self.conv = ConvModule(in_channel, channel, 1) 15 | self.dropout = nn.Dropout2d(0.1) 16 | 17 | self.guidance_project = nn.Conv2d(channel, num_classes, 1) 18 | self.filter_project = 
nn.Conv2d(channel*num_classes, self.weight_num + self.bias_num, 1, groups=num_classes) 19 | 20 | def forward(self, features) -> Tensor: 21 | x = self.dropout(self.conv(features[-1])) 22 | B, C, H, W = x.shape 23 | guidance_mask = self.guidance_project(x) 24 | cond_logit = guidance_mask 25 | 26 | key = x 27 | value = x 28 | guidance_mask = guidance_mask.softmax(dim=1).view(*guidance_mask.shape[:2], -1) 29 | key = key.view(B, C, -1).permute(0, 2, 1) 30 | 31 | cond_filters = torch.matmul(guidance_mask, key) 32 | cond_filters /= H * W 33 | cond_filters = cond_filters.view(B, -1, 1, 1) 34 | cond_filters = self.filter_project(cond_filters) 35 | cond_filters = cond_filters.view(B, -1) 36 | 37 | weight, bias = torch.split(cond_filters, [self.weight_num, self.bias_num], dim=1) 38 | weight = weight.reshape(B * self.num_classes, -1, 1, 1) 39 | bias = bias.reshape(B * self.num_classes) 40 | 41 | value = value.view(-1, H, W).unsqueeze(0) 42 | seg_logit = F.conv2d(value, weight, bias, 1, 0, groups=B).view(B, self.num_classes, H, W) 43 | 44 | if self.training: 45 | return cond_logit, seg_logit 46 | return seg_logit 47 | 48 | 49 | if __name__ == '__main__': 50 | from semseg.models.backbones import ResNetD 51 | backbone = ResNetD('50') 52 | head = CondHead() 53 | x = torch.randn(2, 3, 224, 224) 54 | features = backbone(x) 55 | outs = head(features) 56 | for out in outs: 57 | out = F.interpolate(out, size=x.shape[-2:], mode='bilinear', align_corners=False) 58 | print(out.shape) -------------------------------------------------------------------------------- /semseg/models/heads/fapn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from torchvision.ops import DeformConv2d 5 | from semseg.models.layers import ConvModule 6 | 7 | 8 | class DCNv2(nn.Module): 9 | def __init__(self, c1, c2, k, s, p, g=1): 10 | super().__init__() 11 | self.dcn = DeformConv2d(c1, c2, k, s, p, groups=g) 12 | self.offset_mask = nn.Conv2d(c2, g* 3 * k * k, k, s, p) 13 | self._init_offset() 14 | 15 | def _init_offset(self): 16 | self.offset_mask.weight.data.zero_() 17 | self.offset_mask.bias.data.zero_() 18 | 19 | def forward(self, x, offset): 20 | out = self.offset_mask(offset) 21 | o1, o2, mask = torch.chunk(out, 3, dim=1) 22 | offset = torch.cat([o1, o2], dim=1) 23 | mask = mask.sigmoid() 24 | return self.dcn(x, offset, mask) 25 | 26 | 27 | class FSM(nn.Module): 28 | def __init__(self, c1, c2): 29 | super().__init__() 30 | self.conv_atten = nn.Conv2d(c1, c1, 1, bias=False) 31 | self.conv = nn.Conv2d(c1, c2, 1, bias=False) 32 | 33 | def forward(self, x: Tensor) -> Tensor: 34 | atten = self.conv_atten(F.avg_pool2d(x, x.shape[2:])).sigmoid() 35 | feat = torch.mul(x, atten) 36 | x = x + feat 37 | return self.conv(x) 38 | 39 | 40 | class FAM(nn.Module): 41 | def __init__(self, c1, c2): 42 | super().__init__() 43 | self.lateral_conv = FSM(c1, c2) 44 | self.offset = nn.Conv2d(c2*2, c2, 1, bias=False) 45 | self.dcpack_l2 = DCNv2(c2, c2, 3, 1, 1, 8) 46 | 47 | def forward(self, feat_l, feat_s): 48 | feat_up = feat_s 49 | if feat_l.shape[2:] != feat_s.shape[2:]: 50 | feat_up = F.interpolate(feat_s, size=feat_l.shape[2:], mode='bilinear', align_corners=False) 51 | 52 | feat_arm = self.lateral_conv(feat_l) 53 | offset = self.offset(torch.cat([feat_arm, feat_up*2], dim=1)) 54 | 55 | feat_align = F.relu(self.dcpack_l2(feat_up, offset)) 56 | return feat_align + feat_arm 57 | 58 | 59 | class FaPNHead(nn.Module): 60 | def 
__init__(self, in_channels, channel=128, num_classes=19): 61 | super().__init__() 62 | in_channels = in_channels[::-1] 63 | self.align_modules = nn.ModuleList([ConvModule(in_channels[0], channel, 1)]) 64 | self.output_convs = nn.ModuleList([]) 65 | 66 | for ch in in_channels[1:]: 67 | self.align_modules.append(FAM(ch, channel)) 68 | self.output_convs.append(ConvModule(channel, channel, 3, 1, 1)) 69 | 70 | self.conv_seg = nn.Conv2d(channel, num_classes, 1) 71 | self.dropout = nn.Dropout2d(0.1) 72 | 73 | def forward(self, features) -> Tensor: 74 | features = features[::-1] 75 | out = self.align_modules[0](features[0]) 76 | 77 | for feat, align_module, output_conv in zip(features[1:], self.align_modules[1:], self.output_convs): 78 | out = align_module(feat, out) 79 | out = output_conv(out) 80 | out = self.conv_seg(self.dropout(out)) 81 | return out 82 | 83 | 84 | if __name__ == '__main__': 85 | from semseg.models.backbones import ResNet 86 | backbone = ResNet('50') 87 | head = FaPNHead([256, 512, 1024, 2048], 128, 19) 88 | x = torch.randn(2, 3, 224, 224) 89 | features = backbone(x) 90 | out = head(features) 91 | out = F.interpolate(out, size=x.shape[-2:], mode='bilinear', align_corners=False) 92 | print(out.shape) -------------------------------------------------------------------------------- /semseg/models/heads/fcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.layers import ConvModule 5 | 6 | 7 | class FCNHead(nn.Module): 8 | def __init__(self, c1, c2, num_classes: int = 19): 9 | super().__init__() 10 | self.conv = ConvModule(c1, c2, 1) 11 | self.cls = nn.Conv2d(c2, num_classes, 1) 12 | 13 | def forward(self, features) -> Tensor: 14 | x = self.conv(features[-1]) 15 | x = self.cls(x) 16 | return x 17 | 18 | 19 | if __name__ == '__main__': 20 | from semseg.models.backbones import ResNet 21 | backbone = ResNet('50') 22 | head = FCNHead(2048, 256, 19) 23 | x = torch.randn(2, 3, 224, 224) 24 | features = backbone(x) 25 | out = head(features) 26 | out = F.interpolate(out, size=x.shape[-2:], mode='bilinear', align_corners=False) 27 | print(out.shape) 28 | -------------------------------------------------------------------------------- /semseg/models/heads/fpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.layers import ConvModule 5 | 6 | 7 | class FPNHead(nn.Module): 8 | """Panoptic Feature Pyramid Networks 9 | https://arxiv.org/abs/1901.02446 10 | """ 11 | def __init__(self, in_channels, channel=128, num_classes=19): 12 | super().__init__() 13 | self.lateral_convs = nn.ModuleList([]) 14 | self.output_convs = nn.ModuleList([]) 15 | 16 | for ch in in_channels[::-1]: 17 | self.lateral_convs.append(ConvModule(ch, channel, 1)) 18 | self.output_convs.append(ConvModule(channel, channel, 3, 1, 1)) 19 | 20 | self.conv_seg = nn.Conv2d(channel, num_classes, 1) 21 | self.dropout = nn.Dropout2d(0.1) 22 | 23 | def forward(self, features) -> Tensor: 24 | features = features[::-1] 25 | out = self.lateral_convs[0](features[0]) 26 | 27 | for i in range(1, len(features)): 28 | out = F.interpolate(out, scale_factor=2.0, mode='nearest') 29 | out = out + self.lateral_convs[i](features[i]) 30 | out = self.output_convs[i](out) 31 | out = self.conv_seg(self.dropout(out)) 32 | return out 33 | 34 | 35 | if __name__ == 
'__main__': 36 | from semseg.models.backbones import ResNet 37 | backbone = ResNet('50') 38 | head = FPNHead([256, 512, 1024, 2048], 128, 19) 39 | x = torch.randn(2, 3, 224, 224) 40 | features = backbone(x) 41 | out = head(features) 42 | out = F.interpolate(out, size=x.shape[-2:], mode='bilinear', align_corners=False) 43 | print(out.shape) -------------------------------------------------------------------------------- /semseg/models/heads/segformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from typing import Tuple 4 | from torch.nn import functional as F 5 | 6 | 7 | class MLP(nn.Module): 8 | def __init__(self, dim, embed_dim): 9 | super().__init__() 10 | self.proj = nn.Linear(dim, embed_dim) 11 | 12 | def forward(self, x: Tensor) -> Tensor: 13 | x = x.flatten(2).transpose(1, 2) 14 | x = self.proj(x) 15 | return x 16 | 17 | 18 | class ConvModule(nn.Module): 19 | def __init__(self, c1, c2): 20 | super().__init__() 21 | self.conv = nn.Conv2d(c1, c2, 1, bias=False) 22 | self.bn = nn.BatchNorm2d(c2) # use SyncBN in original 23 | self.activate = nn.ReLU(True) 24 | 25 | def forward(self, x: Tensor) -> Tensor: 26 | return self.activate(self.bn(self.conv(x))) 27 | 28 | 29 | class SegFormerHead(nn.Module): 30 | def __init__(self, dims: list, embed_dim: int = 256, num_classes: int = 19): 31 | super().__init__() 32 | for i, dim in enumerate(dims): 33 | self.add_module(f"linear_c{i+1}", MLP(dim, embed_dim)) 34 | 35 | self.linear_fuse = ConvModule(embed_dim*4, embed_dim) 36 | self.linear_pred = nn.Conv2d(embed_dim, num_classes, 1) 37 | self.dropout = nn.Dropout2d(0.1) 38 | 39 | def forward(self, features: Tuple[Tensor, Tensor, Tensor, Tensor]) -> Tensor: 40 | B, _, H, W = features[0].shape 41 | outs = [self.linear_c1(features[0]).permute(0, 2, 1).reshape(B, -1, *features[0].shape[-2:])] 42 | 43 | for i, feature in enumerate(features[1:]): 44 | cf = eval(f"self.linear_c{i+2}")(feature).permute(0, 2, 1).reshape(B, -1, *feature.shape[-2:]) 45 | outs.append(F.interpolate(cf, size=(H, W), mode='bilinear', align_corners=False)) 46 | 47 | seg = self.linear_fuse(torch.cat(outs[::-1], dim=1)) 48 | seg = self.linear_pred(self.dropout(seg)) 49 | return seg -------------------------------------------------------------------------------- /semseg/models/heads/sfnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.layers import ConvModule 5 | from semseg.models.modules import PPM 6 | 7 | 8 | class AlignedModule(nn.Module): 9 | def __init__(self, c1, c2, k=3): 10 | super().__init__() 11 | self.down_h = nn.Conv2d(c1, c2, 1, bias=False) 12 | self.down_l = nn.Conv2d(c1, c2, 1, bias=False) 13 | self.flow_make = nn.Conv2d(c2 * 2, 2, k, 1, 1, bias=False) 14 | 15 | def forward(self, low_feature: Tensor, high_feature: Tensor) -> Tensor: 16 | high_feature_origin = high_feature 17 | H, W = low_feature.shape[-2:] 18 | low_feature = self.down_l(low_feature) 19 | high_feature = self.down_h(high_feature) 20 | high_feature = F.interpolate(high_feature, size=(H, W), mode='bilinear', align_corners=True) 21 | flow = self.flow_make(torch.cat([high_feature, low_feature], dim=1)) 22 | high_feature = self.flow_warp(high_feature_origin, flow, (H, W)) 23 | return high_feature 24 | 25 | def flow_warp(self, x: Tensor, flow: Tensor, size: tuple) -> Tensor: 26 | # norm = torch.tensor(size).reshape(1, 1, 
1, -1) 27 | norm = torch.tensor([[[[*size]]]]).type_as(x).to(x.device) 28 | H = torch.linspace(-1.0, 1.0, size[0]).view(-1, 1).repeat(1, size[1]) 29 | W = torch.linspace(-1.0, 1.0, size[1]).repeat(size[0], 1) 30 | grid = torch.cat((W.unsqueeze(2), H.unsqueeze(2)), dim=2) 31 | grid = grid.repeat(x.shape[0], 1, 1, 1).type_as(x).to(x.device) 32 | grid = grid + flow.permute(0, 2, 3, 1) / norm 33 | output = F.grid_sample(x, grid, align_corners=False) 34 | return output 35 | 36 | 37 | class SFHead(nn.Module): 38 | def __init__(self, in_channels, channel=256, num_classes=19, scales=(1, 2, 3, 6)): 39 | super().__init__() 40 | self.ppm = PPM(in_channels[-1], channel, scales) 41 | 42 | self.fpn_in = nn.ModuleList([]) 43 | self.fpn_out = nn.ModuleList([]) 44 | self.fpn_out_align = nn.ModuleList([]) 45 | 46 | for in_ch in in_channels[:-1]: 47 | self.fpn_in.append(ConvModule(in_ch, channel, 1)) 48 | self.fpn_out.append(ConvModule(channel, channel, 3, 1, 1)) 49 | self.fpn_out_align.append(AlignedModule(channel, channel//2)) 50 | 51 | self.bottleneck = ConvModule(len(in_channels) * channel, channel, 3, 1, 1) 52 | self.dropout = nn.Dropout2d(0.1) 53 | self.conv_seg = nn.Conv2d(channel, num_classes, 1) 54 | 55 | def forward(self, features: list) -> Tensor: 56 | f = self.ppm(features[-1]) 57 | fpn_features = [f] 58 | 59 | for i in reversed(range(len(features) - 1)): 60 | feature = self.fpn_in[i](features[i]) 61 | f = feature + self.fpn_out_align[i](feature, f) 62 | fpn_features.append(self.fpn_out[i](f)) 63 | 64 | fpn_features.reverse() 65 | 66 | for i in range(1, len(fpn_features)): 67 | fpn_features[i] = F.interpolate(fpn_features[i], size=fpn_features[0].shape[-2:], mode='bilinear', align_corners=True) 68 | 69 | output = self.bottleneck(torch.cat(fpn_features, dim=1)) 70 | output = self.conv_seg(self.dropout(output)) 71 | return output 72 | 73 | -------------------------------------------------------------------------------- /semseg/models/heads/upernet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from typing import Tuple 5 | from semseg.models.layers import ConvModule 6 | from semseg.models.modules import PPM 7 | 8 | 9 | class UPerHead(nn.Module): 10 | """Unified Perceptual Parsing for Scene Understanding 11 | https://arxiv.org/abs/1807.10221 12 | scales: Pooling scales used in PPM module applied on the last feature 13 | """ 14 | 15 | def __init__(self, in_channels, channel=128, num_classes: int = 19, scales=(1, 2, 3, 6)): 16 | super().__init__() 17 | # PPM Module 18 | self.ppm = PPM(in_channels[-1], channel, scales) 19 | 20 | # FPN Module 21 | self.fpn_in = nn.ModuleList() 22 | self.fpn_out = nn.ModuleList() 23 | 24 | for in_ch in in_channels[:-1]: # skip the top layer 25 | self.fpn_in.append(ConvModule(in_ch, channel, 1)) 26 | self.fpn_out.append(ConvModule(channel, channel, 3, 1, 1)) 27 | 28 | self.bottleneck = ConvModule(len(in_channels) * channel, channel, 3, 1, 1) 29 | self.dropout = nn.Dropout2d(0.1) 30 | self.conv_seg = nn.Conv2d(channel, num_classes, 1) 31 | 32 | def forward(self, features: Tuple[Tensor, Tensor, Tensor, Tensor]) -> Tensor: 33 | f = self.ppm(features[-1]) 34 | fpn_features = [f] 35 | 36 | for i in reversed(range(len(features) - 1)): 37 | feature = self.fpn_in[i](features[i]) 38 | f = feature + F.interpolate(f, size=feature.shape[-2:], mode='bilinear', align_corners=True) 39 | fpn_features.append(self.fpn_out[i](f)) 40 | 41 | 
fpn_features.reverse() 42 | for i in range(1, len(features)): 43 | fpn_features[i] = F.interpolate(fpn_features[i], size=fpn_features[0].shape[-2:], mode='bilinear', 44 | align_corners=False) 45 | 46 | output = self.bottleneck(torch.cat(fpn_features, dim=1)) 47 | output = self.conv_seg(self.dropout(output)) 48 | return output 49 | 50 | 51 | if __name__ == '__main__': 52 | model = UPerHead([64, 128, 256, 512], 128) 53 | x1 = torch.randn(2, 64, 56, 56) 54 | x2 = torch.randn(2, 128, 28, 28) 55 | x3 = torch.randn(2, 256, 14, 14) 56 | x4 = torch.randn(2, 512, 7, 7) 57 | y = model([x1, x2, x3, x4]) 58 | print(y.shape) 59 | -------------------------------------------------------------------------------- /semseg/models/lawin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.base import BaseModel 5 | from semseg.models.heads import LawinHead 6 | 7 | 8 | class Lawin(BaseModel): 9 | """ 10 | Notes::::: This implementation has larger params and FLOPs than the results reported in the paper. 11 | Will update the code and weights if the original author releases the full code. 12 | """ 13 | def __init__(self, backbone: str = 'MiT-B0', num_classes: int = 19) -> None: 14 | super().__init__(backbone, num_classes) 15 | self.decode_head = LawinHead(self.backbone.channels, 256 if 'B0' in backbone else 512, num_classes) 16 | self.apply(self._init_weights) 17 | 18 | def forward(self, x: Tensor) -> Tensor: 19 | y = self.backbone(x) 20 | y = self.decode_head(y) # 4x reduction in image size 21 | y = F.interpolate(y, size=x.shape[2:], mode='bilinear', align_corners=False) # to original image shape 22 | return y 23 | 24 | 25 | if __name__ == '__main__': 26 | model = Lawin('MiT-B0') 27 | x = torch.zeros(2, 3, 512, 512) 28 | y = model(x) 29 | print(y.shape) -------------------------------------------------------------------------------- /semseg/models/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .common import * 2 | from .initialize import * -------------------------------------------------------------------------------- /semseg/models/layers/common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | 4 | 5 | class ConvModule(nn.Sequential): 6 | def __init__(self, c1, c2, k, s=1, p=0, d=1, g=1): 7 | super().__init__( 8 | nn.Conv2d(c1, c2, k, s, p, d, g, bias=False), 9 | nn.BatchNorm2d(c2), 10 | nn.ReLU(True) 11 | ) 12 | 13 | 14 | class DropPath(nn.Module): 15 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 16 | Copied from timm 17 | This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, 18 | the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... 19 | See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for 20 | changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 21 | 'survival rate' as the argument. 22 | """ 23 | def __init__(self, p: float = None): 24 | super().__init__() 25 | self.p = p 26 | 27 | def forward(self, x: Tensor) -> Tensor: 28 | if self.p == 0. 
or not self.training: 29 | return x 30 | kp = 1 - self.p 31 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) 32 | random_tensor = kp + torch.rand(shape, dtype=x.dtype, device=x.device) 33 | random_tensor.floor_() # binarize 34 | return x.div(kp) * random_tensor -------------------------------------------------------------------------------- /semseg/models/layers/initialize.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import warnings 4 | from torch import nn, Tensor 5 | 6 | 7 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 8 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 9 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 10 | def norm_cdf(x): 11 | # Computes standard normal cumulative distribution function 12 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 13 | 14 | if (mean < a - 2 * std) or (mean > b + 2 * std): 15 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 16 | "The distribution of values may be incorrect.", 17 | stacklevel=2) 18 | 19 | with torch.no_grad(): 20 | # Values are generated by using a truncated uniform distribution and 21 | # then using the inverse CDF for the normal distribution. 22 | # Get upper and lower cdf values 23 | l = norm_cdf((a - mean) / std) 24 | u = norm_cdf((b - mean) / std) 25 | 26 | # Uniformly fill tensor with values from [l, u], then translate to 27 | # [2l-1, 2u-1]. 28 | tensor.uniform_(2 * l - 1, 2 * u - 1) 29 | 30 | # Use inverse cdf transform for normal distribution to get truncated 31 | # standard normal 32 | tensor.erfinv_() 33 | 34 | # Transform to proper mean, std 35 | tensor.mul_(std * math.sqrt(2.)) 36 | tensor.add_(mean) 37 | 38 | # Clamp to ensure it's in the proper range 39 | tensor.clamp_(min=a, max=b) 40 | return tensor 41 | 42 | 43 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 44 | # type: (Tensor, float, float, float, float) -> Tensor 45 | r"""Fills the input Tensor with values drawn from a truncated 46 | normal distribution. The values are effectively drawn from the 47 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 48 | with values outside :math:`[a, b]` redrawn until they are within 49 | the bounds. The method used for generating the random values works 50 | best when :math:`a \leq \text{mean} \leq b`. 
51 | Args: 52 | tensor: an n-dimensional `torch.Tensor` 53 | mean: the mean of the normal distribution 54 | std: the standard deviation of the normal distribution 55 | a: the minimum cutoff value 56 | b: the maximum cutoff value 57 | Examples: 58 | >>> w = torch.empty(3, 5) 59 | >>> nn.init.trunc_normal_(w) 60 | """ 61 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 62 | -------------------------------------------------------------------------------- /semseg/models/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .ppm import PPM 2 | from .psa import PSAP, PSAS 3 | from .dfem import DetailFeatureEnhanceModuleABL 4 | 5 | 6 | __all__ = ['PPM', 'PSAP', 'PSAS', 'DetailFeatureEnhanceModuleABL'] 7 | -------------------------------------------------------------------------------- /semseg/models/modules/attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : attention.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/6/6 上午10:20 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class ChannelAttention(nn.Module): 15 | 16 | def __init__(self, in_planes, ratio: int = 1): 17 | super(ChannelAttention, self).__init__() 18 | 19 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 20 | self.max_pool = nn.AdaptiveMaxPool2d(1) 21 | 22 | # MLP 23 | self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False) 24 | self.relu1 = nn.ReLU() 25 | self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False) 26 | 27 | self.sigmoid = nn.Sigmoid() 28 | 29 | def forward(self, x): 30 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 31 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 32 | 33 | out = avg_out + max_out 34 | return self.sigmoid(out) 35 | 36 | 37 | class SpatialAttention(nn.Module): 38 | 39 | def __init__(self, kernel_size=7): 40 | super(SpatialAttention, self).__init__() 41 | 42 | assert kernel_size in (3, 7), 'kernel size must be 3 or 7' 43 | 44 | padding = 3 if kernel_size == 7 else 1 45 | 46 | self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) 47 | self.sigmoid = nn.Sigmoid() 48 | 49 | def forward(self, x): 50 | avg_out = torch.mean(x, dim=1, keepdim=True) 51 | max_out, _ = torch.max(x, dim=1, keepdim=True) 52 | 53 | x = torch.cat([avg_out, max_out], dim=1) 54 | x = self.conv1(x) 55 | return self.sigmoid(x) 56 | 57 | 58 | class CBAM(nn.Module): 59 | 60 | def __init__(self, in_channels, ratio=1, kernel_size=7): 61 | super().__init__() 62 | self.channel_attention = ChannelAttention(in_planes=in_channels, ratio=ratio) 63 | self.spatial_attention = SpatialAttention(kernel_size=kernel_size) 64 | 65 | def forward(self, x): 66 | x = x + x * self.channel_attention(x) 67 | x = x + x * self.spatial_attention(x) 68 | return x 69 | 70 | 71 | class SEModule(nn.Module): 72 | def __init__(self, channel, reduction=4): 73 | super(SEModule, self).__init__() 74 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 75 | self.fc = nn.Sequential( 76 | nn.Linear(channel, channel // reduction, bias=False), 77 | nn.ReLU(inplace=True), 78 | nn.Linear(channel // reduction, channel, bias=False), 79 | nn.ReLU(inplace=True) 80 | ) 81 | 82 | def forward(self, x): 83 | b, c, _, _ = x.size() 84 | y = self.avg_pool(x).view(b, c) 85 | y = self.fc(y).view(b, c, 1, 1) 86 | return x * y.expand_as(x) 87 | 88 | 89 | if __name__ == '__main__': 90 | _net 
= CBAM(24) 91 | _x = torch.randn((1, 24, 256, 128)) 92 | _y = _net(_x) 93 | print(_y.shape) 94 | 95 | from semseg.utils.utils import model_summary, init_logger 96 | 97 | init_logger() 98 | model_summary(_net, (1, 24, 256, 128)) 99 | -------------------------------------------------------------------------------- /semseg/models/modules/cc_attention.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This code is borrowed from Serge-weihao/CCNet-Pure-Pytorch 3 | ''' 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.nn import Softmax 9 | 10 | 11 | def INF(B,H,W): 12 | return -torch.diag(torch.tensor(float("inf")).cuda().repeat(H),0).unsqueeze(0).repeat(B*W,1,1) 13 | 14 | 15 | class CrissCrossAttention(nn.Module): 16 | """ Criss-Cross Attention Module""" 17 | def __init__(self, in_dim): 18 | super(CrissCrossAttention,self).__init__() 19 | self.query_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1) 20 | self.key_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1) 21 | self.value_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1) 22 | self.softmax = Softmax(dim=3) 23 | self.INF = INF 24 | self.gamma = nn.Parameter(torch.zeros(1)) 25 | 26 | 27 | def forward(self, x): 28 | m_batchsize, _, height, width = x.size() 29 | proj_query = self.query_conv(x) 30 | proj_query_H = proj_query.permute(0,3,1,2).contiguous().view(m_batchsize*width,-1,height).permute(0, 2, 1) 31 | proj_query_W = proj_query.permute(0,2,1,3).contiguous().view(m_batchsize*height,-1,width).permute(0, 2, 1) 32 | proj_key = self.key_conv(x) 33 | proj_key_H = proj_key.permute(0,3,1,2).contiguous().view(m_batchsize*width,-1,height) 34 | proj_key_W = proj_key.permute(0,2,1,3).contiguous().view(m_batchsize*height,-1,width) 35 | proj_value = self.value_conv(x) 36 | proj_value_H = proj_value.permute(0,3,1,2).contiguous().view(m_batchsize*width,-1,height) 37 | proj_value_W = proj_value.permute(0,2,1,3).contiguous().view(m_batchsize*height,-1,width) 38 | energy_H = (torch.bmm(proj_query_H, proj_key_H)+self.INF(m_batchsize, height, width)).view(m_batchsize,width,height,height).permute(0,2,1,3) 39 | energy_W = torch.bmm(proj_query_W, proj_key_W).view(m_batchsize,height,width,width) 40 | concate = self.softmax(torch.cat([energy_H, energy_W], 3)) 41 | 42 | att_H = concate[:,:,:,0:height].permute(0,2,1,3).contiguous().view(m_batchsize*width,height,height) 43 | #print(concate) 44 | #print(att_H) 45 | att_W = concate[:,:,:,height:height+width].contiguous().view(m_batchsize*height,width,width) 46 | out_H = torch.bmm(proj_value_H, att_H.permute(0, 2, 1)).view(m_batchsize,width,-1,height).permute(0,2,3,1) 47 | out_W = torch.bmm(proj_value_W, att_W.permute(0, 2, 1)).view(m_batchsize,height,-1,width).permute(0,2,1,3) 48 | #print(out_H.size(),out_W.size()) 49 | return self.gamma*(out_H + out_W) + x 50 | 51 | 52 | 53 | if __name__ == '__main__': 54 | model = CrissCrossAttention(64) 55 | x = torch.randn(2, 64, 5, 6) 56 | out = model(x) 57 | print(out.shape) 58 | -------------------------------------------------------------------------------- /semseg/models/modules/ppm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.layers import ConvModule 5 | 6 | 7 | class PPM(nn.Module): 8 | """Pyramid Pooling Module in PSPNet 9 | """ 10 | 11 | def __init__(self, 
c1, c2=128, scales=(1, 2, 3, 6)): 12 | super().__init__() 13 | self.stages = nn.ModuleList([ 14 | nn.Sequential( 15 | nn.AdaptiveAvgPool2d(scale), 16 | ConvModule(c1, c2, 1) 17 | ) 18 | for scale in scales]) 19 | 20 | self.bottleneck = ConvModule(c1 + c2 * len(scales), c2, 3, 1, 1) 21 | 22 | def forward(self, x: Tensor) -> Tensor: 23 | outs = [] 24 | for stage in self.stages: 25 | outs.append(F.interpolate(stage(x), size=x.shape[-2:], mode='bilinear', align_corners=True)) 26 | 27 | outs = [x] + outs[::-1] 28 | out = self.bottleneck(torch.cat(outs, dim=1)) 29 | return out 30 | 31 | 32 | if __name__ == '__main__': 33 | model = PPM(512, 128) 34 | _x = torch.randn(2, 512, 7, 7) 35 | _y = model(_x) 36 | print(_y.shape) # [2, 128, 7, 7] 37 | -------------------------------------------------------------------------------- /semseg/models/pspnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : pspnet_.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/7/1 下午6:50 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | import torch 10 | from torch import nn 11 | from torch.nn import functional as F 12 | from semseg.models.base import BaseModel 13 | 14 | 15 | class PSPModule(nn.Module): 16 | 17 | def __init__(self, features, out_features=1024, sizes=(1, 2, 3, 6)): 18 | super().__init__() 19 | self.stages = [] 20 | self.stages = nn.ModuleList([self._make_stage(features, size) for size in sizes]) 21 | self.bottleneck = nn.Conv2d(features * (len(sizes) + 1), out_features, kernel_size=1) 22 | self.relu = nn.ReLU() 23 | 24 | @staticmethod 25 | def _make_stage(features, size): 26 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) 27 | conv = nn.Conv2d(features, features, kernel_size=1, bias=False) 28 | return nn.Sequential(prior, conv) 29 | 30 | def forward(self, feats): 31 | h, w = feats.size(2), feats.size(3) 32 | priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in self.stages] + [feats] 33 | bottle = self.bottleneck(torch.cat(priors, 1)) 34 | return self.relu(bottle) 35 | 36 | 37 | class PSPUpsample(nn.Module): 38 | 39 | def __init__(self, in_channels, out_channels): 40 | super().__init__() 41 | self.conv = nn.Sequential( 42 | nn.Conv2d(in_channels, out_channels, 3, padding=1), 43 | nn.BatchNorm2d(out_channels), 44 | nn.PReLU() 45 | ) 46 | 47 | def forward(self, x): 48 | h, w = 2 * x.size(2), 2 * x.size(3) 49 | p = F.interpolate(input=x, size=(h, w), mode='bilinear', align_corners=True) 50 | return self.conv(p) 51 | 52 | 53 | class PSPNet(BaseModel): 54 | 55 | def __init__(self, backbone: str = None, n_classes=19, sizes=(1, 2, 3, 6), deep_features_size=1024): 56 | super().__init__(backbone, n_classes) 57 | self.psp = PSPModule(self.backbone.channels[-1], 1024, sizes) 58 | self.drop_1 = nn.Dropout2d(p=0.3) 59 | 60 | self.up_1 = PSPUpsample(1024, 256) 61 | self.up_2 = PSPUpsample(256, 64) 62 | self.up_3 = PSPUpsample(64, 64) 63 | 64 | self.drop_2 = nn.Dropout2d(p=0.15) 65 | self.final = nn.Sequential( 66 | nn.Conv2d(64, n_classes, kernel_size=1), 67 | nn.LogSoftmax(dim=1) 68 | ) 69 | 70 | self.classifier = nn.Sequential( 71 | nn.Linear(deep_features_size, 256), 72 | nn.ReLU(), 73 | nn.Linear(256, n_classes) 74 | ) 75 | 76 | def forward(self, x): 77 | f = self.backbone(x)[-1] 78 | p = self.psp(f) 79 | p = self.drop_1(p) 80 | 81 | p = self.up_1(p) 82 | p = self.drop_2(p) 83 | 84 | p = self.up_2(p) 85 | p = self.drop_2(p) 86 | 87 | p = self.up_3(p) 88 | p = 
self.drop_2(p) 89 | y = self.final(p) 90 | y = F.interpolate(y, size=x.shape[-2:], mode='bilinear', align_corners=True) 91 | return y 92 | 93 | 94 | if __name__ == '__main__': 95 | _model = PSPNet('MobileNetV3-large', 19) 96 | _x = torch.rand((1, 3, 512, 1024)) 97 | _out = _model(_x) 98 | print(_out.shape) 99 | -------------------------------------------------------------------------------- /semseg/models/segformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.base import BaseModel 5 | from semseg.models.heads import SegFormerHead, UPerHead 6 | 7 | 8 | class SegFormer0(BaseModel): 9 | def __init__(self, backbone: str = 'MiT-B0', num_classes: int = 19) -> None: 10 | super().__init__(backbone, num_classes) 11 | self.decode_head = SegFormerHead(self.backbone.channels, 256 if 'B0' in backbone or 'B1' in backbone else 768, num_classes) 12 | self.apply(self._init_weights) 13 | 14 | def forward(self, x: Tensor) -> Tensor: 15 | y = self.backbone(x) 16 | y = self.decode_head(y) # 4x reduction in image size 17 | y = F.interpolate(y, size=x.shape[2:], mode='bilinear', align_corners=False) # to original image shape 18 | return y 19 | 20 | 21 | class SegFormer(BaseModel): 22 | def __init__(self, backbone: str = 'MiT-B0', num_classes: int = 19) -> None: 23 | super().__init__(backbone, num_classes) 24 | self.head_bottom = SegFormerHead(self.backbone.channels, 25 | 256 if 'B0' in backbone or 'B1' in backbone else 768, 26 | num_classes) 27 | self.head_top = UPerHead(in_channels=self.backbone.channels, 28 | channel=32, 29 | num_classes=2, 30 | scales=(1, 2, 3, 6)) 31 | def forward(self, x: Tensor): 32 | f_x4, f_x8, f_x16, f_x32 = self.backbone(x) 33 | logits_bottom = self.head_bottom([f_x4, f_x8, f_x16, f_x32]) # 4x reduction in image size 34 | logits_bottom = F.interpolate(logits_bottom, size=x.shape[2:], mode='bilinear', align_corners=True) 35 | 36 | if self.training: 37 | # logits_edge = self.head_edge(f_x4, f_x8) 38 | # logits_edge = F.interpolate(logits_edge, x.shape[-2:], mode='bilinear', align_corners=True) 39 | logits_top = self.head_top([f_x4, f_x8, f_x16, f_x32]) 40 | logits_top = F.interpolate(logits_top, x.shape[-2:], mode='bilinear', align_corners=True) 41 | # return torch.cat([logits_seg, logits_so], dim=1), logits_edge 42 | return logits_bottom, logits_top, None 43 | 44 | return logits_bottom.contiguous() 45 | 46 | 47 | if __name__ == '__main__': 48 | model = SegFormer('MiT-B0', num_classes=8) 49 | model.train(True) 50 | model.init_pretrained('../../checkpoints/backbones/mit/mit_b0.pth') 51 | x = torch.zeros(4, 3, 512, 1024) 52 | y = model(x) 53 | if model.training: 54 | print(y[0].shape, y[1].shape) 55 | else: 56 | print(y.shape) -------------------------------------------------------------------------------- /semseg/models/sfnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.base import BaseModel 5 | from semseg.models.heads import SFHead, UPerHead 6 | 7 | 8 | class SFNet(BaseModel): 9 | def __init__(self, backbone: str = 'ResNetD-18', num_classes: int = 19): 10 | # assert 'ResNet' in backbone 11 | super().__init__(backbone, num_classes) 12 | self.head = SFHead(self.backbone.channels, 128 if ('18' in backbone or 'MobileNet' in backbone) else 256, num_classes) 13 | self.apply(self._init_weights) 14 | 15 | def
forward(self, x: Tensor) -> Tensor: 16 | outs = self.backbone(x) 17 | out = self.head(outs) 18 | out = F.interpolate(out, size=x.shape[-2:], mode='bilinear', align_corners=True) 19 | return out 20 | 21 | 22 | class SFNet0(BaseModel): 23 | def __init__(self, backbone: str = 'ResNetD-18', num_classes: int = 19): 24 | # assert 'ResNet' in backbone 25 | super().__init__(backbone, num_classes) 26 | self.head_bottom = SFHead(self.backbone.channels, 128 if ('18' in backbone or 'MobileNet' in backbone) else 256, num_classes) 27 | self.head_top = UPerHead(in_channels=self.backbone.channels, 28 | channel=32, 29 | num_classes=2, 30 | scales=(1, 2, 3, 6)) 31 | self.apply(self._init_weights) 32 | 33 | def forward(self, x: Tensor): 34 | f_x4, f_x8, f_x16, f_x32 = self.backbone(x) 35 | logits_bottom = self.head_bottom([f_x4, f_x8, f_x16, f_x32]) # 4x reduction in image size 36 | logits_bottom = F.interpolate(logits_bottom, size=x.shape[2:], mode='bilinear', align_corners=True) 37 | 38 | if self.training: 39 | # logits_edge = self.head_edge(f_x4, f_x8) 40 | # logits_edge = F.interpolate(logits_edge, x.shape[-2:], mode='bilinear', align_corners=True) 41 | logits_top = self.head_top([f_x4, f_x8, f_x16, f_x32]) 42 | logits_top = F.interpolate(logits_top, x.shape[-2:], mode='bilinear', align_corners=True) 43 | # return torch.cat([logits_seg, logits_so], dim=1), logits_edge 44 | return logits_bottom, logits_top, None 45 | 46 | return logits_bottom.contiguous() 47 | 48 | 49 | if __name__ == '__main__': 50 | model = SFNet('MobileNetV3-large') 51 | model.train() 52 | model.init_pretrained('../../checkpoints/backbones/mobilenet_/mobilenetv3_large.pth') 53 | x = torch.randn(2, 3, 512, 1024) 54 | y = model(x) 55 | if model.training: 56 | print(y[0].shape, y[1].shape) 57 | else: 58 | print(y.shape) -------------------------------------------------------------------------------- /semseg/optimizers.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.optim import AdamW, SGD 3 | 4 | # def get_optimizer(model: nn.Module, optimizer: str, lr: float, weight_decay: float = 0.01): 5 | # 6 | # # parameters of the pretrained backbone 7 | # backbone_param = list(map(id, model.backbone.parameters())) 8 | # # parameters of the newly added layers 9 | # new_param = filter(lambda p: id(p) not in backbone_param, model.parameters()) 10 | # # group parameters for the optimizer 11 | # params = [ 12 | # {'params': model.backbone.parameters(), 'lr': lr * 0.1}, 13 | # {'params': new_param, 'lr': lr * 0.1} 14 | # ] 15 | # 16 | # if optimizer == 'adamw': 17 | # return AdamW(params, lr, betas=(0.9, 0.999), eps=1e-8, weight_decay=weight_decay) 18 | # else: 19 | # return SGD(params, lr, momentum=0.9, weight_decay=weight_decay) 20 | 21 | 22 | def get_optimizer(model: nn.Module, optimizer: str, lr: float, weight_decay: float = 0.01): 23 | wd_params, nwd_params = [], [] 24 | for p in model.parameters(): 25 | if p.dim() == 1:  # 1-D tensors (biases, norm weights) are excluded from weight decay 26 | nwd_params.append(p) 27 | else: 28 | wd_params.append(p) 29 | 30 | params = [ 31 | {"params": wd_params}, 32 | {"params": nwd_params, "weight_decay": 0}, 33 | ] 34 | 35 | if optimizer == 'adamw': 36 | return AdamW(params, lr, betas=(0.9, 0.999), eps=1e-8, weight_decay=weight_decay) 37 | else: 38 | return SGD(params, lr, momentum=0.9, weight_decay=weight_decay) 39 | 40 | 41 | # params = [ 42 | # {"params": [value] for _, value in model.sharedNet.named_parameters() if value.requires_grad}, 43 | # {"params": [value for _, value in model.cls_fc_son1.named_parameters() 44 | # if value.requires_grad], 'lr': args.lr * 10}, 45 | # {"params": [value
for _, value in model.cls_fc_son2.named_parameters() 46 | # if value.requires_grad], 'lr': args.lr * 10}, 47 | # {"params": [value for _, value in model.sonnet1.named_parameters() 48 | # if value.requires_grad], 'lr': args.lr * 10}, 49 | # {"params": [value for _, value in model.sonnet2.named_parameters() 50 | # if value.requires_grad], 'lr': args.lr * 10}, 51 | # ] 52 | # optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay) 53 | 54 | 55 | # ''' 56 | # First, we want the learning rate of the pretrained backbone to be one tenth of that of the newly added layers. 57 | # Specifically, split the parameters into the newly added layers and the pretrained backbone, and set a learning rate for each group. 58 | # ''' 59 | # # parameters of the pretrained backbone 60 | # backbone_param = list(map(id, model.backbone.parameters())) 61 | # # parameters of the newly added layers 62 | # new_param = filter(lambda p: id(p) not in backbone_param, model.parameters()) 63 | # # define the optimizer 64 | # optimizer = torch.optim.Adam([ 65 | # {'params': backbone_param, 'lr': LR * 0.1}, 66 | # {'params': new_param} 67 | # ], lr=LR) -------------------------------------------------------------------------------- /semseg/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuLiu/SOSNet/0e4832eeb76daeebd4a0e31a750e7fce86b7b8ac/semseg/utils/__init__.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='semseg', 5 | version='0.4.0', 6 | description='SOTA Semantic Segmentation Models', 7 | url='https://github.com/sithu31296/semantic-segmentation', 8 | author='Sithu Aung', 9 | author_email='sithu31296@gmail.com', 10 | license='MIT', 11 | packages=find_packages(include=['semseg']), 12 | install_requires=[ 13 | 'tqdm', 14 | 'tabulate', 15 | 'numpy', 16 | 'scipy', 17 | 'matplotlib', 18 | 'tensorboard', 19 | 'fvcore', 20 | 'einops' 21 | ] 22 | ) -------------------------------------------------------------------------------- /tools/benchmark.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | import time 4 | from fvcore.nn import flop_count_table, FlopCountAnalysis 5 | from semseg.models import * 6 | 7 | 8 | def main( 9 | model_name: str, 10 | backbone_name: str, 11 | image_size: list, 12 | num_classes: int, 13 | device: str, 14 | ): 15 | device = torch.device('cuda' if torch.cuda.is_available() and device == 'cuda' else 'cpu') 16 | inputs = torch.randn(1, 3, *image_size).to(device) 17 | model = eval(model_name)(backbone_name, num_classes) 18 | model = model.to(device) 19 | model.eval() 20 | print(model.training) 21 | 22 | print(flop_count_table(FlopCountAnalysis(model, inputs))) 23 | 24 | total_time = 0.0 25 | for _ in range(10): 26 | tic = time.perf_counter() 27 | model(inputs) 28 | toc = time.perf_counter() 29 | total_time += toc - tic 30 | total_time /= 10 31 | 32 | # from semseg.utils.utils import model_summary, init_logger 33 | # 34 | # init_logger() 35 | # model_summary(model, (1, 3, *image_size)) 36 | print(f"Inference time: {total_time*1000:.2f}ms") 37 | print(f"FPS: {1/total_time:.2f}") 38 | 39 | if __name__ == '__main__': 40 | parser = argparse.ArgumentParser() 41 | parser.add_argument('--model-name', type=str, default='FastSCNN') 42 | parser.add_argument('--backbone-name', type=str, default='MobileNetV3-large') 43 | # parser.add_argument('--backbone-name', type=str, default=None) 44 | parser.add_argument('--image-size', type=int, nargs=2, default=[512, 512]) 45 |
parser.add_argument('--num-classes', type=int, default=8) 46 | parser.add_argument('--device', type=str, default='cuda') 47 | args = parser.parse_args() 48 | 49 | main(args.model_name, args.backbone_name, args.image_size, args.num_classes, args.device) -------------------------------------------------------------------------------- /tools/export.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | import yaml 4 | import onnx 5 | from pathlib import Path 6 | from onnxsim import simplify 7 | from semseg.models import * 8 | from semseg.datasets import * 9 | 10 | 11 | def export_onnx(model, inputs, file): 12 | torch.onnx.export( 13 | model, 14 | inputs, 15 | f"{file}.onnx", 16 | input_names=['input'], 17 | output_names=['output'], 18 | opset_version=13 19 | ) 20 | onnx_model = onnx.load(f"{file}.onnx") 21 | onnx.checker.check_model(onnx_model) 22 | 23 | onnx_model, check = simplify(onnx_model) 24 | onnx.save(onnx_model, f"{file}.onnx") 25 | assert check, "Simplified ONNX model could not be validated" 26 | print(f"ONNX model saved to {file}.onnx") 27 | 28 | 29 | def export_coreml(model, inputs, file): 30 | try: 31 | import coremltools as ct 32 | ts_model = torch.jit.trace(model, inputs, strict=True) 33 | ct_model = ct.convert( 34 | ts_model, 35 | inputs=[ct.ImageType('image', shape=inputs.shape, scale=1/255.0, bias=[0, 0, 0])] 36 | ) 37 | ct_model.save(f"{file}.mlmodel") 38 | print(f"CoreML model saved to {file}.mlmodel") 39 | except ImportError: 40 | print("Please install coremltools to export to CoreML.\n`pip install coremltools`") 41 | 42 | 43 | def main(cfg): 44 | model = eval(cfg['MODEL']['NAME'])(cfg['MODEL']['BACKBONE'], len(eval(cfg['DATASET']['NAME']).PALETTE)) 45 | model.load_state_dict(torch.load(cfg['TEST']['MODEL_PATH'], map_location='cpu')) 46 | model.eval() 47 | 48 | inputs = torch.randn(1, 3, *cfg['TEST']['IMAGE_SIZE']) 49 | file = cfg['TEST']['MODEL_PATH'].split('.')[0] 50 | 51 | export_onnx(model, inputs, file) 52 | export_coreml(model, inputs, file) 53 | print("Finished converting.") 54 | 55 | 56 | if __name__ == '__main__': 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument('--cfg', type=str, default='configs/helen.yaml') 59 | args = parser.parse_args() 60 | 61 | with open(args.cfg) as f: 62 | cfg = yaml.load(f, Loader=yaml.SafeLoader) 63 | 64 | save_dir = Path(cfg['SAVE_DIR']) 65 | save_dir.mkdir(exist_ok=True) 66 | 67 | main(cfg) -------------------------------------------------------------------------------- /tools/export_small_objects.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : export_small_objects.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2023/6/20 16:51 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | 10 | import cv2 11 | import os 12 | import numpy as np 13 | from semseg.datasets import * 14 | from torch.utils.data import DataLoader 15 | from glob import glob 16 | import argparse 17 | from tqdm import tqdm 18 | import yaml 19 | 20 | 21 | def filtering_image(lbl, out_path, min_area=0, max_area=1024, num_classes=11, ignore_label=255): 22 | lbl_out = np.ones_like(lbl) * ignore_label 23 | for _id in range(num_classes): 24 | label = np.zeros_like(lbl) 25 | label[lbl == _id] = 255 26 | # find all contours for this class 27 | contours, _ = cv2.findContours(label, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 28 | # iterate over the contours 29 | for contour in contours: 30 | # keep only contours whose area lies within [min_area, max_area] 31 | if min_area <=
cv2.contourArea(contour) <= max_area:  # and cv2.contourArea(contour) >= min_area 32 | # fill the contour interior with the class id 33 | cv2.drawContours(lbl_out, [contour], 0, _id, -1) 34 | # cv2.imshow('window', lbl_out) 35 | # cv2.waitKey(0) 36 | # save the segmentation label that keeps only the small objects 37 | cv2.imwrite(out_path, lbl_out) 38 | # cv2.imshow('window', lbl_out) 39 | # cv2.waitKey(0) 40 | 41 | 42 | def filtering_by_area(in_dir, min_area=0, max_area=1024, num_classes=11, ignore_label=255): 43 | out_dir = f'{in_dir}_so_{min_area}_{max_area}' 44 | os.makedirs(out_dir, exist_ok=True) 45 | img_paths = glob(r''+in_dir+'/*.png') 46 | img_paths.sort() 47 | for img_path in tqdm(img_paths): 48 | lbl = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 49 | out_path = os.path.join(out_dir, os.path.basename(img_path)) 50 | filtering_image(lbl, out_path=out_path, min_area=min_area, max_area=max_area, num_classes=num_classes, 51 | ignore_label=ignore_label) 52 | # break 53 | 54 | def filter_objects(in_dir, num_classes=11, ignore_label=255): 55 | areas = [0, 1024, 4096, 16384, 65536, 1048576] 56 | for i in range(len(areas) - 1): 57 | filtering_by_area(in_dir, 0, areas[i + 1], num_classes, ignore_label) 58 | 59 | 60 | if __name__ == '__main__': 61 | parser = argparse.ArgumentParser() 62 | parser.add_argument('--cfg', type=str, default='configs/Bisenetv2/camvid.yaml', help='config file path') 63 | parser.add_argument('--input-dir', type=str, default='data/CamVid/testannot', help='directory of label files') 64 | # parser.add_argument('--max-area', type=int, default=16384, help='maximum area for objects. 1024, 4096, 16384') 65 | args = parser.parse_args() 66 | with open(args.cfg) as f: 67 | cfg = yaml.load(f, Loader=yaml.SafeLoader) 68 | datasets = eval(cfg["DATASET"]["NAME"])(cfg["DATASET"]["ROOT"], 'test') 69 | filter_objects(in_dir=args.input_dir, #out_dir=args.input_dir + '_so' + str(args.max_area), 70 | num_classes=datasets.n_classes, ignore_label=datasets.ignore_label) 71 | -------------------------------------------------------------------------------- /tools/feature_visualization.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : feature_visualization.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/7/19 10:57 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import cv2 13 | from torch.nn import init 14 | from torchvision import io 15 | from semseg.models.backbones.mobilenetv3 import MobileNetV3 16 | from semseg.augmentations import get_val_augmentation 17 | import numpy as np 18 | import mmcv 19 | 20 | 21 | def show_features(f_tensor): 22 | f_np = np.squeeze(f_tensor.detach().cpu().numpy()) 23 | for img_gray in f_np: 24 | cv2.imshow('feature', img_gray) 25 | q = cv2.waitKey(0) 26 | if q == (ord('q') & 0xff): 27 | break 28 | return 29 | 30 | 31 | if __name__ == '__main__': 32 | model = MobileNetV3('large') 33 | model.load_state_dict(torch.load('../checkpoints/backbones/mobilenet/mobilenetv3_large.pth', 34 | map_location='cpu'), strict=False) 35 | model.train() 36 | model.cuda() 37 | aug = get_val_augmentation([2160, 3840]) 38 | # img_rgb_tensor = io.read_image('../data/UAVid2020_mm/img_dir/train/seq1_000700.png') 39 | img_rgb_tensor = io.read_image('../assests/vaihingen_area3.png')[:-1,:,:] 40 | # img_bgr = mmcv.imread('../assests/vaihingen_area3.png') 41 | # img_rgb_tensor = io.read_image('../data/ISPRS_DATA/Vaihingen2/img_dir/train/area1_0_0_512_512.png') 42 | img_rgb_tensor =
aug(img_rgb_tensor, img_rgb_tensor[0:1,:,:])[0].unsqueeze(dim=0).cuda() 43 | 44 | # _x = torch.randn(1, 3, 512, 512) 45 | _outs = model(img_rgb_tensor) 46 | 47 | for y in _outs: 48 | show_features(y) 49 | 50 | from semseg.utils.utils import model_summary, init_logger 51 | 52 | init_logger() 53 | model_summary(model, (1, 3, 224, 224)) 54 | 55 | # from fvcore.nn import flop_count_table, FlopCountAnalysis 56 | # print(flop_count_table(FlopCountAnalysis(model, _x.cuda()))) 57 | -------------------------------------------------------------------------------- /tools/infer_single.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : infer_single.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/10/14 下午9:26 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | 10 | 11 | import os 12 | import argparse 13 | import yaml 14 | from torchvision import io 15 | from tools.infer import SemSeg 16 | from PIL import Image 17 | import numpy as np 18 | import cv2 19 | from semseg.datasets import * 20 | 21 | def overlay_gt(dataset, img_path, lbl_path, save_dir, overlay=False, img_ratio=0.3): 22 | img = Image.open(img_path) 23 | lbl = Image.open(lbl_path).convert('P') 24 | colormap = dataset.PALETTE.numpy().astype(np.uint8) 25 | lbl.putpalette(colormap.flatten()) 26 | lbl = lbl.convert('RGB') 27 | if overlay: 28 | img = (np.array(img) * img_ratio) + (np.array(lbl) * (1 - img_ratio)) 29 | img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_RGB2BGR) 30 | cv2.imwrite(os.path.join(save_dir, f"{str(os.path.basename(img_path))}"), img) 31 | else: 32 | lbl.save(os.path.join(save_dir, f"{str(os.path.basename(img_path))}")) 33 | 34 | 35 | if __name__ == '__main__': 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--img_path', type=str, default='data/CamVid/test/0001TP_008550.png') 38 | parser.add_argument('--cfg', type=str, 39 | default='output_ablation/deeplabv3plus/camvid/DeeplabV3Plus_CamVid_hier_soem_59.41_38.53_71.33/config.yaml') 40 | parser.add_argument('--overlay', type=str, default=True) 41 | parser.add_argument('--gt', type=str, default=False) # if generate gt from label 42 | parser.add_argument('--ratio', type=float, default=0.3) 43 | args = parser.parse_args() 44 | 45 | with open(args.cfg) as f: 46 | cfg = yaml.load(f, Loader=yaml.SafeLoader) 47 | 48 | test_file = os.path.join(args.img_path) 49 | save_dir = f'./vis_results/{cfg["MODEL"]["NAME"]}' 50 | os.makedirs(save_dir, exist_ok=True) 51 | 52 | semseg = SemSeg(cfg) 53 | 54 | print(f'Inferencing {test_file} by {cfg["MODEL"]["NAME"]}...') 55 | segmap = semseg.predict(str(test_file), args.overlay, args.ratio) 56 | io.write_png(segmap, os.path.join(save_dir, f"{str(os.path.basename(test_file))}")) 57 | 58 | trainset = eval(cfg['DATASET']['NAME'])(cfg['DATASET']['ROOT'], 'train', None) 59 | 60 | import shutil 61 | if args.gt: 62 | gt_dir = os.path.join(save_dir, '..', 'gt') 63 | os.makedirs(gt_dir, exist_ok=True) 64 | shutil.copy(args.img_path, os.path.join(gt_dir, f"{str(os.path.basename(args.img_path))}.img.png")) 65 | if cfg['DATASET']['NAME'] == 'ISAID': 66 | overlay_gt(trainset, args.img_path, 67 | args.img_path.replace('img_dir', 'ann_dir').replace('.png', '_instance_color_RGB.png'), 68 | gt_dir, True) 69 | elif cfg['DATASET']['NAME'] == 'CamVid': 70 | overlay_gt(trainset, args.img_path, 71 | args.img_path.replace('test/', 'testannot/'), 72 | gt_dir, True, img_ratio=args.ratio) 73 | elif cfg['DATASET']['NAME'] == 'UAVid': 74 | overlay_gt(trainset, 
args.img_path, 75 | args.img_path.replace('img_dir/', 'ann_dir/'), 76 | gt_dir, True, img_ratio=args.ratio) -------------------------------------------------------------------------------- /tools/submit/uavid_submit.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : uavid_submit.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/6/8 下午8:39 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | 10 | import os 11 | import os.path as osp 12 | import shutil 13 | 14 | 15 | def transfer(dir_path='../../output/test_results', out_dir='./uavid_submit'): 16 | os.makedirs(out_dir, exist_ok=True) 17 | imgs = os.listdir(dir_path) 18 | for img_name in imgs: 19 | if img_name.endswith('.png'): 20 | seq_dir, basename = img_name.split('_') 21 | seq_dir = osp.join(out_dir, seq_dir, 'Labels') 22 | os.makedirs(seq_dir, exist_ok=True) 23 | shutil.copy(osp.join(dir_path, img_name), osp.join(seq_dir, basename)) 24 | 25 | 26 | if __name__ == '__main__': 27 | # transfer(dir_path='../../output/test_results', out_dir='./submit_SOSNet_mbv3l_soa_epoch100') 28 | transfer(dir_path='../../output_ablation/UperNet/uavid2020/test_results', out_dir='./UperNet') 29 | --------------------------------------------------------------------------------
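Note on usage: transfer() above assumes prediction files are named like seq1_000700.png (a single underscore separating the sequence id from the frame id) and regroups them into the seqX/Labels/frame.png layout expected by the UAVid benchmark server. A minimal driver sketch follows, assuming predictions were already written to output/test_results by tools/infer.py and that the repository root is on PYTHONPATH; both paths are assumptions, not fixed by the repository:

    # hypothetical submission helper; adjust dir_path to wherever infer.py wrote the predictions
    from tools.submit.uavid_submit import transfer

    transfer(dir_path='output/test_results', out_dir='./uavid_submit')
    # ./uavid_submit/ now holds seq*/Labels/*.png and can be zipped and uploaded for evaluation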