├── .gitignore
├── LICENSE
├── README.md
├── ablation_ratio.sh
├── assests
│   ├── improve.png
│   └── overview.png
├── checkpoints
│   └── backbones
│       ├── mit
│       │   └── place mit-b0.pth here
│       └── mobilenet_
│           └── place mobilenetv3.pth here
├── configs
│   ├── BiSeNetV2
│   │   ├── camvid.yaml
│   │   ├── uavid2020.yaml
│   │   └── vaihingen.yaml
│   ├── Deeplabv3plus
│   │   ├── camvid_mbv3l.yaml
│   │   ├── uavid2020_mbv3l.yaml
│   │   └── vaihingen_mbv3l.yaml
│   ├── PIDNet
│   │   ├── camvid_S.yaml
│   │   ├── camvid_S_focal.yaml
│   │   ├── camvid_S_ohem.yaml
│   │   ├── uavid2020_S.yaml
│   │   ├── vaihingen_S.yaml
│   │   ├── vaihingen_S_focal.yaml
│   │   └── vaihingen_S_ohem.yaml
│   ├── SegFormer
│   │   ├── camvid_mitb0.yaml
│   │   ├── camvid_mitb0_focal.yaml
│   │   ├── camvid_mitb0_ohem.yaml
│   │   ├── uavid2020_mitb0.yaml
│   │   ├── vaihingen_mitb0.yaml
│   │   ├── vaihingen_mitb0_focal.yaml
│   │   ├── vaihingen_mitb0_ohem.yaml
│   │   └── vaihingen_mitb3.yaml
│   ├── TopFormer
│   │   ├── camvid_B.yaml
│   │   ├── camvid_B_focal.yaml
│   │   ├── camvid_B_ohem.yaml
│   │   ├── uavid2020_B.yaml
│   │   ├── vaihingen_B.yaml
│   │   ├── vaihingen_B_focal.yaml
│   │   └── vaihingen_B_ohem.yaml
│   ├── UperNet
│   │   ├── camvid_mbv3l.yaml
│   │   ├── uavid2020_mbv3l.yaml
│   │   └── vaihingen_mbv3l.yaml
│   ├── deeplabv3plus
│   │   ├── camvid_mbv3l.yaml
│   │   ├── uavid2020_mbv3l.yaml
│   │   └── vaihingen_mbv3l.yaml
│   └── segformer
│       ├── camvid_mitb0.yaml
│       ├── uavid2020_mitb0.yaml
│       ├── vaihingen_mitb0.yaml
│       └── vaihingen_mitb3.yaml
├── semseg
│   ├── __init__.py
│   ├── augmentations.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── ade20k.py
│   │   ├── aeroscapes.py
│   │   ├── atr.py
│   │   ├── camvid.py
│   │   ├── celebamaskhq.py
│   │   ├── cihp.py
│   │   ├── cityscapes.py
│   │   ├── cocostuff.py
│   │   ├── facesynthetics.py
│   │   ├── helen.py
│   │   ├── htht2022.py
│   │   ├── ibugmask.py
│   │   ├── isaid.py
│   │   ├── lapa.py
│   │   ├── lip.py
│   │   ├── mapillary.py
│   │   ├── mhpv1.py
│   │   ├── mhpv2.py
│   │   ├── pascalcontext.py
│   │   ├── suim.py
│   │   ├── sunrgbd.py
│   │   ├── uavid2020.py
│   │   ├── udd6.py
│   │   └── vaihingen.py
│   ├── losses.py
│   ├── metrics.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── backbones
│   │   │   ├── __init__.py
│   │   │   ├── convnext.py
│   │   │   ├── micronet.py
│   │   │   ├── mit.py
│   │   │   ├── mobilenetv2.py
│   │   │   ├── mobilenetv3.py
│   │   │   ├── mobilenetv3_.py
│   │   │   ├── poolformer.py
│   │   │   ├── pvt.py
│   │   │   ├── resnet.py
│   │   │   ├── resnetd.py
│   │   │   ├── rest.py
│   │   │   ├── topformer.py
│   │   │   └── uniformer.py
│   │   ├── base.py
│   │   ├── bisenetv1.py
│   │   ├── bisenetv2.py
│   │   ├── bisenetv2_ss.py
│   │   ├── ccnet.py
│   │   ├── custom_cnn.py
│   │   ├── custom_vit.py
│   │   ├── ddrnet.py
│   │   ├── ddrnet_official.py
│   │   ├── deeplabv3plus.py
│   │   ├── fast_scnn.py
│   │   ├── fchardnet.py
│   │   ├── heads
│   │   │   ├── __init__.py
│   │   │   ├── condnet.py
│   │   │   ├── fapn.py
│   │   │   ├── fcn.py
│   │   │   ├── fpn.py
│   │   │   ├── lawin.py
│   │   │   ├── segformer.py
│   │   │   ├── sfnet.py
│   │   │   └── upernet.py
│   │   ├── lawin.py
│   │   ├── layers
│   │   │   ├── __init__.py
│   │   │   ├── common.py
│   │   │   └── initialize.py
│   │   ├── modules
│   │   │   ├── __init__.py
│   │   │   ├── attention.py
│   │   │   ├── auxiliary.py
│   │   │   ├── cc_attention.py
│   │   │   ├── dfem.py
│   │   │   ├── ppm.py
│   │   │   ├── psa.py
│   │   │   └── sos.py
│   │   ├── pidnet.py
│   │   ├── pspnet.py
│   │   ├── segformer.py
│   │   ├── sfnet.py
│   │   ├── sosnet.py
│   │   ├── sosnet_ablation.py
│   │   ├── topformer.py
│   │   └── upernet.py
│   ├── optimizers.py
│   ├── schedulers.py
│   └── utils
│       ├── __init__.py
│       ├── utils.py
│       └── visualize.py
├── setup.py
└── tools
    ├── benchmark.py
    ├── convert_datasets
    │   ├── convert_camvid.py
    │   ├── convert_uavid.py
    │   └── convert_vaihingen.py
    ├── export.py
    ├── export_small_objects.py
    ├── feature_visualization.py
    ├── infer.py
    ├── infer_single.py
    ├── submit
    │   └── uavid_submit.py
    ├── train.py
    ├── train_sosnet.py
    └── val.py
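The two "place ... here" placeholders under checkpoints/backbones/ mark where the ImageNet-pretrained backbone weights have to be dropped before training; the configs further down in this dump point at checkpoints/backbones/mit/mit_b0.pth and checkpoints/backbones/mobilenet_/mobilenetv3_large.pth. A small stand-alone sketch, not part of the repository, that verifies those files are in place:

    # check_backbones.py -- illustrative sketch only, not part of the repository.
    # The two paths below are the PRETRAINED paths used by the SegFormer and
    # Deeplabv3plus configs shown later in this dump; extend the list as needed.
    from pathlib import Path

    EXPECTED_WEIGHTS = [
        "checkpoints/backbones/mit/mit_b0.pth",
        "checkpoints/backbones/mobilenet_/mobilenetv3_large.pth",
    ]

    def check_backbone_weights(repo_root: str = ".") -> bool:
        """Return True if every expected backbone checkpoint exists."""
        ok = True
        for rel in EXPECTED_WEIGHTS:
            path = Path(repo_root) / rel
            if not path.is_file():
                print(f"missing backbone checkpoint: {path}")
                ok = False
        return ok

    if __name__ == "__main__":
        check_backbone_weights()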
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 sithu3

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/ablation_ratio.sh:
--------------------------------------------------------------------------------

python tools/benchmark.py --model-name DeeplabV3Plus --backbone-name MobileNetV3-large
python tools/benchmark.py --model-name SOSNet --backbone-name MobileNetV3-large
python tools/benchmark.py --model-name SegFormer --backbone-name MiT-B0


python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.0
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.1
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.2
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.3
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.4
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.5
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.6
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.7
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.8
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 0.9
python tools/train_sosnet.py --cfg configs/UperNet/vaihingen_mbv3l.yaml --hier false --soem true --ratio 1.0
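Every command above selects its hyper-parameters via --cfg, which points at one of the YAML files under configs/ (reproduced below). As a minimal, hypothetical sketch of reading such a file with PyYAML — not the loader used by the repo's tools/train_sosnet.py — and assuming the file follows the same schema as the configs shown in this dump:

    # read_cfg.py -- illustrative sketch only, not part of the repository.
    # Assumes the config uses the schema of the YAML files reproduced below
    # (DEVICE, SAVE_DIR, MODEL, DATASET, TRAIN, LOSS, OPTIMIZER, SCHEDULER, EVAL, TEST).
    import yaml

    def load_config(path: str) -> dict:
        """Parse one of the training configs into a plain nested dict."""
        with open(path, "r") as f:
            return yaml.safe_load(f)

    if __name__ == "__main__":
        # The config used by ablation_ratio.sh above.
        cfg = load_config("configs/UperNet/vaihingen_mbv3l.yaml")
        print("model     :", cfg["MODEL"]["NAME"], cfg["MODEL"]["BACKBONE"])
        print("train size:", cfg["TRAIN"]["IMAGE_SIZE"], "batch:", cfg["TRAIN"]["BATCH_SIZE"])
        print("loss      :", cfg["LOSS"]["NAME"], "lr:", cfg["OPTIMIZER"]["LR"])
        print("save dir  :", cfg["SAVE_DIR"])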
--------------------------------------------------------------------------------
/assests/improve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuLiu/SOSNet/0e4832eeb76daeebd4a0e31a750e7fce86b7b8ac/assests/improve.png
--------------------------------------------------------------------------------
/assests/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuLiu/SOSNet/0e4832eeb76daeebd4a0e31a750e7fce86b7b8ac/assests/overview.png
--------------------------------------------------------------------------------
/checkpoints/backbones/mit/place mit-b0.pth here:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuLiu/SOSNet/0e4832eeb76daeebd4a0e31a750e7fce86b7b8ac/checkpoints/backbones/mit/place mit-b0.pth here
--------------------------------------------------------------------------------
/checkpoints/backbones/mobilenet_/place mobilenetv3.pth here:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StuLiu/SOSNet/0e4832eeb76daeebd4a0e31a750e7fce86b7b8ac/checkpoints/backbones/mobilenet_/place mobilenetv3.pth here
--------------------------------------------------------------------------------
/configs/BiSeNetV2/camvid.yaml:
--------------------------------------------------------------------------------
DEVICE          : cuda:0                                  # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...)
SAVE_DIR        : 'output_ablation/Bisenetv2/camvid'      # output folder name used for saving the model, logs and inference results

MODEL:
  NAME          : BiSeNetv2                                       # name of the model you are using
  BACKBONE      :                                                 # model variant
  PRETRAINED    : checkpoints/backbones/bisenetv2/bisenetv2.pth   # backbone model's weight

DATASET:
  NAME          : CamVid          # dataset name to be trained with (camvid, cityscapes, ade20k)
  ROOT          : 'data/CamVid'   # dataset root path
  IGNORE_LABEL  : 11
  H_FLIP        : true
  V_FLIP        : false

TRAIN:
  IMAGE_SIZE    : [352, 480]      # training image size in (h, w)
  BATCH_SIZE    : 32              # batch size used to train
  EPOCHS        : 968             # number of epochs to train
  EVAL_INTERVAL : 50              # evaluation interval during training
  AMP           : false           # use AMP in training
  DDP           : false           # use DDP training
  MAX_INERITER  : 999999          # max iteration for each epoch

LOSS:
  NAME          : CrossEntropy    # loss function name (ohemce, ce, dice)
  CLS_WEIGHTS   : false           # use class weights in loss calculation

OPTIMIZER:
  NAME          : adamw           # optimizer name
  LR            : 0.001           # initial learning rate used in optimizer
  WEIGHT_DECAY  : 0.01            # decay rate used in optimizer

SCHEDULER:
  NAME          : warmuppolylr    # scheduler name
  POWER         : 0.9             # scheduler power
  WARMUP        : 10              # warmup epochs used in scheduler
  WARMUP_RATIO  : 0.1             # warmup ratio


EVAL:
  MODEL_PATH    : 'output/sosnet_resnet18_camvid.pth'     # trained model file path
  IMAGE_SIZE    : [352, 480]                              # evaluation image size in (h, w)
  MSF:
    ENABLE      : false                                   # multi-scale and flip evaluation
    FLIP        : true                                    # use flip in evaluation
    SCALES      : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]       # scales used in MSF evaluation
  TRAIN_SET     : false

TEST:
  MODEL_PATH    : 'output/sosnet_resnet18_camvid.pth'     # trained model file path
  FILE          : 'assests/camvid'                        # filename or foldername
  IMAGE_SIZE    : [352, 480]                              # inference image size in (h, w)
  OVERLAY       : true                                    # save the overlay result (image_alpha+label_alpha)
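The SCHEDULER block above (warmuppolylr with POWER 0.9, WARMUP 10 epochs, WARMUP_RATIO 0.1) combines a linear warmup with a polynomial decay of the learning rate. A common formulation of such a schedule is sketched below for illustration only; the actual implementation lives in semseg/schedulers.py and may differ in detail:

    # warmup_poly_sketch.py -- illustrative only; not the repo's semseg/schedulers.py.
    def warmup_poly_lr(base_lr, it, max_iter, warmup_iter, warmup_ratio=0.1, power=0.9):
        """One common warmup + polynomial-decay learning-rate schedule."""
        if it < warmup_iter:
            # Linear ramp from warmup_ratio * base_lr up to base_lr.
            alpha = it / max(1, warmup_iter)
            return base_lr * (warmup_ratio + (1.0 - warmup_ratio) * alpha)
        # Polynomial decay towards zero over the remaining iterations.
        progress = (it - warmup_iter) / max(1, max_iter - warmup_iter)
        return base_lr * (1.0 - progress) ** power

    if __name__ == "__main__":
        # LR = 0.001, POWER = 0.9, WARMUP_RATIO = 0.1 as in the config above.
        for step in (0, 50, 100, 5000, 9999):
            print(step, warmup_poly_lr(1e-3, step, max_iter=10000, warmup_iter=100))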
--------------------------------------------------------------------------------
/configs/BiSeNetV2/uavid2020.yaml:
--------------------------------------------------------------------------------
DEVICE          : cuda                                       # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...)
SAVE_DIR        : 'output_ablation/Bisenetv2/uavid2020'      # output folder name used for saving the model, logs and inference results

MODEL:
  NAME          : BiSeNetv2                                       # name of the model you are using
  BACKBONE      :                                                 # model variant
  PRETRAINED    : checkpoints/backbones/bisenetv2/bisenetv2.pth   # backbone model's weight

DATASET:
  NAME          : UAVid2020          # dataset name to be trained
  ROOT          : 'data/UAVid2020'   # dataset root path
  IGNORE_LABEL  : 255
  H_FLIP        : true
  V_FLIP        : false

TRAIN:
  IMAGE_SIZE    : [1024, 1920]    # training image size in (h, w)
  BATCH_SIZE    : 4               # batch size used to train
  EPOCHS        : 100             # number of epochs to train
  EVAL_INTERVAL : 10              # evaluation interval during training
  AMP           : false           # use AMP in training
  DDP           : false           # use DDP training
  MAX_INERITER  : 999999          # max iteration for each epoch

LOSS:
  NAME          : CrossEntropy    # loss function name (ohemce, ce, dice)
  CLS_WEIGHTS   : false           # use class weights in loss calculation

OPTIMIZER:
  NAME          : adamw           # optimizer name
  LR            : 0.001           # initial learning rate used in optimizer
  WEIGHT_DECAY  : 0.01            # decay rate used in optimizer

SCHEDULER:
  NAME          : warmuppolylr    # scheduler name
  POWER         : 0.9             # scheduler power
  WARMUP        : 5               # warmup epochs used in scheduler
  WARMUP_RATIO  : 0.01            # warmup ratio


EVAL:
  MODEL_PATH    : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth'    # trained model file path
  IMAGE_SIZE    : [2160, 3840]                            # evaluation image size in (h, w)
  MSF:
    ENABLE      : false                                   # multi-scale and flip evaluation
    FLIP        : true                                    # use flip in evaluation
    SCALES      : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]       # scales used in MSF evaluation
  TRAIN_SET     : false

TEST:
  MODEL_PATH    : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth'    # trained model file path
  FILE          : 'data/UAVid2020/img_dir/test'           # filename or foldername
  IMAGE_SIZE    : [2160, 3840]                            # inference image size in (h, w)
  OVERLAY       : false                                   # save the overlay result (image_alpha+label_alpha)
--------------------------------------------------------------------------------
/configs/BiSeNetV2/vaihingen.yaml:
--------------------------------------------------------------------------------
1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...)
2 | SAVE_DIR : 'output_ablation/Bisenetv2/Vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : BiSeNetv2 # name of the model you are using 6 | BACKBONE : # model variant 7 | PRETRAINED : checkpoints/backbones/bisenetv2/bisenetv2.pth # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/Deeplabv3plus/camvid_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/deeplabv3plus/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : DeeplabV3Plus # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/Deeplabv3plus/uavid2020_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:4 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/deeplabv3plus/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : DeeplabV3Plus # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1024, 1920] # training image size in (h, w) 18 | BATCH_SIZE : 4 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [2160, 3840] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [2160, 3840] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/Deeplabv3plus/vaihingen_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/deeplabv3plus/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : DeeplabV3Plus # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : true # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/deeplabv3plus_vaihingen.pth' # trained model file path 43 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/deeplabv3plus_vaihingen.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/camvid_S.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/camvid_S_focal.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : Focal # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/camvid_S_ohem.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/uavid2020_S.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1024, 1920] # training image size in (h, w) 18 | BATCH_SIZE : 4 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [2160, 3840] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [2160, 3840] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/vaihingen_S.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/vaihingen_S_focal.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : Focal # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/PIDNet/vaihingen_S_ohem.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/PIDNet/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : PIDNet # name of the model you are using 6 | BACKBONE : PIDNet-S # model variant 7 | PRETRAINED : 'checkpoints/backbones/pidnet/PIDNet_S_ImageNet.pth.tar' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/camvid_mitb0.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/camvid_mitb0_focal.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : Focal # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/camvid_mitb0_ohem.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/uavid2020_mitb0.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:5 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1000, 1800] # training image size in (h, w) 18 | BATCH_SIZE : 2 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [1080, 1920] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [1728, 3072] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/vaihingen_mitb0.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : true # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/vaihingen_mitb0_focal.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : Focal # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : true # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/vaihingen_mitb0_ohem.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : true # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/SegFormer/vaihingen_mitb3.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B3 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 8 # batch size used to train 19 | EPOCHS : 2000 # number of epochs to train 20 | EVAL_INTERVAL : 40 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | 24 | LOSS: 25 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 26 | CLS_WEIGHTS : true # use class weights in loss calculation 27 | 28 | OPTIMIZER: 29 | NAME : adamw # optimizer name 30 | LR : 0.001 # initial learning rate used in optimizer 31 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 32 | 33 | SCHEDULER: 34 | NAME : warmuppolylr # scheduler name 35 | POWER : 0.9 # scheduler power 36 | WARMUP : 10 # warmup epochs used in scheduler 37 | WARMUP_RATIO : 0.1 # warmup ratio 38 | 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SegFormer_MiT-B3_Vaihingen.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | 48 | 49 | TEST: 50 | MODEL_PATH : 'checkpoints/pretrained/mit/mit_b3_vaihingen.pth' # trained model file path 51 | FILE : 'assests/cityscapes' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/camvid_B.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/TopFormer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/camvid_B_focal.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/TopFormer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : Focal # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/camvid_B_ohem.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/TopFormer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/uavid2020_B.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
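# Note: TRAIN.IMAGE_SIZE below uses a reduced 1024x1920 (h, w) input, while EVAL and TEST run on the
# full 2160x3840 UAVid frames configured further down in this file.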
2 | SAVE_DIR : 'output_ablation/TopFormer/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1024, 1920] # training image size in (h, w) 18 | BATCH_SIZE : 4 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [2160, 3840] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [2160, 3840] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/vaihingen_B.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
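# Note on IGNORE_LABEL below: with the six standard ISPRS Vaihingen categories, index 5 is the last one
# (typically clutter/background); pixels with that label are skipped in the loss and used as the fill
# value for padded/augmented regions. This assumes semseg/datasets/vaihingen.py keeps the standard class order.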
2 | SAVE_DIR : 'output_ablation/TopFormer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/vaihingen_B_focal.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/TopFormer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : Focal # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/TopFormer/vaihingen_B_ohem.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/TopFormer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : TopFormer # name of the model you are using 6 | BACKBONE : TokenPyramidTransformer-B # model variant 7 | PRETRAINED : 'checkpoints/backbones/topformer/topformer-B-224-75.3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/UperNet/camvid_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:2 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/UperNet/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : UperNet # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/UperNet/uavid2020_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:1 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/UperNet/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : UperNet # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1024, 1920] # training image size in (h, w) 18 | BATCH_SIZE : 4 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [2160, 3840] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [2160, 3840] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/UperNet/vaihingen_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:7 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/UperNet/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : UperNet # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/deeplabv3plus/camvid_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/deeplabv3plus/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : DeeplabV3Plus # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/deeplabv3plus/uavid2020_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:4 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/deeplabv3plus/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : DeeplabV3Plus # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1024, 1920] # training image size in (h, w) 18 | BATCH_SIZE : 4 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [2160, 3840] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [2160, 3840] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/deeplabv3plus/vaihingen_mbv3l.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/deeplabv3plus/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : DeeplabV3Plus # name of the model you are using 6 | BACKBONE : MobileNetV3-large # model variant 7 | PRETRAINED : 'checkpoints/backbones/mobilenet_/mobilenetv3_large.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : true # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/deeplabv3plus_vaihingen.pth' # trained model file path 43 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/deeplabv3plus_vaihingen.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/segformer/camvid_mitb0.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:0 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/camvid' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : CamVid # dataset name to be trained with (camvid, cityscapes, ade20k) 11 | ROOT : 'data/CamVid' # dataset root path 12 | IGNORE_LABEL : 11 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [360, 480] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 968 # number of epochs to train 20 | EVAL_INTERVAL : 50 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 43 | IMAGE_SIZE : [360, 480] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/sosnet_resnet18_camvid.pth' # trained model file path 52 | FILE : 'assests/camvid' # filename or foldername 53 | IMAGE_SIZE : [360, 480] # inference image size in (h, w) 54 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/segformer/uavid2020_mitb0.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda:5 # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/uavid2020' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : UAVid2020 # dataset name to be trained 11 | ROOT : 'data/UAVid2020' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : false 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [1000, 1800] # training image size in (h, w) 18 | BATCH_SIZE : 2 # batch size used to train 19 | EPOCHS : 100 # number of epochs to train 20 | EVAL_INTERVAL : 10 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : false # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 5 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.01 # warmup ratio 39 | 40 | 41 | EVAL: 42 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 43 | IMAGE_SIZE : [1080, 1920] # evaluation image size in (h, w) 44 | MSF: 45 | ENABLE : false # multi-scale and flip evaluation 46 | FLIP : true # use flip in evaluation 47 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 48 | TRAIN_SET : false 49 | 50 | TEST: 51 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_UAVid2020_best.pth' # trained model file path 52 | FILE : 'data/UAVid2020/img_dir/test' # filename or foldername 53 | IMAGE_SIZE : [1728, 3072] # inference image size in (h, w) 54 | OVERLAY : false # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/segformer/vaihingen_mitb0.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output_ablation/segformer/vaihingen' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B0 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b0.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 5 # ignore_label for back propagation(loss) and aug filling 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 32 # batch size used to train 19 | EPOCHS : 500 # number of epochs to train 20 | EVAL_INTERVAL : 25 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | MAX_INERITER : 999999 # max iteration for each epoch 24 | 25 | LOSS: 26 | NAME : CrossEntropy # loss function name (ohemce, ce, dice) 27 | CLS_WEIGHTS : true # use class weights in loss calculation 28 | 29 | OPTIMIZER: 30 | NAME : adamw # optimizer name 31 | LR : 0.001 # initial learning rate used in optimizer 32 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 33 | 34 | SCHEDULER: 35 | NAME : warmuppolylr # scheduler name 36 | POWER : 0.9 # scheduler power 37 | WARMUP : 10 # warmup epochs used in scheduler 38 | WARMUP_RATIO : 0.1 # warmup ratio 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | TRAIN_SET : false 48 | 49 | TEST: 50 | MODEL_PATH : 'output/SOSNet_MobileNetV3-large_Vaihingen_best.pth' # trained model file path 51 | FILE : 'assests/camvid' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /configs/segformer/vaihingen_mitb3.yaml: -------------------------------------------------------------------------------- 1 | DEVICE : cuda # device used for training and evaluation (cpu, cuda, cuda0, cuda1, ...) 
2 | SAVE_DIR : 'output' # output folder name used for saving the model, logs and inference results 3 | 4 | MODEL: 5 | NAME : SegFormer # name of the model you are using 6 | BACKBONE : MiT-B3 # model variant 7 | PRETRAINED : 'checkpoints/backbones/mit/mit_b3.pth' # backbone model's weight 8 | 9 | DATASET: 10 | NAME : Vaihingen # dataset name to be trained 11 | ROOT : 'data/ISPRS_DATA/Vaihingen2' # dataset root path 12 | IGNORE_LABEL : 255 13 | H_FLIP : true 14 | V_FLIP : true 15 | 16 | TRAIN: 17 | IMAGE_SIZE : [512, 512] # training image size in (h, w) 18 | BATCH_SIZE : 8 # batch size used to train 19 | EPOCHS : 2000 # number of epochs to train 20 | EVAL_INTERVAL : 40 # evaluation interval during training 21 | AMP : false # use AMP in training 22 | DDP : false # use DDP training 23 | 24 | LOSS: 25 | NAME : OhemCrossEntropy # loss function name (ohemce, ce, dice) 26 | CLS_WEIGHTS : true # use class weights in loss calculation 27 | 28 | OPTIMIZER: 29 | NAME : adamw # optimizer name 30 | LR : 0.001 # initial learning rate used in optimizer 31 | WEIGHT_DECAY : 0.01 # decay rate used in optimizer 32 | 33 | SCHEDULER: 34 | NAME : warmuppolylr # scheduler name 35 | POWER : 0.9 # scheduler power 36 | WARMUP : 10 # warmup epochs used in scheduler 37 | WARMUP_RATIO : 0.1 # warmup ratio 38 | 39 | 40 | EVAL: 41 | MODEL_PATH : 'output/SegFormer_MiT-B3_Vaihingen.pth' # trained model file path 42 | IMAGE_SIZE : [512, 512] # evaluation image size in (h, w) 43 | MSF: 44 | ENABLE : false # multi-scale and flip evaluation 45 | FLIP : true # use flip in evaluation 46 | SCALES : [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] # scales used in MSF evaluation 47 | 48 | 49 | TEST: 50 | MODEL_PATH : 'checkpoints/pretrained/mit/mit_b3_vaihingen.pth' # trained model file path 51 | FILE : 'assests/cityscapes' # filename or foldername 52 | IMAGE_SIZE : [512, 512] # inference image size in (h, w) 53 | OVERLAY : true # save the overlay result (image_alpha+label_alpha) -------------------------------------------------------------------------------- /semseg/__init__.py: -------------------------------------------------------------------------------- 1 | from tabulate import tabulate 2 | from semseg import models 3 | from semseg import datasets 4 | from semseg.models import backbones, heads 5 | 6 | 7 | def show_models(): 8 | model_names = models.__all__ 9 | numbers = list(range(1, len(model_names)+1)) 10 | print(tabulate({'No.': numbers, 'Model Names': model_names}, headers='keys')) 11 | 12 | 13 | def show_backbones(): 14 | backbone_names = backbones.__all__ 15 | variants = [] 16 | for name in backbone_names: 17 | try: 18 | variants.append(list(eval(f"backbones.{name.lower()}_settings").keys())) 19 | except: 20 | variants.append('-') 21 | print(tabulate({'Backbone Names': backbone_names, 'Variants': variants}, headers='keys')) 22 | 23 | 24 | def show_heads(): 25 | head_names = heads.__all__ 26 | numbers = list(range(1, len(head_names)+1)) 27 | print(tabulate({'No.': numbers, 'Heads': head_names}, headers='keys')) 28 | 29 | 30 | def show_datasets(): 31 | dataset_names = datasets.__all__ 32 | numbers = list(range(1, len(dataset_names)+1)) 33 | print(tabulate({'No.': numbers, 'Datasets': dataset_names}, headers='keys')) 34 | -------------------------------------------------------------------------------- /semseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .ade20k import ADE20K 2 | from .camvid import CamVid 3 | from .cityscapes import CityScapes 4 | from .pascalcontext 
import PASCALContext 5 | from .cocostuff import COCOStuff 6 | from .sunrgbd import SunRGBD 7 | from .mapillary import MapillaryVistas 8 | from .mhpv1 import MHPv1 9 | from .mhpv2 import MHPv2 10 | from .lip import LIP 11 | from .cihp import CIHP, CCIHP 12 | from .atr import ATR 13 | from .suim import SUIM 14 | from .helen import HELEN 15 | from .lapa import LaPa 16 | from .ibugmask import iBugMask 17 | from .celebamaskhq import CelebAMaskHQ 18 | from .facesynthetics import FaceSynthetics 19 | # ################################# 20 | from .vaihingen import Vaihingen 21 | from .uavid2020 import UAVid2020 22 | from .isaid import ISAID 23 | from .udd6 import UDD6 24 | from .htht2022 import HTHT2022Coarse 25 | from .aeroscapes import Aeroscapes 26 | # ################################# 27 | 28 | 29 | __all__ = [ 30 | 'CamVid', 31 | 'CityScapes', 32 | 'ADE20K', 33 | 'MHPv1', 34 | 'MHPv2', 35 | 'LIP', 36 | 'CIHP', 37 | 'CCIHP', 38 | 'ATR', 39 | 'PASCALContext', 40 | 'COCOStuff', 41 | 'SUIM', 42 | 'SunRGBD', 43 | 'MapillaryVistas', 44 | 'HELEN', 45 | 'LaPa', 46 | 'iBugMask', 47 | 'CelebAMaskHQ', 48 | 'FaceSynthetics', 49 | # ################# 50 | 'Vaihingen', 51 | 'UAVid2020', 52 | 'ISAID', 53 | 'UDD6', 54 | 'HTHT2022Coarse', 55 | 'Aeroscapes', 56 | # ################# 57 | ] 58 | -------------------------------------------------------------------------------- /semseg/datasets/atr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class ATR(Dataset): 10 | """Single Person Fashion Dataset 11 | https://openaccess.thecvf.com/content_iccv_2015/papers/Liang_Human_Parsing_With_ICCV_2015_paper.pdf 12 | 13 | https://github.com/lemondan/HumanParsing-Dataset 14 | num_classes: 17+background 15 | 16000 train images 16 | 700 val images 17 | 1000 test images with labels 18 | """ 19 | CLASSES = ['background', 'hat', 'hair', 'sunglass', 'upper-clothes', 'skirt', 'pants', 'dress', 'belt', 'left-shoe', 'right-shoe', 'face', 'left-leg', 'right-leg', 'left-arm', 'right-arm', 'bag', 'scarf'] 20 | PALETTE = torch.tensor([[0, 0, 0], [127, 0, 0], [254, 0, 0], [0, 84, 0], [169, 0, 50], [254, 84, 0], [255, 0, 84], [0, 118, 220], [84, 84, 0], [0, 84, 84], [84, 50, 0], [51, 85, 127], [0, 127, 0], [0, 0, 254], [50, 169, 220], [0, 254, 254], [84, 254, 169], [169, 254, 84]]) 21 | 22 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 23 | super().__init__() 24 | assert split in ['train', 'val', 'test'] 25 | self.transform = transform 26 | self.n_classes = len(self.CLASSES) 27 | self.ignore_label = 255 28 | 29 | img_path = Path(root) / 'humanparsing' / 'JPEGImages' 30 | self.files = list(img_path.glob('*.jpg')) 31 | if split == 'train': 32 | self.files = self.files[:16000] 33 | elif split == 'val': 34 | self.files = self.files[16000:16700] 35 | else: 36 | self.files = self.files[16700:17700] 37 | 38 | if not self.files: 39 | raise Exception(f"No images found in {img_path}") 40 | print(f"Found {len(self.files)} {split} images.") 41 | 42 | def __len__(self) -> int: 43 | return len(self.files) 44 | 45 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 46 | img_path = str(self.files[index]) 47 | lbl_path = str(self.files[index]).replace('JPEGImages', 'SegmentationClassAug').replace('.jpg', '.png') 48 | 49 | image = io.read_image(img_path) 50 | label = io.read_image(lbl_path) 
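        # image is a uint8 CxHxW tensor; label is a 1xHxW mask read from SegmentationClassAug.
        # The squeeze()/long() below produces the HxW int64 target expected by the loss.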
51 | 52 | if self.transform: 53 | image, label = self.transform(image, label) 54 | return image, label.squeeze().long() 55 | 56 | 57 | if __name__ == '__main__': 58 | from semseg.utils.visualize import visualize_dataset_sample 59 | visualize_dataset_sample(ATR, '/home/sithu/datasets/LIP/ATR') -------------------------------------------------------------------------------- /semseg/datasets/celebamaskhq.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | from torchvision import transforms as T 8 | 9 | 10 | class CelebAMaskHQ(Dataset): 11 | CLASSES = [ 12 | 'background', 'skin', 'nose', 'eye_g', 'l_eye', 'r_eye', 'l_brow', 'r_brow', 'l_ear', 13 | 'r_ear', 'mouth', 'u_lip', 'l_lip', 'hair', 'hat', 'ear_r', 'neck_l', 'neck', 'cloth' 14 | ] 15 | PALETTE = torch.tensor([ 16 | [0, 0, 0], [204, 0, 0], [76, 153, 0], [204, 204, 0], [51, 51, 255], [204, 0, 204], [0, 255, 255], [255, 204, 204], [102, 51, 0], [255, 0, 0], 17 | [102, 204, 0], [255, 255, 0], [0, 0, 153], [0, 0, 204], [255, 51, 153], [0, 204, 204], [0, 51, 0], [255, 153, 51], [0, 204, 0] 18 | ]) 19 | 20 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 21 | super().__init__() 22 | assert split in ['train', 'val', 'test'] 23 | self.root = Path(root) 24 | self.transform = transform 25 | self.n_classes = len(self.CLASSES) 26 | self.ignore_label = 255 27 | self.resize = T.Resize((512, 512)) 28 | 29 | with open(self.root / f'{split}_list.txt') as f: 30 | self.files = f.read().splitlines() 31 | 32 | if not self.files: 33 | raise Exception(f"No images found in {root}") 34 | print(f"Found {len(self.files)} {split} images.") 35 | 36 | def __len__(self) -> int: 37 | return len(self.files) 38 | 39 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 40 | img_path = self.root / 'CelebA-HQ-img' / f"{self.files[index]}.jpg" 41 | lbl_path = self.root / 'CelebAMask-HQ-label' / f"{self.files[index]}.png" 42 | image = io.read_image(str(img_path)) 43 | image = self.resize(image) 44 | label = io.read_image(str(lbl_path)) 45 | 46 | if self.transform: 47 | image, label = self.transform(image, label) 48 | return image, label.squeeze().long() 49 | 50 | 51 | if __name__ == '__main__': 52 | from semseg.utils.visualize import visualize_dataset_sample 53 | visualize_dataset_sample(CelebAMaskHQ, '/home/sithu/datasets/CelebAMask-HQ') -------------------------------------------------------------------------------- /semseg/datasets/cihp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class CIHP(Dataset): 10 | """This has Best Human Parsing Labels 11 | num_classes: 19+background 12 | 28280 train images 13 | 5000 val images 14 | """ 15 | CLASSES = ['background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes', 'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt', 'face', 'left-arm', 'right-arm', 'left-leg', 'right-leg', 'left-shoe', 'right-shoe'] 16 | PALETTE = torch.tensor([[120, 120, 120], [127, 0, 0], [254, 0, 0], [0, 84, 0], [169, 0, 50], [254, 84, 0], [255, 0, 84], [0, 118, 220], [84, 84, 0], [0, 84, 84], [84, 50, 0], [51, 85, 127], [0, 127, 0], [0, 0, 254], [50, 169, 220], [0, 254, 
254], [84, 254, 169], [169, 254, 84], [254, 254, 0], [254, 169, 0]]) 17 | 18 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 19 | super().__init__() 20 | assert split in ['train', 'val'] 21 | split = 'Training' if split == 'train' else 'Validation' 22 | self.transform = transform 23 | self.n_classes = len(self.CLASSES) 24 | self.ignore_label = 255 25 | 26 | img_path = Path(root) / 'instance-level_human_parsing' / split / 'Images' 27 | self.files = list(img_path.glob('*.jpg')) 28 | 29 | if not self.files: 30 | raise Exception(f"No images found in {img_path}") 31 | print(f"Found {len(self.files)} {split} images.") 32 | 33 | def __len__(self) -> int: 34 | return len(self.files) 35 | 36 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 37 | img_path = str(self.files[index]) 38 | lbl_path = str(self.files[index]).replace('Images', 'Category_ids').replace('.jpg', '.png') 39 | 40 | image = io.read_image(img_path) 41 | label = io.read_image(lbl_path) 42 | 43 | if self.transform: 44 | image, label = self.transform(image, label) 45 | return image, label.squeeze().long() 46 | 47 | 48 | class CCIHP(CIHP): 49 | CLASSES = ['background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes', 'facemask', 'coat', 'socks', 'pants', 'torso-skin', 'scarf', 'skirt', 'face', 'left-arm', 'right-arm', 'left-leg', 'right-leg', 'left-shoe', 'right-shoe', 'bag', 'others'] 50 | PALETTE = torch.tensor([[120, 120, 120], [127, 0, 0], [254, 0, 0], [0, 84, 0], [169, 0, 50], [254, 84, 0], [255, 0, 84], [0, 118, 220], [84, 84, 0], [0, 84, 84], [84, 50, 0], [51, 85, 127], [0, 127, 0], [0, 0, 254], [50, 169, 220], [0, 254, 254], [84, 254, 169], [169, 254, 84], [254, 254, 0], [254, 169, 0], [102, 254, 0], [182, 255, 0]]) 51 | 52 | 53 | if __name__ == '__main__': 54 | import sys 55 | sys.path.insert(0, '.') 56 | from semseg.utils.visualize import visualize_dataset_sample 57 | visualize_dataset_sample(CCIHP, 'C:\\Users\\sithu\\Documents\\Datasets\\LIP\\CIHP') -------------------------------------------------------------------------------- /semseg/datasets/facesynthetics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class FaceSynthetics(Dataset): 10 | CLASSES = ['background', 'skin', 'nose', 'r-eye', 'l-eye', 'r-brow', 'l-brow', 'r-ear', 'l-ear', 'i-mouth', 't-lip', 'b-lip', 'neck', 'hair', 'beard', 'clothing', 'glasses', 'headwear', 'facewear'] 11 | PALETTE = torch.tensor([ 12 | [0, 0, 0], [204, 0, 0], [76, 153, 0], [204, 204, 0], [51, 51, 255], [204, 0, 204], [0, 255, 255], [255, 204, 204], [102, 51, 0], [255, 0, 0], 13 | [102, 204, 0], [255, 255, 0], [0, 0, 153], [0, 0, 204], [255, 51, 153], [0, 204, 204], [0, 51, 0], [255, 153, 51], [0, 204, 0] 14 | ]) 15 | 16 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 17 | super().__init__() 18 | assert split in ['train', 'val', 'test'] 19 | if split == 'train': 20 | split = 'dataset_100000' 21 | elif split == 'val': 22 | split = 'dataset_1000' 23 | else: 24 | split = 'dataset_100' 25 | 26 | self.transform = transform 27 | self.n_classes = len(self.CLASSES) 28 | self.ignore_label = 255 29 | 30 | img_path = Path(root) / split 31 | images = img_path.glob('*.png') 32 | self.files = [path for path in images if '_seg' not in path.name] 33 | 34 | if not self.files: raise Exception(f"No images 
found in {root}") 35 | print(f"Found {len(self.files)} {split} images.") 36 | 37 | def __len__(self) -> int: 38 | return len(self.files) 39 | 40 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 41 | img_path = str(self.files[index]) 42 | lbl_path = str(self.files[index]).replace('.png', '_seg.png') 43 | image = io.read_image(str(img_path)) 44 | label = io.read_image(str(lbl_path)) 45 | 46 | if self.transform: 47 | image, label = self.transform(image, label) 48 | return image, label.squeeze().long() 49 | 50 | 51 | if __name__ == '__main__': 52 | import sys 53 | sys.path.insert(0, '.') 54 | from semseg.utils.visualize import visualize_dataset_sample 55 | visualize_dataset_sample(FaceSynthetics, 'C:\\Users\\sithu\\Documents\\Datasets') -------------------------------------------------------------------------------- /semseg/datasets/helen.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class HELEN(Dataset): 10 | CLASSES = ['background', 'skin', 'l-brow', 'r-brow', 'l-eye', 'r-eye', 'nose', 'u-lip', 'i-mouth', 'l-lip', 'hair'] 11 | PALETTE = torch.tensor([[0, 0 ,0], [127, 0, 0], [254, 0, 0], [0, 84, 0], [169, 0, 50], [254, 84, 0], [255, 0, 84], [0, 118, 220], [84, 84, 0], [0, 84, 84], [84, 50, 0]]) 12 | 13 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 14 | super().__init__() 15 | assert split in ['train', 'val', 'test'] 16 | self.transform = transform 17 | self.n_classes = len(self.CLASSES) 18 | self.ignore_label = 255 19 | 20 | self.files = self.get_files(root, split) 21 | if not self.files: raise Exception(f"No images found in {root}") 22 | print(f"Found {len(self.files)} {split} images.") 23 | 24 | def get_files(self, root: str, split: str): 25 | root = Path(root) 26 | if split == 'train': 27 | split = 'exemplars' 28 | elif split == 'val': 29 | split = 'tuning' 30 | else: 31 | split = 'testing' 32 | with open(root / f'{split}.txt') as f: 33 | lines = f.read().splitlines() 34 | 35 | split_names = [line.split(',')[-1].strip() for line in lines if line != ''] 36 | files = (root / 'images').glob("*.jpg") 37 | files = list(filter(lambda x: x.stem in split_names, files)) 38 | return files 39 | 40 | def __len__(self) -> int: 41 | return len(self.files) 42 | 43 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 44 | img_path = str(self.files[index]) 45 | lbl_path = str(self.files[index]).split('.')[0].replace('images', 'labels') 46 | image = io.read_image(img_path) 47 | label = self.encode(lbl_path) 48 | 49 | if self.transform: 50 | image, label = self.transform(image, label) 51 | return image, label.squeeze().long() 52 | 53 | def encode(self, label_path: str) -> Tensor: 54 | mask_paths = sorted(list(Path(label_path).glob('*.png'))) 55 | for i, mask_path in enumerate(mask_paths): 56 | mask = io.read_image(str(mask_path)).squeeze() 57 | if i == 0: 58 | label = torch.zeros(self.n_classes, *mask.shape) 59 | label[i, ...] 
= mask 60 | label = label.argmax(dim=0).unsqueeze(0) 61 | return label 62 | 63 | 64 | if __name__ == '__main__': 65 | from semseg.utils.visualize import visualize_dataset_sample 66 | visualize_dataset_sample(HELEN, '/home/sithu/datasets/SmithCVPR2013_dataset_resized') -------------------------------------------------------------------------------- /semseg/datasets/ibugmask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class iBugMask(Dataset): 10 | CLASSES = ['background', 'skin', 'l-brow', 'r-brow', 'l-eye', 'r-eye', 'nose', 'u-lip', 'i-mouth', 'l-lip', 'hair'] 11 | PALETTE = torch.tensor([[0, 0, 0], [255, 255, 0], [139, 76, 57], [139, 54, 38], [0, 205, 0], [0, 138, 0], [154, 50, 205], [72, 118, 255], [255, 165, 0], [0, 0, 139], [255, 0, 0]]) 12 | 13 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 14 | super().__init__() 15 | assert split in ['train', 'val', 'test'] 16 | split = 'train' if split == 'train' else 'test' 17 | self.transform = transform 18 | self.n_classes = len(self.CLASSES) 19 | self.ignore_label = 255 20 | 21 | img_path = Path(root) / split 22 | self.files = list(img_path.glob('*.jpg')) 23 | 24 | if not self.files: raise Exception(f"No images found in {root}") 25 | print(f"Found {len(self.files)} {split} images.") 26 | 27 | def __len__(self) -> int: 28 | return len(self.files) 29 | 30 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 31 | img_path = str(self.files[index]) 32 | lbl_path = str(self.files[index]).replace('.jpg', '.png') 33 | image = io.read_image(str(img_path)) 34 | label = io.read_image(str(lbl_path)) 35 | 36 | if self.transform: 37 | image, label = self.transform(image, label) 38 | return image, label.squeeze().long() 39 | 40 | 41 | if __name__ == '__main__': 42 | from semseg.utils.visualize import visualize_dataset_sample 43 | visualize_dataset_sample(iBugMask, '/home/sithu/datasets/ibugmask_release') -------------------------------------------------------------------------------- /semseg/datasets/isaid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | 4 | import cv2 5 | import torch 6 | import logging 7 | from torch import Tensor 8 | from torch.utils.data import Dataset 9 | from torchvision import io 10 | from pathlib import Path 11 | from typing import Tuple 12 | from glob import glob 13 | 14 | 15 | class ISAID(Dataset): 16 | """ 17 | num_classes: 16, ignore index is 255 (impervious_surface). 
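    Expected directory layout (inferred from the loader code below; paths shown are examples):
        <root>/img_dir/{train,val}/*.png
        <root>/ann_dir/{train,val}/*_instance_color_RGB.png

    Minimal usage sketch (mirrors the __main__ block at the bottom of this file; the root path is an example):
        ds = ISAID('data/iSAID2', split='train')   # preload=False keeps images on disk until indexed
        image, label = ds[0]                       # image: uint8 CxHxW tensor, label: HxW int64 tensor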
18 | """ 19 | CLASSES = ['background', 'ship', 'store_tank', 'baseball_diamond', 'tennis_court', 20 | 'basketball_court', 'Ground_Track_Field', 'Bridge', 'Large_Vehicle', 'Small_Vehicle', 21 | 'Helicopter', 'Swimming_pool', 'Roundabout', 'Soccer_ball_field', 'plane', 22 | 'Harbor'] 23 | 24 | PALETTE = torch.tensor([[0, 0, 0], [0, 0, 63], [0, 63, 63], [0, 63, 0], [0, 63, 127], 25 | [0, 63, 191], [0, 63, 255], [0, 127, 63], [0, 127, 127], [0, 0, 127], 26 | [0, 0, 191], [0, 0, 255], [0, 191, 127], [0, 127, 191], [0, 127, 255], 27 | [0, 100, 155]]) 28 | 29 | SMALL_OBJECT = [1, 2, 3, 7, 8, 9, 10, 11, 12, 14, 15] 30 | 31 | def __init__(self, root: str, split: str = 'train', transform=None, preload=False) -> None: 32 | super().__init__() 33 | assert split in ['train', 'val'] 34 | self.split = split 35 | self.transform = transform 36 | self.n_classes = len(self.CLASSES) 37 | self.ignore_label = 255 38 | self.preload = preload 39 | self.pairs = [] 40 | 41 | imgs = glob(osp.join(root, 'img_dir', self.split) + '/*.png') 42 | imgs.sort() 43 | for img_path in imgs: 44 | lbl_path = img_path.replace('img_dir', 'ann_dir').replace('.png', '_instance_color_RGB.png') 45 | data_pair = [ 46 | io.read_image(img_path) if self.preload else img_path, 47 | io.read_image(lbl_path) if self.preload else lbl_path, 48 | ] 49 | self.pairs.append(data_pair) 50 | 51 | assert len(self.pairs) > 0, f"No images found in {root}" 52 | logging.info(f"Found {len(self.pairs)} {split} images.") 53 | 54 | def __len__(self) -> int: 55 | return len(self.pairs) 56 | 57 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 58 | image, label = self.pairs[index] 59 | if not self.preload: 60 | image = io.read_image(image) 61 | label = io.read_image(label) 62 | 63 | if self.transform: 64 | image, label = self.transform(image, label) 65 | return image, torch.squeeze(label.long()) 66 | 67 | 68 | if __name__ == '__main__': 69 | # from semseg.utils.visualize import visualize_dataset_sample 70 | # visualize_dataset_sample(ISAID, '../../data/iSAID2') 71 | 72 | 73 | from torch.utils.data import DataLoader 74 | import numpy as np 75 | 76 | 77 | train_dataset = ISAID('../../data/iSAID2', split='train') 78 | val_dataset = ISAID('../../data/iSAID2', split='val') 79 | print(f'train size={len(train_dataset)}, val size={len(val_dataset)}') 80 | 81 | train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=False) 82 | for _img, _lbl in train_dataloader: 83 | print(_img.cpu().numpy().shape, _lbl.cpu().numpy().shape, np.unique(_lbl.cpu().numpy())) 84 | cc = _lbl.cpu().numpy().squeeze().astype(np.uint8) 85 | cv2.imshow('img', _img.cpu().numpy().squeeze().transpose((1,2,0))) 86 | # cv2.imshow('lbl', np.array([cc, cc, cc]).transpose((1, 2, 0)).astype(np.uint8)) 87 | cv2.imshow('lbl', cc) 88 | cv2.waitKey(0) 89 | -------------------------------------------------------------------------------- /semseg/datasets/lapa.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class LaPa(Dataset): 10 | CLASSES = ['background', 'skin', 'l-brow', 'r-brow', 'l-eye', 'r-eye', 'nose', 'u-lip', 'i-mouth', 'l-lip', 'hair'] 11 | PALETTE = torch.tensor([[0, 0, 0], [0, 153, 255], [102, 255, 153], [0, 204, 153], [255, 255, 102], [255, 255, 204], [255, 153, 0], [255, 102, 255], [102, 0, 51], [255, 204, 255], [255, 0, 102]]) 12 | 13 | def 
__init__(self, root: str, split: str = 'train', transform = None) -> None: 14 | super().__init__() 15 | assert split in ['train', 'val', 'test'] 16 | self.transform = transform 17 | self.n_classes = len(self.CLASSES) 18 | self.ignore_label = 255 19 | 20 | img_path = Path(root) / split / 'images' 21 | self.files = list(img_path.glob('*.jpg')) 22 | 23 | if not self.files: raise Exception(f"No images found in {root}") 24 | print(f"Found {len(self.files)} {split} images.") 25 | 26 | def __len__(self) -> int: 27 | return len(self.files) 28 | 29 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 30 | img_path = str(self.files[index]) 31 | lbl_path = str(self.files[index]).replace('images', 'labels').replace('.jpg', '.png') 32 | image = io.read_image(str(img_path)) 33 | label = io.read_image(str(lbl_path)) 34 | 35 | if self.transform: 36 | image, label = self.transform(image, label) 37 | return image, label.squeeze().long() 38 | 39 | 40 | if __name__ == '__main__': 41 | from semseg.utils.visualize import visualize_dataset_sample 42 | visualize_dataset_sample(LaPa, '/home/sithu/datasets/LaPa') -------------------------------------------------------------------------------- /semseg/datasets/lip.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class LIP(Dataset): 10 | """ 11 | num_classes: 19+background 12 | 30462 train images 13 | 10000 val images 14 | """ 15 | CLASSES = ['background', 'hat', 'hair', 'glove', 'sunglasses', 'upperclothes', 'dress', 'coat', 'socks', 'pants', 'jumpsuits', 'scarf', 'skirt', 'face', 'left-arm', 'right-arm', 'left-leg', 'right-leg', 'left-shoe', 'right-shoe'] 16 | PALETTE = torch.tensor([[0, 0, 0], [127, 0, 0], [254, 0, 0], [0, 84, 0], [169, 0, 50], [254, 84, 0], [255, 0, 84], [0, 118, 220], [84, 84, 0], [0, 84, 84], [84, 50, 0], [51, 85, 127], [0, 127, 0], [0, 0, 254], [50, 169, 220], [0, 254, 254], [84, 254, 169], [169, 254, 84], [254, 254, 0], [254, 169, 0]]) 17 | 18 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 19 | super().__init__() 20 | assert split in ['train', 'val'] 21 | self.split = split 22 | self.transform = transform 23 | self.n_classes = len(self.CLASSES) 24 | self.ignore_label = 255 25 | 26 | img_path = Path(root) / 'TrainVal_images' / f'{split}_images' 27 | self.files = list(img_path.glob('*.jpg')) 28 | 29 | if not self.files: 30 | raise Exception(f"No images found in {img_path}") 31 | print(f"Found {len(self.files)} {split} images.") 32 | 33 | def __len__(self) -> int: 34 | return len(self.files) 35 | 36 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 37 | img_path = str(self.files[index]) 38 | lbl_path = str(self.files[index]).replace('TrainVal_images', 'TrainVal_parsing_annotations').replace(f'{self.split}_images', f'{self.split}_segmentations').replace('.jpg', '.png') 39 | 40 | image = io.read_image(img_path) 41 | label = io.read_image(lbl_path) 42 | 43 | if self.transform: 44 | image, label = self.transform(image, label) 45 | return image, label.squeeze().long() 46 | 47 | 48 | if __name__ == '__main__': 49 | from semseg.utils.visualize import visualize_dataset_sample 50 | visualize_dataset_sample(LIP, '/home/sithu/datasets/LIP/LIP') -------------------------------------------------------------------------------- /semseg/datasets/mapillary.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | 8 | 9 | class MapillaryVistas(Dataset): 10 | CLASSES = [ 11 | 'Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier', 'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking', 'Pedestrian Area', 'Rail Track', 'Road', 'Service Lane', 'Sidewalk', 'Bridge', 'Building', 'Tunnel', 'Person', 'Bicyclist', 'Motorcyclist', 'Other Rider', 'Lane Marking - Crosswalk', 'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow', 'Terrain', 'Vegetation', 'Water', 'Banner', 12 | 'Bench', 'Bike Rack', 'Billboard', 'Catch Basin', 'CCTV Camera', 'Fire Hydrant', 'Junction Box', 'Mailbox', 'Manhole', 'Phone Booth', 'Pothole', 'Street Light', 'Pole', 'Traffic Sign Frame', 'Utility Pole', 'Traffic Light', 'Traffic Sign (Back)', 'Traffic Sign (Front)', 'Trash Can', 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan', 'Motorcycle', 'On Rails', 'Other Vehicle', 'Trailer', 'Truck', 'Wheeled Slow', 'Car Mount', 'Ego Vehicle' 13 | ] 14 | PALETTE = torch.tensor([ 15 | [165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153], [180, 165, 180], [90, 120, 150], [102, 102, 156], [128, 64, 255], [140, 140, 200], [170, 170, 170], [250, 170, 160], [96, 96, 96], [230, 150, 140], [128, 64, 128], [110, 110, 110], [244, 35, 232], [150, 100, 100], [70, 70, 70], [150, 120, 90], [220, 20, 60], [255, 0, 0], [255, 0, 100], [255, 0, 200], [200, 128, 128], [255, 255, 255], [64, 170, 64], [230, 160, 50], [70, 130, 180], [190, 255, 255], [152, 251, 152], [107, 142, 35], 16 | [0, 170, 30], [255, 255, 128], [250, 0, 30], [100, 140, 180], [220, 220, 220], [220, 128, 128], [222, 40, 40], [100, 170, 30], [40, 40, 40], [33, 33, 33], [100, 128, 160], [142, 0, 0], [70, 100, 150], [210, 170, 100], [153, 153, 153], [128, 128, 128], [0, 0, 80], [250, 170, 30], [192, 192, 192], [220, 220, 0], [140, 140, 20], [119, 11, 32], [150, 0, 255], [0, 60, 100], [0, 0, 142], [0, 0, 90], [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110], [0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, 10] 17 | ]) 18 | 19 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 20 | super().__init__() 21 | assert split in ['train', 'val'] 22 | split = 'training' if split == 'train' else 'validation' 23 | self.transform = transform 24 | self.n_classes = len(self.CLASSES) 25 | self.ignore_label = 65 26 | 27 | img_path = Path(root) / split / 'images' 28 | self.files = list(img_path.glob("*.jpg")) 29 | 30 | if not self.files: 31 | raise Exception(f"No images found in {img_path}") 32 | print(f"Found {len(self.files)} {split} images.") 33 | 34 | def __len__(self) -> int: 35 | return len(self.files) 36 | 37 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 38 | img_path = str(self.files[index]) 39 | lbl_path = str(self.files[index]).replace('images', 'labels').replace('.jpg', '.png') 40 | 41 | image = io.read_image(img_path, io.ImageReadMode.RGB) 42 | label = io.read_image(lbl_path) 43 | 44 | if self.transform: 45 | image, label = self.transform(image, label) 46 | return image, label.squeeze().long() 47 | 48 | 49 | if __name__ == '__main__': 50 | from semseg.utils.visualize import visualize_dataset_sample 51 | visualize_dataset_sample(MapillaryVistas, '/home/sithu/datasets/Mapillary') -------------------------------------------------------------------------------- 
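The short usage sketch below is an editorial illustration, not a file from the repository: it shows how the dataset classes dumped above (MapillaryVistas is used as the example) can be consumed with a plain PyTorch DataLoader. The dataset root path and the minimal to_float transform are assumptions standing in for the repository's own augmentation pipeline (semseg/augmentations.py), which is not shown in this part of the dump.
# Illustrative usage sketch (assumed paths and transform; not a repository file).
import torch
from torch.utils.data import DataLoader
from semseg.datasets.mapillary import MapillaryVistas


def to_float(image: torch.Tensor, label: torch.Tensor):
    # Stand-in transform: scale uint8 images to [0, 1]; labels pass through unchanged.
    return image.float() / 255.0, label


if __name__ == '__main__':
    # '/path/to/Mapillary' is a placeholder for a local copy of the dataset.
    dataset = MapillaryVistas('/path/to/Mapillary', split='train', transform=to_float)
    # batch_size=1 because raw images differ in size until a resize/crop transform is applied.
    loader = DataLoader(dataset, batch_size=1, shuffle=True)
    image, label = next(iter(loader))
    print(image.shape, label.shape)  # e.g. [1, 3, H, W] and [1, H, W]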
/semseg/datasets/suim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.utils.data import Dataset 4 | from torchvision import io 5 | from pathlib import Path 6 | from typing import Tuple 7 | from PIL import Image 8 | from torchvision.transforms import functional as TF 9 | 10 | 11 | class SUIM(Dataset): 12 | CLASSES = ['water', 'human divers', 'aquatic plants and sea-grass', 'wrecks and ruins', 'robots (AUVs/ROVs/instruments)', 'reefs and invertebrates', 'fish and vertebrates', 'sea-floor and rocks'] 13 | PALETTE = torch.tensor([[0, 0, 0], [0, 0, 255], [0, 255, 0], [0, 255, 255], [255, 0, 0], [255, 0, 255], [255, 255, 0], [255, 255, 255]]) 14 | 15 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 16 | super().__init__() 17 | assert split in ['train', 'val'] 18 | self.split = 'train_val' if split == 'train' else 'TEST' 19 | self.transform = transform 20 | self.n_classes = len(self.CLASSES) 21 | self.ignore_label = 255 22 | 23 | img_path = Path(root) / self.split / 'images' 24 | self.files = list(img_path.glob("*.jpg")) 25 | 26 | if not self.files: 27 | raise Exception(f"No images found in {img_path}") 28 | print(f"Found {len(self.files)} {split} images.") 29 | 30 | def __len__(self) -> int: 31 | return len(self.files) 32 | 33 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 34 | img_path = str(self.files[index]) 35 | lbl_path = str(self.files[index]).replace('images', 'masks').replace('.jpg', '.bmp') 36 | 37 | image = io.read_image(img_path) 38 | label = TF.pil_to_tensor(Image.open(lbl_path).convert('RGB')) 39 | 40 | if self.transform: 41 | image, label = self.transform(image, label) 42 | return image, self.encode(label).long() 43 | 44 | def encode(self, label: Tensor) -> Tensor: 45 | label = label.permute(1, 2, 0) 46 | mask = torch.zeros(label.shape[:-1]) 47 | 48 | for index, color in enumerate(self.PALETTE): 49 | bool_mask = torch.eq(label, color) 50 | class_map = torch.all(bool_mask, dim=-1) 51 | mask[class_map] = index 52 | return mask 53 | 54 | 55 | if __name__ == '__main__': 56 | from semseg.utils.visualize import visualize_dataset_sample 57 | visualize_dataset_sample(SUIM, '/home/sithu/datasets/SUIM') -------------------------------------------------------------------------------- /semseg/datasets/sunrgbd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch import Tensor 4 | from torch.utils.data import Dataset 5 | from torchvision import io 6 | from scipy import io as sio 7 | from pathlib import Path 8 | from typing import Tuple 9 | 10 | 11 | class SunRGBD(Dataset): 12 | """ 13 | num_classes: 37 14 | """ 15 | CLASSES = [ 16 | 'wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 'bookshelf', 'picture', 'counter', 'blinds', 'desk', 'shelves', 'curtain', 'dresser', 'pillow', 'mirror', 17 | 'floor mat', 'clothes', 'ceiling', 'books', 'fridge', 'tv', 'paper', 'towel', 'shower curtain', 'box', 'whiteboard', 'person', 'night stand', 'toilet', 'sink', 'lamp', 'bathtub', 'bag' 18 | ] 19 | 20 | PALETTE = torch.tensor([ 21 | (119, 119, 119), (244, 243, 131), (137, 28, 157), (150, 255, 255), (54, 114, 113), (0, 0, 176), (255, 69, 0), (87, 112, 255), (0, 163, 33), 22 | (255, 150, 255), (255, 180, 10), (101, 70, 86), (38, 230, 0), (255, 120, 70), (117, 41, 121), (150, 255, 0), (132, 0, 255), (24, 209, 255), 23 | (191, 130, 35), (219, 200, 109), 
(154, 62, 86), (255, 190, 190), (255, 0, 255), (152, 163, 55), (192, 79, 212), (230, 230, 230), (53, 130, 64), 24 | (155, 249, 152), (87, 64, 34), (214, 209, 175), (170, 0, 59), (255, 0, 0), (193, 195, 234), (70, 72, 115), (255, 255, 0), (52, 57, 131), (12, 83, 45) 25 | ]) 26 | 27 | def __init__(self, root: str, split: str = 'train', transform = None) -> None: 28 | super().__init__() 29 | assert split in ['alltrain', 'train', 'val', 'test'] 30 | self.transform = transform 31 | self.n_classes = len(self.CLASSES) 32 | self.ignore_label = -1 33 | self.files, self.labels = self.get_data(root, split) 34 | print(f"Found {len(self.files)} {split} images.") 35 | 36 | def get_data(self, root: str, split: str): 37 | root = Path(root) 38 | files, labels = [], [] 39 | split_path = root / 'SUNRGBDtoolbox' / 'traintestSUNRGBD' / 'allsplit.mat' 40 | split_mat = sio.loadmat(split_path, squeeze_me=True, struct_as_record=False) 41 | if split == 'train': 42 | file_lists = split_mat['trainvalsplit'].train 43 | elif split == 'val': 44 | file_lists = split_mat['trainvalsplit'].val 45 | elif split == 'test': 46 | file_lists = split_mat['alltest'] 47 | else: 48 | file_lists = split_mat['alltrain'] 49 | 50 | for fl in file_lists: 51 | real_fl = root / fl.split('/n/fs/sun3d/data/')[-1] 52 | files.append(str(list((real_fl / 'image').glob('*.jpg'))[0])) 53 | labels.append(real_fl / 'seg.mat') 54 | 55 | assert len(files) == len(labels) 56 | return files, labels 57 | 58 | def __len__(self) -> int: 59 | return len(self.files) 60 | 61 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 62 | image = io.read_image(self.files[index], io.ImageReadMode.RGB) 63 | label = sio.loadmat(self.labels[index], squeeze_me=True, struct_as_record=False)['seglabel'] 64 | label = torch.from_numpy(label.astype(np.uint8)).unsqueeze(0) 65 | 66 | if self.transform: 67 | image, label = self.transform(image, label) 68 | return image, self.encode(label.squeeze()).long() - 1 # subtract -1 to remove void class 69 | 70 | def encode(self, label: Tensor) -> Tensor: 71 | label[label > self.n_classes] = 0 72 | return label 73 | 74 | 75 | if __name__ == '__main__': 76 | from semseg.utils.visualize import visualize_dataset_sample 77 | visualize_dataset_sample(SunRGBD, '/home/sithu/datasets/sunrgbd') -------------------------------------------------------------------------------- /semseg/datasets/uavid2020.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : uavid2020.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/4/30 下午8:02 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | import os 10 | import os.path as osp 11 | import torch 12 | import logging 13 | from torch import Tensor 14 | from torch.utils.data import Dataset 15 | from torchvision import io 16 | from pathlib import Path 17 | from typing import Tuple 18 | from glob import glob 19 | 20 | 21 | class UAVid2020(Dataset): 22 | """UAVid2020 dataset. 23 | 24 | In segmentation map annotation for UAVid2020, 0 stands for background, which is 25 | included in 8 categories. ``reduce_zero_label`` is fixed to False. The 26 | ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to '.png', too. 27 | In UAVid2020, 200 images for training, 70 images for validating, and 150 images for testing. 
28 | The 8 classes and corresponding label color (R,G,B) are as follows: 29 | 'label name' 'R,G,B' 'label id' 30 | Background clutter (0,0,0) 0 31 | Building (128,0,0) 1 32 | Road (128,64,128) 2 33 | Static car (192,0,192) 3 34 | Tree (0,128,0) 4 35 | Low vegetation (128,128,0) 5 36 | Human (64,64,0) 6 37 | Moving car (64,0,128) 7 38 | 39 | """ 40 | 41 | CLASSES = ('Background clutter', 'Building', 'Road', 'Static car', 42 | 'Tree', 'Low vegetation', 'Human', 'Moving car') 43 | 44 | PALETTE = torch.tensor([[0, 0, 0], [128, 0, 0], [128, 64, 128], [192, 0, 192], 45 | [0, 128, 0], [128, 128, 0], [64, 64, 0], [64, 0, 128]]) 46 | 47 | SMALL_OBJECT = [3, 6, 7] 48 | 49 | def __init__(self, root: str, split: str = 'train', transform=None, preload=False, **kwargs) -> None: 50 | super().__init__() 51 | assert split in ['train', 'val', 'test'] 52 | # assert split in ['train', 'val', 'test'] 53 | self.split = split 54 | self.transform = transform 55 | self.n_classes = len(self.CLASSES) 56 | self.ignore_label = 255 57 | self.preload = preload 58 | self.pairs = [] 59 | 60 | imgs = glob(osp.join(root, 'img_dir', self.split) + '/*.png') 61 | for img_path in imgs: 62 | lbl_path = img_path.replace('img_dir', 'ann_dir') 63 | data_pair = [ 64 | io.read_image(img_path) if self.preload else img_path, 65 | io.read_image(lbl_path)[-1:] if self.preload else lbl_path, 66 | ] 67 | self.pairs.append(data_pair) 68 | 69 | assert len(self.pairs) > 0, f"No images found in {root}" 70 | logging.info(f"Found {len(self.pairs)} {split} images.") 71 | 72 | def __len__(self) -> int: 73 | return len(self.pairs) 74 | 75 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 76 | image, label = self.pairs[index] 77 | if not self.preload: 78 | image = io.read_image(image) 79 | label = io.read_image(label)[-1:] 80 | 81 | if self.transform: 82 | image, label = self.transform(image, label) 83 | return image, torch.squeeze(label.long()) 84 | 85 | 86 | if __name__ == '__main__': 87 | _dataset = UAVid2020('../../data/UAVid2020_mm', 'train', preload=False) 88 | for _i, _l in _dataset: 89 | print(_i.size(), _l.size()) 90 | -------------------------------------------------------------------------------- /semseg/datasets/udd6.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : uavid2020.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/4/30 下午8:02 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | import os 10 | import os.path as osp 11 | import torch 12 | import logging 13 | from torch import Tensor 14 | from torch.utils.data import Dataset 15 | from torchvision import io 16 | from pathlib import Path 17 | from typing import Tuple 18 | from glob import glob 19 | 20 | 21 | class UDD6(Dataset): 22 | """UDD6 dataset. 
23 | 24 | 'label name' 'R,G,B' 'label id' 25 | Other (0,0,0) 0 26 | Facade (102,102,156) 1 27 | Road (128,64,128) 2 28 | Vegetation (107,142,35) 3 29 | Vehicle (0,0,142) 4 30 | Roof (70,70,70) 5 31 | 32 | """ 33 | 34 | CLASSES = ('Other', 'Facade', 'Road', 'Vegetation', 'Vehicle', 'Roof') 35 | 36 | PALETTE = torch.tensor([[0, 0, 0], [102, 102, 156], [128, 64, 128], [107, 142, 35], 37 | [0, 0, 142], [70, 70, 70]]) 38 | 39 | SMALL_OBJECT = [4] 40 | 41 | def __init__(self, root: str, split: str = 'train', transform=None, preload=False) -> None: 42 | super().__init__() 43 | assert split in ['train', 'val'] 44 | # assert split in ['train', 'val', 'test'] 45 | self.split = split 46 | self.transform = transform 47 | self.n_classes = len(self.CLASSES) 48 | self.ignore_label = 255 49 | self.preload = preload 50 | self.pairs = [] 51 | # r=osp.join(root, 'img_dir', self.split) + '/*.png' 52 | imgs = glob(osp.join(root, 'img_dir', self.split) + '/*.png') 53 | for img_path in imgs: 54 | lbl_path = img_path.replace('img_dir', 'ann_dir') 55 | data_pair = [ 56 | io.read_image(img_path) if self.preload else img_path, 57 | io.read_image(lbl_path)[-1:] if self.preload else lbl_path, 58 | ] 59 | self.pairs.append(data_pair) 60 | 61 | assert len(self.pairs) > 0, f"No images found in {root}" 62 | logging.info(f"Found {len(self.pairs)} {split} images.") 63 | 64 | def __len__(self) -> int: 65 | return len(self.pairs) 66 | 67 | def __getitem__(self, index: int) -> Tuple[Tensor, Tensor]: 68 | image, label = self.pairs[index] 69 | if not self.preload: 70 | image = io.read_image(image) 71 | label = io.read_image(label)[-1:] 72 | 73 | if self.transform: 74 | image, label = self.transform(image, label) 75 | return image, torch.squeeze(label.long()) 76 | 77 | 78 | if __name__ == '__main__': 79 | _dataset = UDD6('../../data/UDD6', 'train', preload=False) 80 | for _i, _l in _dataset: 81 | break 82 | -------------------------------------------------------------------------------- /semseg/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from typing import Tuple 4 | 5 | 6 | class Metrics: 7 | 8 | def __init__(self, num_classes: int, ignore_label: int, device) -> None: 9 | self.ignore_label = ignore_label 10 | self.num_classes = num_classes 11 | self.hist = torch.zeros(num_classes, num_classes).cuda() 12 | 13 | def update(self, pred: Tensor, target: Tensor) -> None: 14 | pred = pred.argmax(dim=1) 15 | keep = target != self.ignore_label 16 | self.hist += torch.bincount(target[keep] * self.num_classes + pred[keep], 17 | minlength=self.num_classes ** 2).view(self.num_classes, self.num_classes) 18 | 19 | def compute_iou(self) -> Tuple[Tensor, Tensor]: 20 | ious = self.hist.diag() / (self.hist.sum(0) + self.hist.sum(1) - self.hist.diag()) 21 | if self.ignore_label < self.num_classes: 22 | ious[self.ignore_label] = torch.tensor(float('nan')) 23 | miou = ious[~ious.isnan()].mean().item() 24 | ious *= 100 25 | miou *= 100 26 | return ious.cpu().numpy().round(2).tolist(), round(miou, 2) 27 | 28 | def compute_f1(self) -> Tuple[Tensor, Tensor]: 29 | f1 = 2 * self.hist.diag() / (self.hist.sum(0) + self.hist.sum(1)) 30 | if self.ignore_label < self.num_classes: 31 | f1[self.ignore_label] = torch.tensor(float('nan')) 32 | mf1 = f1[~f1.isnan()].mean().item() 33 | f1 *= 100 34 | mf1 *= 100 35 | return f1.cpu().numpy().round(2).tolist(), round(mf1, 2) 36 | 37 | def compute_pixel_acc(self) -> Tuple[Tensor, Tensor]: 38 | acc = self.hist.diag() / 
self.hist.sum(1) 39 | if self.ignore_label < self.num_classes: 40 | acc[self.ignore_label] = torch.tensor(float('nan')) 41 | macc = acc[~acc.isnan()].mean().item() 42 | acc *= 100 43 | macc *= 100 44 | return acc.cpu().numpy().round(2).tolist(), round(macc, 2) 45 | 46 | def compute_oa(self): 47 | oa = self.hist.diag().sum() / self.hist.sum() 48 | return oa.item() 49 | -------------------------------------------------------------------------------- /semseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .segformer import SegFormer 2 | # from .ddrnet import DDRNet 3 | from .ddrnet_official import DDRNet 4 | from .fchardnet import FCHarDNet 5 | from .sfnet import SFNet 6 | from .bisenetv1 import BiSeNetv1 7 | from .bisenetv2 import BiSeNetv2 8 | from .lawin import Lawin 9 | 10 | # added models 11 | from .deeplabv3plus import DeeplabV3Plus 12 | from .pspnet import PSPNet 13 | from .upernet import UperNet 14 | # from .sosnet_ablation import SOSNetBaseline, SOSNetSB, SOSNetDFEMABL 15 | from .fast_scnn import FastSCNN 16 | from .ccnet import CCNet 17 | from .topformer import TopFormer 18 | from .pidnet import PIDNet 19 | 20 | 21 | __all__ = [ 22 | 'SegFormer', 23 | 'Lawin', 24 | 'SFNet', 25 | 'BiSeNetv1', 26 | 'TopFormer', 27 | 'PSPNet', 28 | 'DeeplabV3Plus', 29 | 'UperNet', 30 | 'CCNet', 31 | # Standalone Models 32 | 'FastSCNN', 33 | 'DDRNet', 34 | 'FCHarDNet', 35 | 'BiSeNetv2', 36 | 'PIDNet', 37 | ] 38 | -------------------------------------------------------------------------------- /semseg/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet, resnet_settings 2 | from .resnetd import ResNetD, resnetd_settings 3 | from .micronet import MicroNet, micronet_settings 4 | from .mobilenetv2 import MobileNetV2, mobilenetv2_settings 5 | from .mobilenetv3_ import MobileNetV3, mobilenetv3_settings 6 | 7 | from .mit import MiT, mit_settings 8 | from .pvt import PVTv2, pvtv2_settings 9 | from .rest import ResT, rest_settings 10 | from .poolformer import PoolFormer, poolformer_settings 11 | from .convnext import ConvNeXt, convnext_settings 12 | from .topformer import TokenPyramidTransformer, topformer_cfgs 13 | 14 | 15 | __all__ = [ 16 | 'ResNet', 17 | 'ResNetD', 18 | 'MicroNet', 19 | 'MobileNetV2', 20 | 'MobileNetV3', 21 | 22 | 'MiT', 23 | 'PVTv2', 24 | 'ResT', 25 | 'PoolFormer', 26 | 'ConvNeXt', 27 | 'TokenPyramidTransformer', 28 | ] -------------------------------------------------------------------------------- /semseg/models/backbones/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | 4 | 5 | class ConvModule(nn.Sequential): 6 | def __init__(self, c1, c2, k, s=1, p=0, d=1, g=1): 7 | super().__init__( 8 | nn.Conv2d(c1, c2, k, s, p, d, g, bias=False), 9 | nn.BatchNorm2d(c2), 10 | nn.ReLU6(True) 11 | ) 12 | 13 | 14 | class InvertedResidual(nn.Module): 15 | def __init__(self, c1, c2, s, expand_ratio): 16 | super().__init__() 17 | ch = int(round(c1 * expand_ratio)) 18 | self.use_res_connect = s == 1 and c1 == c2 19 | 20 | layers = [] 21 | 22 | if expand_ratio != 1: 23 | layers.append(ConvModule(c1, ch, 1)) 24 | 25 | layers.extend([ 26 | ConvModule(ch, ch, 3, s, 1, g=ch), 27 | nn.Conv2d(ch, c2, 1, bias=False), 28 | nn.BatchNorm2d(c2) 29 | ]) 30 | 31 | self.conv = nn.Sequential(*layers) 32 | 33 | def forward(self, x: Tensor) -> Tensor: 34 | if 
self.use_res_connect: 35 | return x + self.conv(x) 36 | else: 37 | return self.conv(x) 38 | 39 | 40 | mobilenetv2_settings = { 41 | '1.0': [] 42 | } 43 | 44 | 45 | class MobileNetV2(nn.Module): 46 | def __init__(self, variant: str = None): 47 | super().__init__() 48 | self.out_indices = [3, 6, 13, 17] 49 | self.channels = [24, 32, 96, 320] 50 | input_channel = 32 51 | 52 | inverted_residual_setting = [ 53 | # t, c, n, s 54 | [1, 16, 1, 1], 55 | [6, 24, 2, 2], 56 | [6, 32, 3, 2], 57 | [6, 64, 4, 2], 58 | [6, 96, 3, 1], 59 | [6, 160, 3, 2], 60 | [6, 320, 1, 1], 61 | ] 62 | 63 | self.features = nn.ModuleList([ConvModule(3, input_channel, 3, 2, 1)]) 64 | 65 | for t, c, n, s in inverted_residual_setting: 66 | output_channel = c 67 | for i in range(n): 68 | stride = s if i == 0 else 1 69 | self.features.append(InvertedResidual(input_channel, output_channel, stride, t)) 70 | input_channel = output_channel 71 | 72 | def forward(self, x: Tensor) -> Tensor: 73 | outs = [] 74 | for i, m in enumerate(self.features): 75 | x = m(x) 76 | if i in self.out_indices: 77 | outs.append(x) 78 | return outs 79 | 80 | 81 | if __name__ == '__main__': 82 | model = MobileNetV2() 83 | # model.load_state_dict(torch.load('checkpoints/backbones/mobilenet_v2.pth', map_location='cpu'), strict=False) 84 | model.eval() 85 | x = torch.randn(1, 3, 224, 224) 86 | outs = model(x) 87 | for y in outs: 88 | print(y.shape) 89 | 90 | from fvcore.nn import flop_count_table, FlopCountAnalysis 91 | flops = FlopCountAnalysis(model, x) 92 | print(flops.total() / 1e6) 93 | # print(flop_count_table(flops)) -------------------------------------------------------------------------------- /semseg/models/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | from torch import nn 4 | from semseg.models.backbones import * 5 | from semseg.models.layers import trunc_normal_ 6 | 7 | 8 | class BaseModel(nn.Module): 9 | def __init__(self, backbone: str = 'MiT-B0', num_classes: int = 19) -> None: 10 | super().__init__() 11 | self.backbone = None 12 | self.num_classes = num_classes 13 | if backbone is not None and backbone != 'None': 14 | backbone, variant = backbone.split('-') 15 | self.backbone = eval(backbone)(variant) 16 | 17 | def _init_weights(self, m: nn.Module) -> None: 18 | if isinstance(m, nn.Linear): 19 | trunc_normal_(m.weight, std=.02) 20 | if m.bias is not None: 21 | nn.init.zeros_(m.bias) 22 | elif isinstance(m, nn.Conv2d): 23 | fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 24 | fan_out //= m.groups 25 | m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) 26 | if m.bias is not None: 27 | nn.init.zeros_(m.bias) 28 | elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)): 29 | nn.init.ones_(m.weight) 30 | nn.init.zeros_(m.bias) 31 | 32 | def init_pretrained(self, pretrained: str = None) -> None: 33 | if pretrained and isinstance(self.backbone, nn.Module): 34 | self.backbone.load_state_dict(torch.load(pretrained, map_location='cpu'), strict=False) 35 | 36 | def freeze_backbone(self): 37 | if isinstance(self.backbone, nn.Module): 38 | for p in self.backbone.parameters(): 39 | p.requires_grad = False 40 | 41 | def unfreeze_backbone(self): 42 | if isinstance(self.backbone, nn.Module): 43 | for p in self.backbone.parameters(): 44 | p.requires_grad = True 45 | -------------------------------------------------------------------------------- /semseg/models/custom_cnn.py: -------------------------------------------------------------------------------- 1 | import
torch 2 | from torch import Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.base import BaseModel 5 | from semseg.models.heads import UPerHead 6 | 7 | 8 | class CustomCNN(BaseModel): 9 | def __init__(self, backbone: str = 'ResNet-50', num_classes: int = 19): 10 | super().__init__(backbone, num_classes) 11 | self.decode_head = UPerHead(self.backbone.channels, 256, num_classes) 12 | self.apply(self._init_weights) 13 | 14 | def forward(self, x: Tensor) -> Tensor: 15 | y = self.backbone(x) 16 | y = self.decode_head(y) # 4x reduction in image size 17 | y = F.interpolate(y, size=x.shape[2:], mode='bilinear', align_corners=False) # to original image shape 18 | return y 19 | 20 | 21 | if __name__ == '__main__': 22 | model = CustomCNN('ResNet-50', 19) 23 | # model.init_pretrained('checkpoints/backbones/resnet/resnet18.pth') 24 | from semseg.utils.utils import count_parameters 25 | print(f'model params cnt: {count_parameters(model)}MB') 26 | x = torch.randn(2, 3, 224, 224) 27 | y = model(x) 28 | print(y.shape) -------------------------------------------------------------------------------- /semseg/models/custom_vit.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.base import BaseModel 5 | from semseg.models.heads import UPerHead 6 | 7 | 8 | class CustomVIT(BaseModel): 9 | def __init__(self, backbone: str = 'ResT-S', num_classes: int = 19) -> None: 10 | super().__init__(backbone, num_classes) 11 | self.decode_head = UPerHead(self.backbone.channels, 128, num_classes) 12 | self.apply(self._init_weights) 13 | 14 | def forward(self, x: Tensor) -> Tensor: 15 | y = self.backbone(x) 16 | y = self.decode_head(y) # 4x reduction in image size 17 | y = F.interpolate(y, size=x.shape[2:], mode='bilinear', align_corners=False) # to original image shape 18 | return y 19 | 20 | 21 | if __name__ == '__main__': 22 | model = CustomVIT('ResT-S', 19) 23 | model.init_pretrained('checkpoints/backbones/rest/rest_small.pth') 24 | x = torch.zeros(2, 3, 512, 512) 25 | y = model(x) 26 | print(y.shape) 27 | 28 | 29 | -------------------------------------------------------------------------------- /semseg/models/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .upernet import UPerHead 2 | from .segformer import SegFormerHead 3 | from .sfnet import SFHead 4 | from .fpn import FPNHead 5 | from .fapn import FaPNHead 6 | from .fcn import FCNHead 7 | from .condnet import CondHead 8 | from .lawin import LawinHead 9 | 10 | __all__ = ['UPerHead', 'SegFormerHead', 'SFHead', 'FPNHead', 'FaPNHead', 'FCNHead', 'CondHead', 'LawinHead'] -------------------------------------------------------------------------------- /semseg/models/heads/condnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.layers import ConvModule 5 | 6 | 7 | class CondHead(nn.Module): 8 | def __init__(self, in_channel: int = 2048, channel: int = 512, num_classes: int = 19): 9 | super().__init__() 10 | self.num_classes = num_classes 11 | self.weight_num = channel * num_classes 12 | self.bias_num = num_classes 13 | 14 | self.conv = ConvModule(in_channel, channel, 1) 15 | self.dropout = nn.Dropout2d(0.1) 16 | 17 | self.guidance_project = nn.Conv2d(channel, num_classes, 1) 18 | self.filter_project = 
nn.Conv2d(channel*num_classes, self.weight_num + self.bias_num, 1, groups=num_classes) 19 | 20 | def forward(self, features) -> Tensor: 21 | x = self.dropout(self.conv(features[-1])) 22 | B, C, H, W = x.shape 23 | guidance_mask = self.guidance_project(x) 24 | cond_logit = guidance_mask 25 | 26 | key = x 27 | value = x 28 | guidance_mask = guidance_mask.softmax(dim=1).view(*guidance_mask.shape[:2], -1) 29 | key = key.view(B, C, -1).permute(0, 2, 1) 30 | 31 | cond_filters = torch.matmul(guidance_mask, key) 32 | cond_filters /= H * W 33 | cond_filters = cond_filters.view(B, -1, 1, 1) 34 | cond_filters = self.filter_project(cond_filters) 35 | cond_filters = cond_filters.view(B, -1) 36 | 37 | weight, bias = torch.split(cond_filters, [self.weight_num, self.bias_num], dim=1) 38 | weight = weight.reshape(B * self.num_classes, -1, 1, 1) 39 | bias = bias.reshape(B * self.num_classes) 40 | 41 | value = value.view(-1, H, W).unsqueeze(0) 42 | seg_logit = F.conv2d(value, weight, bias, 1, 0, groups=B).view(B, self.num_classes, H, W) 43 | 44 | if self.training: 45 | return cond_logit, seg_logit 46 | return seg_logit 47 | 48 | 49 | if __name__ == '__main__': 50 | from semseg.models.backbones import ResNetD 51 | backbone = ResNetD('50') 52 | head = CondHead() 53 | x = torch.randn(2, 3, 224, 224) 54 | features = backbone(x) 55 | outs = head(features) 56 | for out in outs: 57 | out = F.interpolate(out, size=x.shape[-2:], mode='bilinear', align_corners=False) 58 | print(out.shape) -------------------------------------------------------------------------------- /semseg/models/heads/fapn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from torchvision.ops import DeformConv2d 5 | from semseg.models.layers import ConvModule 6 | 7 | 8 | class DCNv2(nn.Module): 9 | def __init__(self, c1, c2, k, s, p, g=1): 10 | super().__init__() 11 | self.dcn = DeformConv2d(c1, c2, k, s, p, groups=g) 12 | self.offset_mask = nn.Conv2d(c2, g* 3 * k * k, k, s, p) 13 | self._init_offset() 14 | 15 | def _init_offset(self): 16 | self.offset_mask.weight.data.zero_() 17 | self.offset_mask.bias.data.zero_() 18 | 19 | def forward(self, x, offset): 20 | out = self.offset_mask(offset) 21 | o1, o2, mask = torch.chunk(out, 3, dim=1) 22 | offset = torch.cat([o1, o2], dim=1) 23 | mask = mask.sigmoid() 24 | return self.dcn(x, offset, mask) 25 | 26 | 27 | class FSM(nn.Module): 28 | def __init__(self, c1, c2): 29 | super().__init__() 30 | self.conv_atten = nn.Conv2d(c1, c1, 1, bias=False) 31 | self.conv = nn.Conv2d(c1, c2, 1, bias=False) 32 | 33 | def forward(self, x: Tensor) -> Tensor: 34 | atten = self.conv_atten(F.avg_pool2d(x, x.shape[2:])).sigmoid() 35 | feat = torch.mul(x, atten) 36 | x = x + feat 37 | return self.conv(x) 38 | 39 | 40 | class FAM(nn.Module): 41 | def __init__(self, c1, c2): 42 | super().__init__() 43 | self.lateral_conv = FSM(c1, c2) 44 | self.offset = nn.Conv2d(c2*2, c2, 1, bias=False) 45 | self.dcpack_l2 = DCNv2(c2, c2, 3, 1, 1, 8) 46 | 47 | def forward(self, feat_l, feat_s): 48 | feat_up = feat_s 49 | if feat_l.shape[2:] != feat_s.shape[2:]: 50 | feat_up = F.interpolate(feat_s, size=feat_l.shape[2:], mode='bilinear', align_corners=False) 51 | 52 | feat_arm = self.lateral_conv(feat_l) 53 | offset = self.offset(torch.cat([feat_arm, feat_up*2], dim=1)) 54 | 55 | feat_align = F.relu(self.dcpack_l2(feat_up, offset)) 56 | return feat_align + feat_arm 57 | 58 | 59 | class FaPNHead(nn.Module): 60 | def 
__init__(self, in_channels, channel=128, num_classes=19): 61 | super().__init__() 62 | in_channels = in_channels[::-1] 63 | self.align_modules = nn.ModuleList([ConvModule(in_channels[0], channel, 1)]) 64 | self.output_convs = nn.ModuleList([]) 65 | 66 | for ch in in_channels[1:]: 67 | self.align_modules.append(FAM(ch, channel)) 68 | self.output_convs.append(ConvModule(channel, channel, 3, 1, 1)) 69 | 70 | self.conv_seg = nn.Conv2d(channel, num_classes, 1) 71 | self.dropout = nn.Dropout2d(0.1) 72 | 73 | def forward(self, features) -> Tensor: 74 | features = features[::-1] 75 | out = self.align_modules[0](features[0]) 76 | 77 | for feat, align_module, output_conv in zip(features[1:], self.align_modules[1:], self.output_convs): 78 | out = align_module(feat, out) 79 | out = output_conv(out) 80 | out = self.conv_seg(self.dropout(out)) 81 | return out 82 | 83 | 84 | if __name__ == '__main__': 85 | from semseg.models.backbones import ResNet 86 | backbone = ResNet('50') 87 | head = FaPNHead([256, 512, 1024, 2048], 128, 19) 88 | x = torch.randn(2, 3, 224, 224) 89 | features = backbone(x) 90 | out = head(features) 91 | out = F.interpolate(out, size=x.shape[-2:], mode='bilinear', align_corners=False) 92 | print(out.shape) -------------------------------------------------------------------------------- /semseg/models/heads/fcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.layers import ConvModule 5 | 6 | 7 | class FCNHead(nn.Module): 8 | def __init__(self, c1, c2, num_classes: int = 19): 9 | super().__init__() 10 | self.conv = ConvModule(c1, c2, 1) 11 | self.cls = nn.Conv2d(c2, num_classes, 1) 12 | 13 | def forward(self, features) -> Tensor: 14 | x = self.conv(features[-1]) 15 | x = self.cls(x) 16 | return x 17 | 18 | 19 | if __name__ == '__main__': 20 | from semseg.models.backbones import ResNet 21 | backbone = ResNet('50') 22 | head = FCNHead(2048, 256, 19) 23 | x = torch.randn(2, 3, 224, 224) 24 | features = backbone(x) 25 | out = head(features) 26 | out = F.interpolate(out, size=x.shape[-2:], mode='bilinear', align_corners=False) 27 | print(out.shape) 28 | -------------------------------------------------------------------------------- /semseg/models/heads/fpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.layers import ConvModule 5 | 6 | 7 | class FPNHead(nn.Module): 8 | """Panoptic Feature Pyramid Networks 9 | https://arxiv.org/abs/1901.02446 10 | """ 11 | def __init__(self, in_channels, channel=128, num_classes=19): 12 | super().__init__() 13 | self.lateral_convs = nn.ModuleList([]) 14 | self.output_convs = nn.ModuleList([]) 15 | 16 | for ch in in_channels[::-1]: 17 | self.lateral_convs.append(ConvModule(ch, channel, 1)) 18 | self.output_convs.append(ConvModule(channel, channel, 3, 1, 1)) 19 | 20 | self.conv_seg = nn.Conv2d(channel, num_classes, 1) 21 | self.dropout = nn.Dropout2d(0.1) 22 | 23 | def forward(self, features) -> Tensor: 24 | features = features[::-1] 25 | out = self.lateral_convs[0](features[0]) 26 | 27 | for i in range(1, len(features)): 28 | out = F.interpolate(out, scale_factor=2.0, mode='nearest') 29 | out = out + self.lateral_convs[i](features[i]) 30 | out = self.output_convs[i](out) 31 | out = self.conv_seg(self.dropout(out)) 32 | return out 33 | 34 | 35 | if __name__ == 
'__main__': 36 | from semseg.models.backbones import ResNet 37 | backbone = ResNet('50') 38 | head = FPNHead([256, 512, 1024, 2048], 128, 19) 39 | x = torch.randn(2, 3, 224, 224) 40 | features = backbone(x) 41 | out = head(features) 42 | out = F.interpolate(out, size=x.shape[-2:], mode='bilinear', align_corners=False) 43 | print(out.shape) -------------------------------------------------------------------------------- /semseg/models/heads/segformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from typing import Tuple 4 | from torch.nn import functional as F 5 | 6 | 7 | class MLP(nn.Module): 8 | def __init__(self, dim, embed_dim): 9 | super().__init__() 10 | self.proj = nn.Linear(dim, embed_dim) 11 | 12 | def forward(self, x: Tensor) -> Tensor: 13 | x = x.flatten(2).transpose(1, 2) 14 | x = self.proj(x) 15 | return x 16 | 17 | 18 | class ConvModule(nn.Module): 19 | def __init__(self, c1, c2): 20 | super().__init__() 21 | self.conv = nn.Conv2d(c1, c2, 1, bias=False) 22 | self.bn = nn.BatchNorm2d(c2) # use SyncBN in original 23 | self.activate = nn.ReLU(True) 24 | 25 | def forward(self, x: Tensor) -> Tensor: 26 | return self.activate(self.bn(self.conv(x))) 27 | 28 | 29 | class SegFormerHead(nn.Module): 30 | def __init__(self, dims: list, embed_dim: int = 256, num_classes: int = 19): 31 | super().__init__() 32 | for i, dim in enumerate(dims): 33 | self.add_module(f"linear_c{i+1}", MLP(dim, embed_dim)) 34 | 35 | self.linear_fuse = ConvModule(embed_dim*4, embed_dim) 36 | self.linear_pred = nn.Conv2d(embed_dim, num_classes, 1) 37 | self.dropout = nn.Dropout2d(0.1) 38 | 39 | def forward(self, features: Tuple[Tensor, Tensor, Tensor, Tensor]) -> Tensor: 40 | B, _, H, W = features[0].shape 41 | outs = [self.linear_c1(features[0]).permute(0, 2, 1).reshape(B, -1, *features[0].shape[-2:])] 42 | 43 | for i, feature in enumerate(features[1:]): 44 | cf = eval(f"self.linear_c{i+2}")(feature).permute(0, 2, 1).reshape(B, -1, *feature.shape[-2:]) 45 | outs.append(F.interpolate(cf, size=(H, W), mode='bilinear', align_corners=False)) 46 | 47 | seg = self.linear_fuse(torch.cat(outs[::-1], dim=1)) 48 | seg = self.linear_pred(self.dropout(seg)) 49 | return seg -------------------------------------------------------------------------------- /semseg/models/heads/sfnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.layers import ConvModule 5 | from semseg.models.modules import PPM 6 | 7 | 8 | class AlignedModule(nn.Module): 9 | def __init__(self, c1, c2, k=3): 10 | super().__init__() 11 | self.down_h = nn.Conv2d(c1, c2, 1, bias=False) 12 | self.down_l = nn.Conv2d(c1, c2, 1, bias=False) 13 | self.flow_make = nn.Conv2d(c2 * 2, 2, k, 1, 1, bias=False) 14 | 15 | def forward(self, low_feature: Tensor, high_feature: Tensor) -> Tensor: 16 | high_feature_origin = high_feature 17 | H, W = low_feature.shape[-2:] 18 | low_feature = self.down_l(low_feature) 19 | high_feature = self.down_h(high_feature) 20 | high_feature = F.interpolate(high_feature, size=(H, W), mode='bilinear', align_corners=True) 21 | flow = self.flow_make(torch.cat([high_feature, low_feature], dim=1)) 22 | high_feature = self.flow_warp(high_feature_origin, flow, (H, W)) 23 | return high_feature 24 | 25 | def flow_warp(self, x: Tensor, flow: Tensor, size: tuple) -> Tensor: 26 | # norm = torch.tensor(size).reshape(1, 1, 
1, -1) 27 | norm = torch.tensor([[[[*size]]]]).type_as(x).to(x.device) 28 | H = torch.linspace(-1.0, 1.0, size[0]).view(-1, 1).repeat(1, size[1]) 29 | W = torch.linspace(-1.0, 1.0, size[1]).repeat(size[0], 1) 30 | grid = torch.cat((W.unsqueeze(2), H.unsqueeze(2)), dim=2) 31 | grid = grid.repeat(x.shape[0], 1, 1, 1).type_as(x).to(x.device) 32 | grid = grid + flow.permute(0, 2, 3, 1) / norm 33 | output = F.grid_sample(x, grid, align_corners=False) 34 | return output 35 | 36 | 37 | class SFHead(nn.Module): 38 | def __init__(self, in_channels, channel=256, num_classes=19, scales=(1, 2, 3, 6)): 39 | super().__init__() 40 | self.ppm = PPM(in_channels[-1], channel, scales) 41 | 42 | self.fpn_in = nn.ModuleList([]) 43 | self.fpn_out = nn.ModuleList([]) 44 | self.fpn_out_align = nn.ModuleList([]) 45 | 46 | for in_ch in in_channels[:-1]: 47 | self.fpn_in.append(ConvModule(in_ch, channel, 1)) 48 | self.fpn_out.append(ConvModule(channel, channel, 3, 1, 1)) 49 | self.fpn_out_align.append(AlignedModule(channel, channel//2)) 50 | 51 | self.bottleneck = ConvModule(len(in_channels) * channel, channel, 3, 1, 1) 52 | self.dropout = nn.Dropout2d(0.1) 53 | self.conv_seg = nn.Conv2d(channel, num_classes, 1) 54 | 55 | def forward(self, features: list) -> Tensor: 56 | f = self.ppm(features[-1]) 57 | fpn_features = [f] 58 | 59 | for i in reversed(range(len(features) - 1)): 60 | feature = self.fpn_in[i](features[i]) 61 | f = feature + self.fpn_out_align[i](feature, f) 62 | fpn_features.append(self.fpn_out[i](f)) 63 | 64 | fpn_features.reverse() 65 | 66 | for i in range(1, len(fpn_features)): 67 | fpn_features[i] = F.interpolate(fpn_features[i], size=fpn_features[0].shape[-2:], mode='bilinear', align_corners=True) 68 | 69 | output = self.bottleneck(torch.cat(fpn_features, dim=1)) 70 | output = self.conv_seg(self.dropout(output)) 71 | return output 72 | 73 | -------------------------------------------------------------------------------- /semseg/models/heads/upernet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from typing import Tuple 5 | from semseg.models.layers import ConvModule 6 | from semseg.models.modules import PPM 7 | 8 | 9 | class UPerHead(nn.Module): 10 | """Unified Perceptual Parsing for Scene Understanding 11 | https://arxiv.org/abs/1807.10221 12 | scales: Pooling scales used in PPM module applied on the last feature 13 | """ 14 | 15 | def __init__(self, in_channels, channel=128, num_classes: int = 19, scales=(1, 2, 3, 6)): 16 | super().__init__() 17 | # PPM Module 18 | self.ppm = PPM(in_channels[-1], channel, scales) 19 | 20 | # FPN Module 21 | self.fpn_in = nn.ModuleList() 22 | self.fpn_out = nn.ModuleList() 23 | 24 | for in_ch in in_channels[:-1]: # skip the top layer 25 | self.fpn_in.append(ConvModule(in_ch, channel, 1)) 26 | self.fpn_out.append(ConvModule(channel, channel, 3, 1, 1)) 27 | 28 | self.bottleneck = ConvModule(len(in_channels) * channel, channel, 3, 1, 1) 29 | self.dropout = nn.Dropout2d(0.1) 30 | self.conv_seg = nn.Conv2d(channel, num_classes, 1) 31 | 32 | def forward(self, features: Tuple[Tensor, Tensor, Tensor, Tensor]) -> Tensor: 33 | f = self.ppm(features[-1]) 34 | fpn_features = [f] 35 | 36 | for i in reversed(range(len(features) - 1)): 37 | feature = self.fpn_in[i](features[i]) 38 | f = feature + F.interpolate(f, size=feature.shape[-2:], mode='bilinear', align_corners=True) 39 | fpn_features.append(self.fpn_out[i](f)) 40 | 41 | 
fpn_features.reverse() 42 | for i in range(1, len(features)): 43 | fpn_features[i] = F.interpolate(fpn_features[i], size=fpn_features[0].shape[-2:], mode='bilinear', 44 | align_corners=False) 45 | 46 | output = self.bottleneck(torch.cat(fpn_features, dim=1)) 47 | output = self.conv_seg(self.dropout(output)) 48 | return output 49 | 50 | 51 | if __name__ == '__main__': 52 | model = UPerHead([64, 128, 256, 512], 128) 53 | x1 = torch.randn(2, 64, 56, 56) 54 | x2 = torch.randn(2, 128, 28, 28) 55 | x3 = torch.randn(2, 256, 14, 14) 56 | x4 = torch.randn(2, 512, 7, 7) 57 | y = model([x1, x2, x3, x4]) 58 | print(y.shape) 59 | -------------------------------------------------------------------------------- /semseg/models/lawin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.base import BaseModel 5 | from semseg.models.heads import LawinHead 6 | 7 | 8 | class Lawin(BaseModel): 9 | """ 10 | Notes::::: This implementation has larger params and FLOPs than the results reported in the paper. 11 | Will update the code and weights if the original author releases the full code. 12 | """ 13 | def __init__(self, backbone: str = 'MiT-B0', num_classes: int = 19) -> None: 14 | super().__init__(backbone, num_classes) 15 | self.decode_head = LawinHead(self.backbone.channels, 256 if 'B0' in backbone else 512, num_classes) 16 | self.apply(self._init_weights) 17 | 18 | def forward(self, x: Tensor) -> Tensor: 19 | y = self.backbone(x) 20 | y = self.decode_head(y) # 4x reduction in image size 21 | y = F.interpolate(y, size=x.shape[2:], mode='bilinear', align_corners=False) # to original image shape 22 | return y 23 | 24 | 25 | if __name__ == '__main__': 26 | model = Lawin('MiT-B0') 27 | x = torch.zeros(2, 3, 512, 512) 28 | y = model(x) 29 | print(y.shape) -------------------------------------------------------------------------------- /semseg/models/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .common import * 2 | from .initialize import * -------------------------------------------------------------------------------- /semseg/models/layers/common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | 4 | 5 | class ConvModule(nn.Sequential): 6 | def __init__(self, c1, c2, k, s=1, p=0, d=1, g=1): 7 | super().__init__( 8 | nn.Conv2d(c1, c2, k, s, p, d, g, bias=False), 9 | nn.BatchNorm2d(c2), 10 | nn.ReLU(True) 11 | ) 12 | 13 | 14 | class DropPath(nn.Module): 15 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 16 | Copied from timm 17 | This is the same as the DropConnect impl I created for EfficientNet, etc networks, however, 18 | the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... 19 | See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for 20 | changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 21 | 'survival rate' as the argument. 22 | """ 23 | def __init__(self, p: float = None): 24 | super().__init__() 25 | self.p = p 26 | 27 | def forward(self, x: Tensor) -> Tensor: 28 | if self.p == 0. 
or not self.training: 29 | return x 30 | kp = 1 - self.p 31 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) 32 | random_tensor = kp + torch.rand(shape, dtype=x.dtype, device=x.device) 33 | random_tensor.floor_() # binarize 34 | return x.div(kp) * random_tensor -------------------------------------------------------------------------------- /semseg/models/layers/initialize.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import warnings 4 | from torch import nn, Tensor 5 | 6 | 7 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 8 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 9 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 10 | def norm_cdf(x): 11 | # Computes standard normal cumulative distribution function 12 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 13 | 14 | if (mean < a - 2 * std) or (mean > b + 2 * std): 15 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 16 | "The distribution of values may be incorrect.", 17 | stacklevel=2) 18 | 19 | with torch.no_grad(): 20 | # Values are generated by using a truncated uniform distribution and 21 | # then using the inverse CDF for the normal distribution. 22 | # Get upper and lower cdf values 23 | l = norm_cdf((a - mean) / std) 24 | u = norm_cdf((b - mean) / std) 25 | 26 | # Uniformly fill tensor with values from [l, u], then translate to 27 | # [2l-1, 2u-1]. 28 | tensor.uniform_(2 * l - 1, 2 * u - 1) 29 | 30 | # Use inverse cdf transform for normal distribution to get truncated 31 | # standard normal 32 | tensor.erfinv_() 33 | 34 | # Transform to proper mean, std 35 | tensor.mul_(std * math.sqrt(2.)) 36 | tensor.add_(mean) 37 | 38 | # Clamp to ensure it's in the proper range 39 | tensor.clamp_(min=a, max=b) 40 | return tensor 41 | 42 | 43 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 44 | # type: (Tensor, float, float, float, float) -> Tensor 45 | r"""Fills the input Tensor with values drawn from a truncated 46 | normal distribution. The values are effectively drawn from the 47 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 48 | with values outside :math:`[a, b]` redrawn until they are within 49 | the bounds. The method used for generating the random values works 50 | best when :math:`a \leq \text{mean} \leq b`. 
51 | Args: 52 | tensor: an n-dimensional `torch.Tensor` 53 | mean: the mean of the normal distribution 54 | std: the standard deviation of the normal distribution 55 | a: the minimum cutoff value 56 | b: the maximum cutoff value 57 | Examples: 58 | >>> w = torch.empty(3, 5) 59 | >>> nn.init.trunc_normal_(w) 60 | """ 61 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 62 | -------------------------------------------------------------------------------- /semseg/models/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .ppm import PPM 2 | from .psa import PSAP, PSAS 3 | from .dfem import DetailFeatureEnhanceModuleABL 4 | 5 | 6 | __all__ = ['PPM', 'PSAP', 'PSAS', 'DetailFeatureEnhanceModuleABL'] 7 | -------------------------------------------------------------------------------- /semseg/models/modules/attention.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : attention.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/6/6 上午10:20 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class ChannelAttention(nn.Module): 15 | 16 | def __init__(self, in_planes, ratio: int = 1): 17 | super(ChannelAttention, self).__init__() 18 | 19 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 20 | self.max_pool = nn.AdaptiveMaxPool2d(1) 21 | 22 | # MLP 23 | self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False) 24 | self.relu1 = nn.ReLU() 25 | self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False) 26 | 27 | self.sigmoid = nn.Sigmoid() 28 | 29 | def forward(self, x): 30 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 31 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 32 | 33 | out = avg_out + max_out 34 | return self.sigmoid(out) 35 | 36 | 37 | class SpatialAttention(nn.Module): 38 | 39 | def __init__(self, kernel_size=7): 40 | super(SpatialAttention, self).__init__() 41 | 42 | assert kernel_size in (3, 7), 'kernel size must be 3 or 7' 43 | 44 | padding = 3 if kernel_size == 7 else 1 45 | 46 | self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) 47 | self.sigmoid = nn.Sigmoid() 48 | 49 | def forward(self, x): 50 | avg_out = torch.mean(x, dim=1, keepdim=True) 51 | max_out, _ = torch.max(x, dim=1, keepdim=True) 52 | 53 | x = torch.cat([avg_out, max_out], dim=1) 54 | x = self.conv1(x) 55 | return self.sigmoid(x) 56 | 57 | 58 | class CBAM(nn.Module): 59 | 60 | def __init__(self, in_channels, ratio=1, kernel_size=7): 61 | super().__init__() 62 | self.channel_attention = ChannelAttention(in_planes=in_channels, ratio=ratio) 63 | self.spatial_attention = SpatialAttention(kernel_size=kernel_size) 64 | 65 | def forward(self, x): 66 | x = x + x * self.channel_attention(x) 67 | x = x + x * self.spatial_attention(x) 68 | return x 69 | 70 | 71 | class SEModule(nn.Module): 72 | def __init__(self, channel, reduction=4): 73 | super(SEModule, self).__init__() 74 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 75 | self.fc = nn.Sequential( 76 | nn.Linear(channel, channel // reduction, bias=False), 77 | nn.ReLU(inplace=True), 78 | nn.Linear(channel // reduction, channel, bias=False), 79 | nn.ReLU(inplace=True) 80 | ) 81 | 82 | def forward(self, x): 83 | b, c, _, _ = x.size() 84 | y = self.avg_pool(x).view(b, c) 85 | y = self.fc(y).view(b, c, 1, 1) 86 | return x * y.expand_as(x) 87 | 88 | 89 | if __name__ == '__main__': 90 | _net 
= CBAM(24) 91 | _x = torch.randn((1, 24, 256, 128)) 92 | _y = _net(_x) 93 | print(_y.shape) 94 | 95 | from semseg.utils.utils import model_summary, init_logger 96 | 97 | init_logger() 98 | model_summary(_net, (1, 24, 256, 128)) 99 | -------------------------------------------------------------------------------- /semseg/models/modules/cc_attention.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This code is borrowed from Serge-weihao/CCNet-Pure-Pytorch 3 | ''' 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.nn import Softmax 9 | 10 | 11 | def INF(B,H,W): 12 | return -torch.diag(torch.tensor(float("inf")).cuda().repeat(H),0).unsqueeze(0).repeat(B*W,1,1) 13 | 14 | 15 | class CrissCrossAttention(nn.Module): 16 | """ Criss-Cross Attention Module""" 17 | def __init__(self, in_dim): 18 | super(CrissCrossAttention,self).__init__() 19 | self.query_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1) 20 | self.key_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1) 21 | self.value_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1) 22 | self.softmax = Softmax(dim=3) 23 | self.INF = INF 24 | self.gamma = nn.Parameter(torch.zeros(1)) 25 | 26 | 27 | def forward(self, x): 28 | m_batchsize, _, height, width = x.size() 29 | proj_query = self.query_conv(x) 30 | proj_query_H = proj_query.permute(0,3,1,2).contiguous().view(m_batchsize*width,-1,height).permute(0, 2, 1) 31 | proj_query_W = proj_query.permute(0,2,1,3).contiguous().view(m_batchsize*height,-1,width).permute(0, 2, 1) 32 | proj_key = self.key_conv(x) 33 | proj_key_H = proj_key.permute(0,3,1,2).contiguous().view(m_batchsize*width,-1,height) 34 | proj_key_W = proj_key.permute(0,2,1,3).contiguous().view(m_batchsize*height,-1,width) 35 | proj_value = self.value_conv(x) 36 | proj_value_H = proj_value.permute(0,3,1,2).contiguous().view(m_batchsize*width,-1,height) 37 | proj_value_W = proj_value.permute(0,2,1,3).contiguous().view(m_batchsize*height,-1,width) 38 | energy_H = (torch.bmm(proj_query_H, proj_key_H)+self.INF(m_batchsize, height, width)).view(m_batchsize,width,height,height).permute(0,2,1,3) 39 | energy_W = torch.bmm(proj_query_W, proj_key_W).view(m_batchsize,height,width,width) 40 | concate = self.softmax(torch.cat([energy_H, energy_W], 3)) 41 | 42 | att_H = concate[:,:,:,0:height].permute(0,2,1,3).contiguous().view(m_batchsize*width,height,height) 43 | #print(concate) 44 | #print(att_H) 45 | att_W = concate[:,:,:,height:height+width].contiguous().view(m_batchsize*height,width,width) 46 | out_H = torch.bmm(proj_value_H, att_H.permute(0, 2, 1)).view(m_batchsize,width,-1,height).permute(0,2,3,1) 47 | out_W = torch.bmm(proj_value_W, att_W.permute(0, 2, 1)).view(m_batchsize,height,-1,width).permute(0,2,1,3) 48 | #print(out_H.size(),out_W.size()) 49 | return self.gamma*(out_H + out_W) + x 50 | 51 | 52 | 53 | if __name__ == '__main__': 54 | model = CrissCrossAttention(64) 55 | x = torch.randn(2, 64, 5, 6) 56 | out = model(x) 57 | print(out.shape) 58 | -------------------------------------------------------------------------------- /semseg/models/modules/ppm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn, Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.layers import ConvModule 5 | 6 | 7 | class PPM(nn.Module): 8 | """Pyramid Pooling Module in PSPNet 9 | """ 10 | 11 | def __init__(self, 
c1, c2=128, scales=(1, 2, 3, 6)): 12 | super().__init__() 13 | self.stages = nn.ModuleList([ 14 | nn.Sequential( 15 | nn.AdaptiveAvgPool2d(scale), 16 | ConvModule(c1, c2, 1) 17 | ) 18 | for scale in scales]) 19 | 20 | self.bottleneck = ConvModule(c1 + c2 * len(scales), c2, 3, 1, 1) 21 | 22 | def forward(self, x: Tensor) -> Tensor: 23 | outs = [] 24 | for stage in self.stages: 25 | outs.append(F.interpolate(stage(x), size=x.shape[-2:], mode='bilinear', align_corners=True)) 26 | 27 | outs = [x] + outs[::-1] 28 | out = self.bottleneck(torch.cat(outs, dim=1)) 29 | return out 30 | 31 | 32 | if __name__ == '__main__': 33 | model = PPM(512, 128) 34 | _x = torch.randn(2, 512, 7, 7) 35 | _y = model(_x) 36 | print(_y.shape) # [2, 128, 7, 7] 37 | -------------------------------------------------------------------------------- /semseg/models/pspnet.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : pspnet_.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/7/1 下午6:50 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | import torch 10 | from torch import nn 11 | from torch.nn import functional as F 12 | from semseg.models.base import BaseModel 13 | 14 | 15 | class PSPModule(nn.Module): 16 | 17 | def __init__(self, features, out_features=1024, sizes=(1, 2, 3, 6)): 18 | super().__init__() 19 | self.stages = [] 20 | self.stages = nn.ModuleList([self._make_stage(features, size) for size in sizes]) 21 | self.bottleneck = nn.Conv2d(features * (len(sizes) + 1), out_features, kernel_size=1) 22 | self.relu = nn.ReLU() 23 | 24 | @staticmethod 25 | def _make_stage(features, size): 26 | prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) 27 | conv = nn.Conv2d(features, features, kernel_size=1, bias=False) 28 | return nn.Sequential(prior, conv) 29 | 30 | def forward(self, feats): 31 | h, w = feats.size(2), feats.size(3) 32 | priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in self.stages] + [feats] 33 | bottle = self.bottleneck(torch.cat(priors, 1)) 34 | return self.relu(bottle) 35 | 36 | 37 | class PSPUpsample(nn.Module): 38 | 39 | def __init__(self, in_channels, out_channels): 40 | super().__init__() 41 | self.conv = nn.Sequential( 42 | nn.Conv2d(in_channels, out_channels, 3, padding=1), 43 | nn.BatchNorm2d(out_channels), 44 | nn.PReLU() 45 | ) 46 | 47 | def forward(self, x): 48 | h, w = 2 * x.size(2), 2 * x.size(3) 49 | p = F.interpolate(input=x, size=(h, w), mode='bilinear', align_corners=True) 50 | return self.conv(p) 51 | 52 | 53 | class PSPNet(BaseModel): 54 | 55 | def __init__(self, backbone: str = None, n_classes=19, sizes=(1, 2, 3, 6), deep_features_size=1024): 56 | super().__init__(backbone, n_classes) 57 | self.psp = PSPModule(self.backbone.channels[-1], 1024, sizes) 58 | self.drop_1 = nn.Dropout2d(p=0.3) 59 | 60 | self.up_1 = PSPUpsample(1024, 256) 61 | self.up_2 = PSPUpsample(256, 64) 62 | self.up_3 = PSPUpsample(64, 64) 63 | 64 | self.drop_2 = nn.Dropout2d(p=0.15) 65 | self.final = nn.Sequential( 66 | nn.Conv2d(64, n_classes, kernel_size=1), 67 | nn.LogSoftmax(dim=1) 68 | ) 69 | 70 | self.classifier = nn.Sequential( 71 | nn.Linear(deep_features_size, 256), 72 | nn.ReLU(), 73 | nn.Linear(256, n_classes) 74 | ) 75 | 76 | def forward(self, x): 77 | f = self.backbone(x)[-1] 78 | p = self.psp(f) 79 | p = self.drop_1(p) 80 | 81 | p = self.up_1(p) 82 | p = self.drop_2(p) 83 | 84 | p = self.up_2(p) 85 | p = self.drop_2(p) 86 | 87 | p = self.up_3(p) 88 | p = 
self.drop_2(p) 89 | y = self.final(p) 90 | y = F.interpolate(y, size=x.shape[-2:], mode='bilinear', align_corners=True) 91 | return y 92 | 93 | 94 | if __name__ == '__main__': 95 | _model = PSPNet('MobileNetV3-large', 19) 96 | _x = torch.rand((1, 3, 512, 1024)) 97 | _out = _model(_x) 98 | print(_out.shape) 99 | -------------------------------------------------------------------------------- /semseg/models/segformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.base import BaseModel 5 | from semseg.models.heads import SegFormerHead, UPerHead 6 | 7 | 8 | class SegFormer0(BaseModel): 9 | def __init__(self, backbone: str = 'MiT-B0', num_classes: int = 19) -> None: 10 | super().__init__(backbone, num_classes) 11 | self.decode_head = SegFormerHead(self.backbone.channels, 256 if 'B0' in backbone or 'B1' in backbone else 768, num_classes) 12 | self.apply(self._init_weights) 13 | 14 | def forward(self, x: Tensor) -> Tensor: 15 | y = self.backbone(x) 16 | y = self.decode_head(y) # 4x reduction in image size 17 | y = F.interpolate(y, size=x.shape[2:], mode='bilinear', align_corners=False) # to original image shape 18 | return y 19 | 20 | 21 | class SegFormer(BaseModel): 22 | def __init__(self, backbone: str = 'MiT-B0', num_classes: int = 19) -> None: 23 | super().__init__(backbone, num_classes) 24 | self.head_bottom = SegFormerHead(self.backbone.channels, 25 | 256 if 'B0' in backbone or 'B1' in backbone else 768, 26 | num_classes) 27 | self.head_top = UPerHead(in_channels=self.backbone.channels, 28 | channel=32, 29 | num_classes=2, 30 | scales=(1, 2, 3, 6)) 31 | def forward(self, x: Tensor): 32 | f_x4, f_x8, f_x16, f_x32 = self.backbone(x) 33 | logits_bottom = self.head_bottom([f_x4, f_x8, f_x16, f_x32]) # 4x reduction in image size 34 | logits_bottom = F.interpolate(logits_bottom, size=x.shape[2:], mode='bilinear', align_corners=True) 35 | 36 | if self.training: 37 | # logits_edge = self.head_edge(f_x4, f_x8) 38 | # logits_edge = F.interpolate(logits_edge, x.shape[-2:], mode='bilinear', align_corners=True) 39 | logits_top = self.head_top([f_x4, f_x8, f_x16, f_x32]) 40 | logits_top = F.interpolate(logits_top, x.shape[-2:], mode='bilinear', align_corners=True) 41 | # return torch.cat([logits_seg, logits_so], dim=1), logits_edge 42 | return logits_bottom, logits_top, None 43 | 44 | return logits_bottom.contiguous() 45 | 46 | 47 | if __name__ == '__main__': 48 | model = SegFormer('MiT-B0', num_classes=8) 49 | model.train(True) 50 | model.init_pretrained('../../checkpoints/backbones/mit/mit_b0.pth') 51 | x = torch.zeros(4, 3, 512, 1024) 52 | y = model(x) 53 | if model.training: 54 | print(y[0].shape, y[1].shape) 55 | else: 56 | print(y.shape) -------------------------------------------------------------------------------- /semseg/models/sfnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | from torch.nn import functional as F 4 | from semseg.models.base import BaseModel 5 | from semseg.models.heads import SFHead, UPerHead 6 | 7 | 8 | class SFNet(BaseModel): 9 | def __init__(self, backbone: str = 'ResNetD-18', num_classes: int = 19): 10 | # assert 'ResNet' in backbone 11 | super().__init__(backbone, num_classes) 12 | self.head = SFHead(self.backbone.channels, 128 if ('18' in backbone or 'MobileNet' in backbone) else 256, num_classes) 13 | self.apply(self._init_weights) 14 | 15 | def
forward(self, x: Tensor) -> Tensor: 16 | outs = self.backbone(x) 17 | out = self.head(outs) 18 | out = F.interpolate(out, size=x.shape[-2:], mode='bilinear', align_corners=True) 19 | return out 20 | 21 | 22 | class SFNet0(BaseModel): 23 | def __init__(self, backbone: str = 'ResNetD-18', num_classes: int = 19): 24 | # assert 'ResNet' in backbone 25 | super().__init__(backbone, num_classes) 26 | self.head_bottom = SFHead(self.backbone.channels, 128 if ('18' in backbone or 'MobileNet' in backbone) else 256, num_classes) 27 | self.head_top = UPerHead(in_channels=self.backbone.channels, 28 | channel=32, 29 | num_classes=2, 30 | scales=(1, 2, 3, 6)) 31 | self.apply(self._init_weights) 32 | 33 | def forward(self, x: Tensor): 34 | f_x4, f_x8, f_x16, f_x32 = self.backbone(x) 35 | logits_bottom = self.head_bottom([f_x4, f_x8, f_x16, f_x32]) # 4x reduction in image size 36 | logits_bottom = F.interpolate(logits_bottom, size=x.shape[2:], mode='bilinear', align_corners=True) 37 | 38 | if self.training: 39 | # logits_edge = self.head_edge(f_x4, f_x8) 40 | # logits_edge = F.interpolate(logits_edge, x.shape[-2:], mode='bilinear', align_corners=True) 41 | logits_top = self.head_top([f_x4, f_x8, f_x16, f_x32]) 42 | logits_top = F.interpolate(logits_top, x.shape[-2:], mode='bilinear', align_corners=True) 43 | # return torch.cat([logits_seg, logits_so], dim=1), logits_edge 44 | return logits_bottom, logits_top, None 45 | 46 | return logits_bottom.contiguous() 47 | 48 | 49 | if __name__ == '__main__': 50 | model = SFNet('MobileNetV3-large') 51 | model.train() 52 | model.init_pretrained('../../checkpoints/backbones/mobilenet_/mobilenetv3_large.pth') 53 | x = torch.randn(2, 3, 512, 1024) 54 | y = model(x) 55 | if model.training: 56 | print(y[0].shape, y[1].shape) 57 | else: 58 | print(y.shape) -------------------------------------------------------------------------------- /semseg/optimizers.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.optim import AdamW, SGD 3 | 4 | # def get_optimizer(model: nn.Module, optimizer: str, lr: float, weight_decay: float = 0.01): 5 | # 6 | # # parameters of the pretrained backbone 7 | # backbone_param = list(map(id, model.backbone.parameters())) 8 | # # parameters of the newly added layers 9 | # new_param = filter(lambda p: id(p) not in backbone_param, model.parameters()) 10 | # # group parameters for the optimizer 11 | # params = [ 12 | # {'params': model.backbone.parameters(), 'lr': lr * 0.1}, 13 | # {'params': new_param, 'lr': lr * 0.1} 14 | # ] 15 | # 16 | # if optimizer == 'adamw': 17 | # return AdamW(params, lr, betas=(0.9, 0.999), eps=1e-8, weight_decay=weight_decay) 18 | # else: 19 | # return SGD(params, lr, momentum=0.9, weight_decay=weight_decay) 20 | 21 | 22 | def get_optimizer(model: nn.Module, optimizer: str, lr: float, weight_decay: float = 0.01): 23 | wd_params, nwd_params = [], [] 24 | for p in model.parameters(): 25 | if p.dim() == 1:  # 1-D tensors (biases, norm weights) are excluded from weight decay 26 | nwd_params.append(p) 27 | else: 28 | wd_params.append(p) 29 | 30 | params = [ 31 | {"params": wd_params}, 32 | {"params": nwd_params, "weight_decay": 0}, 33 | ] 34 | 35 | if optimizer == 'adamw': 36 | return AdamW(params, lr, betas=(0.9, 0.999), eps=1e-8, weight_decay=weight_decay) 37 | else: 38 | return SGD(params, lr, momentum=0.9, weight_decay=weight_decay) 39 | 40 | 41 | # params = [ 42 | # {"params": [value] for _, value in model.sharedNet.named_parameters() if value.requires_grad}, 43 | # {"params": [value for _, value in model.cls_fc_son1.named_parameters() 44 | # if value.requires_grad], 'lr': args.lr * 10}, 45 | # {"params": [value
for _, value in model.cls_fc_son2.named_parameters() 46 | # if value.requires_grad], 'lr': args.lr * 10}, 47 | # {"params": [value for _, value in model.sonnet1.named_parameters() 48 | # if value.requires_grad], 'lr': args.lr * 10}, 49 | # {"params": [value for _, value in model.sonnet2.named_parameters() 50 | # if value.requires_grad], 'lr': args.lr * 10}, 51 | # ] 52 | # optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay) 53 | 54 | 55 | # ''' 56 | # First, we want the learning rate of the pretrained backbone to be one tenth of that of the newly added layers. 57 | # Specifically, split the parameters into the newly added layers and the pretrained backbone, and set a learning rate for each group. 58 | # ''' 59 | # # parameters of the pretrained backbone 60 | # backbone_param = list(map(id, model.backbone.parameters())) 61 | # # parameters of the newly added layers 62 | # new_param = filter(lambda p: id(p) not in backbone_param, model.parameters()) 63 | # # define the optimizer 64 | # optimizer = torch.optim.Adam([ 65 | # {'params': backbone_param, 'lr': LR * 0.1}, 66 | # {'params': new_param} 67 | # ], lr=LR) -------------------------------------------------------------------------------- /semseg/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StuLiu/SOSNet/0e4832eeb76daeebd4a0e31a750e7fce86b7b8ac/semseg/utils/__init__.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='semseg', 5 | version='0.4.0', 6 | description='SOTA Semantic Segmentation Models', 7 | url='https://github.com/sithu31296/semantic-segmentation', 8 | author='Sithu Aung', 9 | author_email='sithu31296@gmail.com', 10 | license='MIT', 11 | packages=find_packages(include=['semseg']), 12 | install_requires=[ 13 | 'tqdm', 14 | 'tabulate', 15 | 'numpy', 16 | 'scipy', 17 | 'matplotlib', 18 | 'tensorboard', 19 | 'fvcore', 20 | 'einops' 21 | ] 22 | ) -------------------------------------------------------------------------------- /tools/benchmark.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | import time 4 | from fvcore.nn import flop_count_table, FlopCountAnalysis 5 | from semseg.models import * 6 | 7 | 8 | def main( 9 | model_name: str, 10 | backbone_name: str, 11 | image_size: list, 12 | num_classes: int, 13 | device: str, 14 | ): 15 | device = torch.device('cuda' if torch.cuda.is_available() and device == 'cuda' else 'cpu') 16 | inputs = torch.randn(1, 3, *image_size).to(device) 17 | model = eval(model_name)(backbone_name, num_classes) 18 | model = model.to(device) 19 | model.eval() 20 | print(model.training) 21 | 22 | print(flop_count_table(FlopCountAnalysis(model, inputs))) 23 | 24 | total_time = 0.0 25 | for _ in range(10): 26 | tic = time.perf_counter() 27 | model(inputs) 28 | toc = time.perf_counter() 29 | total_time += toc - tic 30 | total_time /= 10 31 | 32 | # from semseg.utils.utils import model_summary, init_logger 33 | # 34 | # init_logger() 35 | # model_summary(model, (1, 3, *image_size)) 36 | print(f"Inference time: {total_time*1000:.2f}ms") 37 | print(f"FPS: {1/total_time:.2f}") 38 | 39 | if __name__ == '__main__': 40 | parser = argparse.ArgumentParser() 41 | parser.add_argument('--model-name', type=str, default='FastSCNN') 42 | parser.add_argument('--backbone-name', type=str, default='MobileNetV3-large') 43 | # parser.add_argument('--backbone-name', type=str, default=None) 44 | parser.add_argument('--image-size', type=int, nargs=2, default=[512, 512]) 45 |
parser.add_argument('--num-classes', type=int, default=8) 46 | parser.add_argument('--device', type=str, default='cuda') 47 | args = parser.parse_args() 48 | 49 | main(args.model_name, args.backbone_name, args.image_size, args.num_classes, args.device) -------------------------------------------------------------------------------- /tools/export.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | import yaml 4 | import onnx 5 | from pathlib import Path 6 | from onnxsim import simplify 7 | from semseg.models import * 8 | from semseg.datasets import * 9 | 10 | 11 | def export_onnx(model, inputs, file): 12 | torch.onnx.export( 13 | model, 14 | inputs, 15 | f"{file}.onnx", 16 | input_names=['input'], 17 | output_names=['output'], 18 | opset_version=13 19 | ) 20 | onnx_model = onnx.load(f"{file}.onnx") 21 | onnx.checker.check_model(onnx_model) 22 | 23 | onnx_model, check = simplify(onnx_model) 24 | onnx.save(onnx_model, f"{file}.onnx") 25 | assert check, "Simplified ONNX model could not be validated" 26 | print(f"ONNX model saved to {file}.onnx") 27 | 28 | 29 | def export_coreml(model, inputs, file): 30 | try: 31 | import coremltools as ct 32 | ts_model = torch.jit.trace(model, inputs, strict=True) 33 | ct_model = ct.convert( 34 | ts_model, 35 | inputs=[ct.ImageType('image', shape=inputs.shape, scale=1/255.0, bias=[0, 0, 0])] 36 | ) 37 | ct_model.save(f"{file}.mlmodel") 38 | print(f"CoreML model saved to {file}.mlmodel") 39 | except ImportError: 40 | print("Please install coremltools to export to CoreML.\n`pip install coremltools`") 41 | 42 | 43 | def main(cfg): 44 | model = eval(cfg['MODEL']['NAME'])(cfg['MODEL']['BACKBONE'], len(eval(cfg['DATASET']['NAME']).PALETTE)) 45 | model.load_state_dict(torch.load(cfg['TEST']['MODEL_PATH'], map_location='cpu')) 46 | model.eval() 47 | 48 | inputs = torch.randn(1, 3, *cfg['TEST']['IMAGE_SIZE']) 49 | file = cfg['TEST']['MODEL_PATH'].split('.')[0] 50 | 51 | export_onnx(model, inputs, file) 52 | export_coreml(model, inputs, file) 53 | print("Finished converting.") 54 | 55 | 56 | if __name__ == '__main__': 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument('--cfg', type=str, default='configs/helen.yaml') 59 | args = parser.parse_args() 60 | 61 | with open(args.cfg) as f: 62 | cfg = yaml.load(f, Loader=yaml.SafeLoader) 63 | 64 | save_dir = Path(cfg['SAVE_DIR']) 65 | save_dir.mkdir(exist_ok=True) 66 | 67 | main(cfg) -------------------------------------------------------------------------------- /tools/export_small_objects.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : export_small_objects.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2023/6/20 16:51 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | 10 | import cv2 11 | import os 12 | import numpy as np 13 | from semseg.datasets import * 14 | from torch.utils.data import DataLoader 15 | from glob import glob 16 | import argparse 17 | from tqdm import tqdm 18 | import yaml 19 | 20 | 21 | def filtering_image(lbl, out_path, min_area=0, max_area=1024, num_classes=11, ignore_label=255): 22 | lbl_out = np.ones_like(lbl) * ignore_label 23 | for _id in range(num_classes): 24 | label = np.zeros_like(lbl) 25 | label[lbl == _id] = 255 26 | # find all contours for this class 27 | contours, _ = cv2.findContours(label, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 28 | # iterate over the contours 29 | for contour in contours: 30 | # keep only contours whose area lies within [min_area, max_area] 31 | if min_area <=
cv2.contourArea(contour) <= max_area:  # and cv2.contourArea(contour) >= min_area 32 | # fill the contour interior with the class id 33 | cv2.drawContours(lbl_out, [contour], 0, _id, -1) 34 | # cv2.imshow('window', lbl_out) 35 | # cv2.waitKey(0) 36 | # save the segmentation label that keeps only the small objects 37 | cv2.imwrite(out_path, lbl_out) 38 | # cv2.imshow('window', lbl_out) 39 | # cv2.waitKey(0) 40 | 41 | 42 | def filtering_by_area(in_dir, min_area=0, max_area=1024, num_classes=11, ignore_label=255): 43 | out_dir = f'{in_dir}_so_{min_area}_{max_area}' 44 | os.makedirs(out_dir, exist_ok=True) 45 | img_paths = glob(r''+in_dir+'/*.png') 46 | img_paths.sort() 47 | for img_path in tqdm(img_paths): 48 | lbl = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 49 | out_path = os.path.join(out_dir, os.path.basename(img_path)) 50 | filtering_image(lbl, out_path=out_path, min_area=min_area, max_area=max_area, num_classes=num_classes, 51 | ignore_label=ignore_label) 52 | # break 53 | 54 | def filter_objects(in_dir, num_classes=11, ignore_label=255): 55 | areas = [0, 1024, 4096, 16384, 65536, 1048576] 56 | for i in range(len(areas) - 1): 57 | filtering_by_area(in_dir, 0, areas[i + 1], num_classes, ignore_label) 58 | 59 | 60 | if __name__ == '__main__': 61 | parser = argparse.ArgumentParser() 62 | parser.add_argument('--cfg', type=str, default='configs/Bisenetv2/camvid.yaml', help='config file path') 63 | parser.add_argument('--input-dir', type=str, default='data/CamVid/testannot', help='directory of label files') 64 | # parser.add_argument('--max-area', type=int, default=16384, help='maximum area for objects. 1024, 4096, 16384') 65 | args = parser.parse_args() 66 | with open(args.cfg) as f: 67 | cfg = yaml.load(f, Loader=yaml.SafeLoader) 68 | datasets = eval(cfg["DATASET"]["NAME"])(cfg["DATASET"]["ROOT"], 'test') 69 | filter_objects(in_dir=args.input_dir, #out_dir=args.input_dir + '_so' + str(args.max_area), 70 | num_classes=datasets.n_classes, ignore_label=datasets.ignore_label) 71 | -------------------------------------------------------------------------------- /tools/feature_visualization.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : feature_visualization.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/7/19 10:57 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import cv2 13 | from torch.nn import init 14 | from torchvision import io 15 | from semseg.models.backbones.mobilenetv3 import MobileNetV3 16 | from semseg.augmentations import get_val_augmentation 17 | import numpy as np 18 | import mmcv 19 | 20 | 21 | def show_features(f_tensor): 22 | f_np = np.squeeze(f_tensor.detach().cpu().numpy()) 23 | for img_gray in f_np: 24 | cv2.imshow('feature', img_gray) 25 | q = cv2.waitKey(0) 26 | if q == (ord('q') & 0xff): 27 | break 28 | return 29 | 30 | 31 | if __name__ == '__main__': 32 | model = MobileNetV3('large') 33 | model.load_state_dict(torch.load('../checkpoints/backbones/mobilenet/mobilenetv3_large.pth', 34 | map_location='cpu'), strict=False) 35 | model.train() 36 | model.cuda() 37 | aug = get_val_augmentation([2160, 3840]) 38 | # img_rgb_tensor = io.read_image('../data/UAVid2020_mm/img_dir/train/seq1_000700.png') 39 | img_rgb_tensor = io.read_image('../assests/vaihingen_area3.png')[:-1,:,:] 40 | # img_bgr = mmcv.imread('../assests/vaihingen_area3.png') 41 | # img_rgb_tensor = io.read_image('../data/ISPRS_DATA/Vaihingen2/img_dir/train/area1_0_0_512_512.png') 42 | img_rgb_tensor =
aug(img_rgb_tensor, img_rgb_tensor[0:1,:,:])[0].unsqueeze(dim=0).cuda() 43 | 44 | # _x = torch.randn(1, 3, 512, 512) 45 | _outs = model(img_rgb_tensor) 46 | 47 | for y in _outs: 48 | show_features(y) 49 | 50 | from semseg.utils.utils import model_summary, init_logger 51 | 52 | init_logger() 53 | model_summary(model, (1, 3, 224, 224)) 54 | 55 | # from fvcore.nn import flop_count_table, FlopCountAnalysis 56 | # print(flop_count_table(FlopCountAnalysis(model, _x.cuda()))) 57 | -------------------------------------------------------------------------------- /tools/infer_single.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : infer_single.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/10/14 下午9:26 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | 10 | 11 | import os 12 | import argparse 13 | import yaml 14 | from torchvision import io 15 | from tools.infer import SemSeg 16 | from PIL import Image 17 | import numpy as np 18 | import cv2 19 | from semseg.datasets import * 20 | 21 | def overlay_gt(dataset, img_path, lbl_path, save_dir, overlay=False, img_ratio=0.3): 22 | img = Image.open(img_path) 23 | lbl = Image.open(lbl_path).convert('P') 24 | colormap = dataset.PALETTE.numpy().astype(np.uint8) 25 | lbl.putpalette(colormap.flatten()) 26 | lbl = lbl.convert('RGB') 27 | if overlay: 28 | img = (np.array(img) * img_ratio) + (np.array(lbl) * (1 - img_ratio)) 29 | img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_RGB2BGR) 30 | cv2.imwrite(os.path.join(save_dir, f"{str(os.path.basename(img_path))}"), img) 31 | else: 32 | lbl.save(os.path.join(save_dir, f"{str(os.path.basename(img_path))}")) 33 | 34 | 35 | if __name__ == '__main__': 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--img_path', type=str, default='data/CamVid/test/0001TP_008550.png') 38 | parser.add_argument('--cfg', type=str, 39 | default='output_ablation/deeplabv3plus/camvid/DeeplabV3Plus_CamVid_hier_soem_59.41_38.53_71.33/config.yaml') 40 | parser.add_argument('--overlay', type=str, default=True) 41 | parser.add_argument('--gt', type=str, default=False) # if generate gt from label 42 | parser.add_argument('--ratio', type=float, default=0.3) 43 | args = parser.parse_args() 44 | 45 | with open(args.cfg) as f: 46 | cfg = yaml.load(f, Loader=yaml.SafeLoader) 47 | 48 | test_file = os.path.join(args.img_path) 49 | save_dir = f'./vis_results/{cfg["MODEL"]["NAME"]}' 50 | os.makedirs(save_dir, exist_ok=True) 51 | 52 | semseg = SemSeg(cfg) 53 | 54 | print(f'Inferencing {test_file} by {cfg["MODEL"]["NAME"]}...') 55 | segmap = semseg.predict(str(test_file), args.overlay, args.ratio) 56 | io.write_png(segmap, os.path.join(save_dir, f"{str(os.path.basename(test_file))}")) 57 | 58 | trainset = eval(cfg['DATASET']['NAME'])(cfg['DATASET']['ROOT'], 'train', None) 59 | 60 | import shutil 61 | if args.gt: 62 | gt_dir = os.path.join(save_dir, '..', 'gt') 63 | os.makedirs(gt_dir, exist_ok=True) 64 | shutil.copy(args.img_path, os.path.join(gt_dir, f"{str(os.path.basename(args.img_path))}.img.png")) 65 | if cfg['DATASET']['NAME'] == 'ISAID': 66 | overlay_gt(trainset, args.img_path, 67 | args.img_path.replace('img_dir', 'ann_dir').replace('.png', '_instance_color_RGB.png'), 68 | gt_dir, True) 69 | elif cfg['DATASET']['NAME'] == 'CamVid': 70 | overlay_gt(trainset, args.img_path, 71 | args.img_path.replace('test/', 'testannot/'), 72 | gt_dir, True, img_ratio=args.ratio) 73 | elif cfg['DATASET']['NAME'] == 'UAVid': 74 | overlay_gt(trainset, 
args.img_path, 75 | args.img_path.replace('img_dir/', 'ann_dir/'), 76 | gt_dir, True, img_ratio=args.ratio) -------------------------------------------------------------------------------- /tools/submit/uavid_submit.py: -------------------------------------------------------------------------------- 1 | """ 2 | @Project : semantic-segmentation 3 | @File : uavid_submit.py 4 | @IDE : PyCharm 5 | @Author : Wang Liu 6 | @Date : 2022/6/8 下午8:39 7 | @e-mail : 1183862787@qq.com 8 | """ 9 | 10 | import os 11 | import os.path as osp 12 | import shutil 13 | 14 | 15 | def transfer(dir_path='../../output/test_results', out_dir='./uavid_submit'): 16 | os.makedirs(out_dir, exist_ok=True) 17 | imgs = os.listdir(dir_path) 18 | for img_name in imgs: 19 | if img_name.endswith('.png'): 20 | seq_dir, basename = img_name.split('_') 21 | seq_dir = osp.join(out_dir, seq_dir, 'Labels') 22 | os.makedirs(seq_dir, exist_ok=True) 23 | shutil.copy(osp.join(dir_path, img_name), osp.join(seq_dir, basename)) 24 | 25 | 26 | if __name__ == '__main__': 27 | # transfer(dir_path='../../output/test_results', out_dir='./submit_SOSNet_mbv3l_soa_epoch100') 28 | transfer(dir_path='../../output_ablation/UperNet/uavid2020/test_results', out_dir='./UperNet') 29 | --------------------------------------------------------------------------------
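Note on usage: transfer() above assumes prediction files are named like seq1_000700.png (a single underscore separating the sequence id from the frame id) and regroups them into the seqX/Labels/frame.png layout expected by the UAVid benchmark server. A minimal driver sketch follows, assuming predictions were already written to output/test_results by tools/infer.py and that the repository root is on PYTHONPATH; both paths are assumptions, not fixed by the repository:

    # hypothetical submission helper; adjust dir_path to wherever infer.py wrote the predictions
    from tools.submit.uavid_submit import transfer

    transfer(dir_path='output/test_results', out_dir='./uavid_submit')
    # ./uavid_submit/ now holds seq*/Labels/*.png and can be zipped and uploaded for evaluation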