├── .gitignore
├── HRNet-Semantic-Segmentation-HRNet-OCR
│   ├── .gitignore
│   ├── LICENSE
│   ├── README.md
│   ├── experiments
│   │   ├── ade20k
│   │   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml
│   │   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml
│   │   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml
│   │   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml
│   │   │   └── seg_hrnet_w48_520x520_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml
│   │   ├── cityscapes
│   │   │   ├── seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   │   ├── seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml
│   │   │   ├── seg_hrnet_ocr_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   │   ├── seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   │   ├── seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484_paddle.yaml
│   │   │   ├── seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml
│   │   │   ├── seg_hrnet_w48_train_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   │   ├── seg_hrnet_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml
│   │   │   └── seg_hrnet_w48_trainval_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml
│   │   ├── cocostuff
│   │   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml
│   │   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml
│   │   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml
│   │   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml
│   │   │   └── seg_hrnet_w48_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml
│   │   ├── lip
│   │   │   ├── seg_hrnet_ocr_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml
│   │   │   ├── seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml
│   │   │   └── seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150_paddle.yaml
│   │   └── pascal_ctx
│   │       ├── seg_hrnet_ocr_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml
│   │       ├── seg_hrnet_ocr_w48_cls60_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml
│   │       ├── seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml
│   │       └── seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200_paddle.yaml
│   ├── figures
│   │   ├── OCR.PNG
│   │   ├── SegmentationTransformerOCR.png
│   │   ├── SegmentationTransformerOCR1.png
│   │   ├── SegmentationTransformerOCR2.png
│   │   └── seg-hrnet.png
│   ├── hubconf.py
│   ├── le50_test.txt
│   ├── lib
│   │   ├── config
│   │   │   ├── __init__.py
│   │   │   ├── default.py
│   │   │   ├── hrnet_config.py
│   │   │   └── models.py
│   │   ├── core
│   │   │   ├── criterion.py
│   │   │   └── function.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── ade20k.py
│   │   │   ├── base_dataset.py
│   │   │   ├── cityscapes.py
│   │   │   ├── cocostuff.py
│   │   │   ├── lip.py
│   │   │   └── pascal_ctx.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   ├── bn_helper.py
│   │   │   ├── hrnet.py
│   │   │   ├── seg_hrnet.py
│   │   │   ├── seg_hrnet_ocr.py
│   │   │   └── sync_bn
│   │   │       ├── LICENSE
│   │   │       ├── __init__.py
│   │   │       └── inplace_abn
│   │   │           ├── __init__.py
│   │   │           ├── bn.py
│   │   │           ├── functions.py
│   │   │           └── src
│   │   │               ├── common.h
│   │   │               ├── inplace_abn.cpp
│   │   │               ├── inplace_abn.h
│   │   │               ├── inplace_abn_cpu.cpp
│   │   │               └── inplace_abn_cuda.cu
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── distributed.py
│   │       ├── modelsummary.py
│   │       └── utils.py
│   ├── local_log.txt
│   ├── requirements.txt
│   ├── run_dist.sh
│   ├── run_local.sh
│   └── tools
│       ├── _init_paths.py
│       ├── inference.py
│       ├── test.py
│       └── train.py
├── README.md
├── assets
│   └── pipeline.png
├── data.py
├── environment.yml
├── evaluation
│   ├── __init__.py
│   └── metrics.py
├── model
│   ├── austnet.py
│   ├── austnet_s.py
│   └── encoder_decoder.py
├── options.py
├── pytorch_iou
│   └── __init__.py
├── pytorch_ssim
│   └── __init__.py
├── test_austnet.py
├── test_austnet_s.py
├── train_austnet.py
└── train_austnet_s.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | __pycache__/
3 | *.py[co]
4 | data/
5 | log/
6 | output/
7 | pretrained_models
8 | scripts/
9 | detail-api/
10 | data/list
11 | *.pth
--------------------------------------------------------------------------------
/HRNet-Semantic-Segmentation-HRNet-OCR/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | __pycache__/
3 | *.py[co]
4 | data/
5 | log/
6 | output/
7 | pretrained_models
8 | scripts/
9 | detail-api/
10 | data/list
11 |
--------------------------------------------------------------------------------
/HRNet-Semantic-Segmentation-HRNet-OCR/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) [2019] [Microsoft]
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 | =======================================================================================
24 | 3-clause BSD licenses
25 | =======================================================================================
26 | 1.
syncbn - For details, see lib/models/syncbn/LICENSE 27 | Copyright (c) 2017 mapillary 28 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/ade20k/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: 'data/' 14 | TEST_SET: 'list/ade20k/val.lst' 15 | TRAIN_SET: 'list/ade20k/train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 120 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.02 88 | WD: 0.0001 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 520 99 | - 520 100 | BASE_SIZE: 520 101 | BATCH_SIZE_PER_GPU: 1 102 | NUM_SAMPLES: 200 103 | FLIP_TEST: false 104 | MULTI_SCALE: false 105 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/ade20k/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: '../../../../dataset/ade20k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 
| FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 2 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 120 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.02 88 | WD: 0.0001 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 520 99 | - 520 100 | BASE_SIZE: 520 101 | BATCH_SIZE_PER_GPU: 1 102 | NUM_SAMPLES: 200 103 | FLIP_TEST: false 104 | MULTI_SCALE: false 105 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/ade20k/seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: 'data/' 14 | TEST_SET: 'list/ade20k/val.lst' 15 | TRAIN_SET: 'list/ade20k/train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 4 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 120 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.02 87 | WD: 0.0001 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 520 98 | - 520 99 | BASE_SIZE: 520 100 | BATCH_SIZE_PER_GPU: 1 101 | NUM_SAMPLES: 200 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/ade20k/seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: '../../../../dataset/ade20k/' 14 | 
TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 2 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 120 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.02 87 | WD: 0.0001 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 520 98 | - 520 99 | BASE_SIZE: 520 100 | BATCH_SIZE_PER_GPU: 1 101 | NUM_SAMPLES: 200 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/ade20k/seg_hrnet_w48_520x520_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: 'data/' 14 | TEST_SET: 'list/ade20k/val.lst' 15 | TRAIN_SET: 'list/ade20k/train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 4 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 120 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.02 87 | WD: 0.0001 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 
97 | - 520 98 | - 520 99 | BASE_SIZE: 520 100 | BATCH_SIZE_PER_GPU: 1 101 | NUM_SAMPLES: 200 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: data/ 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: "pretrained_models/hrnetv2_w48_imagenet_pretrained.pth" 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 3 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.01 88 | WD: 0.0005 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 2048 99 | - 1024 100 | BASE_SIZE: 2048 101 | BATCH_SIZE_PER_GPU: 4 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: '../../../../dataset/original_cityscapes/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 
| - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 2 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.01 88 | WD: 0.0005 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 2048 99 | - 1024 100 | BASE_SIZE: 2048 101 | BATCH_SIZE_PER_GPU: 2 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cityscapes/seg_hrnet_ocr_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: data/ 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/trainval.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: "pretrained_models/hrnetv2_w48_imagenet_pretrained.pth" 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 3 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.01 88 | WD: 0.0005 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 2048 99 | - 1024 100 | BASE_SIZE: 2048 101 | BATCH_SIZE_PER_GPU: 4 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | 
PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: '../../../../dataset/original_cityscapes/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: '../../../../dataset/pretrained_models/hrnetv2_w48_imagenet_pretrained_top1_21.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 1024 78 | - 512 79 | BASE_SIZE: 2048 80 | BATCH_SIZE_PER_GPU: 3 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 484 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.01 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 16 95 | TEST: 96 | IMAGE_SIZE: 97 | - 2048 98 | - 1024 99 | BASE_SIZE: 2048 100 | BATCH_SIZE_PER_GPU: 4 101 | FLIP_TEST: false 102 | MULTI_SCALE: false 103 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: '../../../../dataset/original_cityscapes/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 1024 78 | - 512 79 | BASE_SIZE: 2048 80 | BATCH_SIZE_PER_GPU: 3 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 484 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.01 87 | WD: 0.0005 
88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 16 95 | TEST: 96 | IMAGE_SIZE: 97 | - 2048 98 | - 1024 99 | BASE_SIZE: 2048 100 | BATCH_SIZE_PER_GPU: 4 101 | FLIP_TEST: false 102 | MULTI_SCALE: false 103 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: '../../../../dataset/original_cityscapes/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 1024 78 | - 512 79 | BASE_SIZE: 2048 80 | BATCH_SIZE_PER_GPU: 2 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 484 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.01 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 16 95 | TEST: 96 | IMAGE_SIZE: 97 | - 2048 98 | - 1024 99 | BASE_SIZE: 2048 100 | BATCH_SIZE_PER_GPU: 4 101 | FLIP_TEST: false 102 | MULTI_SCALE: false -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cityscapes/seg_hrnet_w48_train_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 
42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 1024 78 | - 512 79 | BASE_SIZE: 2048 80 | BATCH_SIZE_PER_GPU: 3 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 484 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.01 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 16 95 | TEST: 96 | IMAGE_SIZE: 97 | - 2048 98 | - 1024 99 | BASE_SIZE: 2048 100 | BATCH_SIZE_PER_GPU: 4 101 | FLIP_TEST: false 102 | MULTI_SCALE: false 103 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cityscapes/seg_hrnet_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/train.lst' 16 | EXTRA_TRAIN_SET: 'list/cityscapes/trainval.lst' 17 | NUM_CLASSES: 19 18 | MODEL: 19 | NAME: seg_hrnet 20 | ALIGN_CORNERS: False 21 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 22 | EXTRA: 23 | FINAL_CONV_KERNEL: 1 24 | STAGE1: 25 | NUM_MODULES: 1 26 | NUM_RANCHES: 1 27 | BLOCK: BOTTLENECK 28 | NUM_BLOCKS: 29 | - 4 30 | NUM_CHANNELS: 31 | - 64 32 | FUSE_METHOD: SUM 33 | STAGE2: 34 | NUM_MODULES: 1 35 | NUM_BRANCHES: 2 36 | BLOCK: BASIC 37 | NUM_BLOCKS: 38 | - 4 39 | - 4 40 | NUM_CHANNELS: 41 | - 48 42 | - 96 43 | FUSE_METHOD: SUM 44 | STAGE3: 45 | NUM_MODULES: 4 46 | NUM_BRANCHES: 3 47 | BLOCK: BASIC 48 | NUM_BLOCKS: 49 | - 4 50 | - 4 51 | - 4 52 | NUM_CHANNELS: 53 | - 48 54 | - 96 55 | - 192 56 | FUSE_METHOD: SUM 57 | STAGE4: 58 | NUM_MODULES: 3 59 | NUM_BRANCHES: 4 60 | BLOCK: BASIC 61 | NUM_BLOCKS: 62 | - 4 63 | - 4 64 | - 4 65 | - 4 66 | NUM_CHANNELS: 67 | - 48 68 | - 96 69 | - 192 70 | - 384 71 | FUSE_METHOD: SUM 72 | LOSS: 73 | USE_OHEM: false 74 | OHEMTHRES: 0.9 75 | OHEMKEEP: 131072 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 3 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | EXTRA_EPOCH: 484 86 | RESUME: true 87 | OPTIMIZER: sgd 88 | LR: 0.01 89 | EXTRA_LR: 0.001 90 | WD: 0.0005 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: 255 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 2048 101 | - 1024 102 | BASE_SIZE: 2048 103 | BATCH_SIZE_PER_GPU: 4 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cityscapes/seg_hrnet_w48_trainval_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml: -------------------------------------------------------------------------------- 1 | 
CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/train.lst' 16 | EXTRA_TRAIN_SET: 'list/cityscapes/trainval.lst' 17 | NUM_CLASSES: 19 18 | MODEL: 19 | NAME: seg_hrnet 20 | ALIGN_CORNERS: False 21 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 22 | EXTRA: 23 | FINAL_CONV_KERNEL: 1 24 | STAGE1: 25 | NUM_MODULES: 1 26 | NUM_RANCHES: 1 27 | BLOCK: BOTTLENECK 28 | NUM_BLOCKS: 29 | - 4 30 | NUM_CHANNELS: 31 | - 64 32 | FUSE_METHOD: SUM 33 | STAGE2: 34 | NUM_MODULES: 1 35 | NUM_BRANCHES: 2 36 | BLOCK: BASIC 37 | NUM_BLOCKS: 38 | - 4 39 | - 4 40 | NUM_CHANNELS: 41 | - 48 42 | - 96 43 | FUSE_METHOD: SUM 44 | STAGE3: 45 | NUM_MODULES: 4 46 | NUM_BRANCHES: 3 47 | BLOCK: BASIC 48 | NUM_BLOCKS: 49 | - 4 50 | - 4 51 | - 4 52 | NUM_CHANNELS: 53 | - 48 54 | - 96 55 | - 192 56 | FUSE_METHOD: SUM 57 | STAGE4: 58 | NUM_MODULES: 3 59 | NUM_BRANCHES: 4 60 | BLOCK: BASIC 61 | NUM_BLOCKS: 62 | - 4 63 | - 4 64 | - 4 65 | - 4 66 | NUM_CHANNELS: 67 | - 48 68 | - 96 69 | - 192 70 | - 384 71 | FUSE_METHOD: SUM 72 | LOSS: 73 | USE_OHEM: true 74 | OHEMTHRES: 0.9 75 | OHEMKEEP: 131072 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 3 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | EXTRA_EPOCH: 484 86 | RESUME: true 87 | OPTIMIZER: sgd 88 | LR: 0.01 89 | EXTRA_LR: 0.001 90 | WD: 0.0005 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: 255 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 2048 101 | - 1024 102 | BASE_SIZE: 2048 103 | BATCH_SIZE_PER_GPU: 4 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cocostuff/val.lst' 15 | TRAIN_SET: 'list/cocostuff/train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: '/home/wupenghao/transfuser/dirl/HRNet-Semantic-Segmentation-HRNet-OCR/hrnet_ocr_cocostuff_3965_torch04.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | 
OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 110 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.001 88 | WD: 0.0001 89 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr'] 90 | NONBACKBONE_MULT: 10 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: 255 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 1 104 | NUM_SAMPLES: 200 105 | FLIP_TEST: false 106 | MULTI_SCALE: false 107 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: '../../../../dataset/coco_stuff_10k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 2 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 110 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.001 88 | WD: 0.0001 89 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr'] 90 | NONBACKBONE_MULT: 10 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: 255 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 1 104 | NUM_SAMPLES: 200 105 | FLIP_TEST: false 106 | MULTI_SCALE: false 107 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cocostuff/seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: '../../../../dataset/coco_stuff_10k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 
'train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 4 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 110 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.001 87 | WD: 0.0001 88 | NONBACKBONE_KEYWORDS: ['last_layer'] 89 | NONBACKBONE_MULT: 10 90 | MOMENTUM: 0.9 91 | NESTEROV: false 92 | FLIP: true 93 | MULTI_SCALE: true 94 | DOWNSAMPLERATE: 1 95 | IGNORE_LABEL: 255 96 | SCALE_FACTOR: 16 97 | TEST: 98 | IMAGE_SIZE: 99 | - 520 100 | - 520 101 | BASE_SIZE: 520 102 | BATCH_SIZE_PER_GPU: 1 103 | NUM_SAMPLES: 200 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cocostuff/seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: '../../../../dataset/coco_stuff_10k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 2 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 110 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.001 87 | WD: 0.0001 88 | NONBACKBONE_KEYWORDS: ['last_layer'] 89 | NONBACKBONE_MULT: 10 90 | MOMENTUM: 0.9 91 | NESTEROV: false 
92 | FLIP: true 93 | MULTI_SCALE: true 94 | DOWNSAMPLERATE: 1 95 | IGNORE_LABEL: 255 96 | SCALE_FACTOR: 16 97 | TEST: 98 | IMAGE_SIZE: 99 | - 520 100 | - 520 101 | BASE_SIZE: 520 102 | BATCH_SIZE_PER_GPU: 1 103 | NUM_SAMPLES: 200 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cocostuff/seg_hrnet_w48_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cocostuff/val.lst' 15 | TRAIN_SET: 'list/cocostuff/train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 4 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 110 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.001 87 | WD: 0.0001 88 | NONBACKBONE_KEYWORDS: ['last_layer'] 89 | NONBACKBONE_MULT: 10 90 | MOMENTUM: 0.9 91 | NESTEROV: false 92 | FLIP: true 93 | MULTI_SCALE: true 94 | DOWNSAMPLERATE: 1 95 | IGNORE_LABEL: 255 96 | SCALE_FACTOR: 16 97 | TEST: 98 | IMAGE_SIZE: 99 | - 520 100 | - 520 101 | BASE_SIZE: 520 102 | BATCH_SIZE_PER_GPU: 1 103 | NUM_SAMPLES: 200 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/lip/seg_hrnet_ocr_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: lip 13 | ROOT: 'data/' 14 | TEST_SET: 'list/lip/valList.txt' 15 | TRAIN_SET: 'list/lip/trainList.txt' 16 | NUM_CLASSES: 20 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained_2.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | 
- 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 473 79 | - 473 80 | BASE_SIZE: 473 81 | BATCH_SIZE_PER_GPU: 10 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 150 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.007 88 | WD: 0.0005 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 11 96 | TEST: 97 | IMAGE_SIZE: 98 | - 473 99 | - 473 100 | BASE_SIZE: 473 101 | BATCH_SIZE_PER_GPU: 10 102 | NUM_SAMPLES: 2000 103 | FLIP_TEST: false 104 | MULTI_SCALE: false 105 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/lip/seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: lip 13 | ROOT: 'data/' 14 | TEST_SET: 'list/lip/valList.txt' 15 | TRAIN_SET: 'list/lip/trainList.txt' 16 | NUM_CLASSES: 20 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 473 78 | - 473 79 | BASE_SIZE: 473 80 | BATCH_SIZE_PER_GPU: 10 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 150 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.007 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 473 98 | - 473 99 | BASE_SIZE: 473 100 | BATCH_SIZE_PER_GPU: 16 101 | NUM_SAMPLES: 2000 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/lip/seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: 
true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: lip 13 | ROOT: '../../../../dataset/lip/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 20 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 473 78 | - 473 79 | BASE_SIZE: 473 80 | BATCH_SIZE_PER_GPU: 10 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 150 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.007 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 473 98 | - 473 99 | BASE_SIZE: 473 100 | BATCH_SIZE_PER_GPU: 8 101 | NUM_SAMPLES: 2000 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/pascal_ctx/seg_hrnet_ocr_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: pascal_ctx 13 | ROOT: 'data/' 14 | TEST_SET: 'val' 15 | TRAIN_SET: 'train' 16 | NUM_CLASSES: 59 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr'] 83 | NONBACKBONE_MULT: 10 84 | SHUFFLE: 
true 85 | BEGIN_EPOCH: 0 86 | END_EPOCH: 200 87 | RESUME: true 88 | OPTIMIZER: sgd 89 | LR: 0.001 90 | WD: 0.0001 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: -1 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 16 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/pascal_ctx/seg_hrnet_ocr_w48_cls60_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: pascal_ctx 13 | ROOT: 'data/' 14 | TEST_SET: 'val' 15 | TRAIN_SET: 'train' 16 | NUM_CLASSES: 60 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr'] 83 | NONBACKBONE_MULT: 10 84 | SHUFFLE: true 85 | BEGIN_EPOCH: 0 86 | END_EPOCH: 200 87 | RESUME: true 88 | OPTIMIZER: sgd 89 | LR: 0.001 90 | WD: 0.0001 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: -1 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 16 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/pascal_ctx/seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: pascal_ctx 13 | ROOT: 'data/' 14 | TEST_SET: 'val' 15 | TRAIN_SET: 'train' 16 | NUM_CLASSES: 59 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | NUM_OUTPUTS: 1 21 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 22 | EXTRA: 23 | FINAL_CONV_KERNEL: 1 24 | STAGE1: 25 | NUM_MODULES: 1 26 | NUM_RANCHES: 1 27 | BLOCK: BOTTLENECK 28 | NUM_BLOCKS: 29 | - 4 30 | NUM_CHANNELS: 31 | - 64 32 | FUSE_METHOD: SUM 33 | STAGE2: 
34 | NUM_MODULES: 1 35 | NUM_BRANCHES: 2 36 | BLOCK: BASIC 37 | NUM_BLOCKS: 38 | - 4 39 | - 4 40 | NUM_CHANNELS: 41 | - 48 42 | - 96 43 | FUSE_METHOD: SUM 44 | STAGE3: 45 | NUM_MODULES: 4 46 | NUM_BRANCHES: 3 47 | BLOCK: BASIC 48 | NUM_BLOCKS: 49 | - 4 50 | - 4 51 | - 4 52 | NUM_CHANNELS: 53 | - 48 54 | - 96 55 | - 192 56 | FUSE_METHOD: SUM 57 | STAGE4: 58 | NUM_MODULES: 3 59 | NUM_BRANCHES: 4 60 | BLOCK: BASIC 61 | NUM_BLOCKS: 62 | - 4 63 | - 4 64 | - 4 65 | - 4 66 | NUM_CHANNELS: 67 | - 48 68 | - 96 69 | - 192 70 | - 384 71 | FUSE_METHOD: SUM 72 | LOSS: 73 | USE_OHEM: false 74 | OHEMTHRES: 0.9 75 | OHEMKEEP: 131072 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | NONBACKBONE_KEYWORDS: ['last_layer'] 83 | NONBACKBONE_MULT: 10 84 | SHUFFLE: true 85 | BEGIN_EPOCH: 0 86 | END_EPOCH: 200 87 | RESUME: true 88 | OPTIMIZER: sgd 89 | LR: 0.001 90 | WD: 0.0001 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: -1 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 16 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/experiments/pascal_ctx/seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: pascal_ctx 13 | ROOT: '../../../../dataset/pascal_context/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 59 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | NUM_OUTPUTS: 1 21 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 22 | EXTRA: 23 | FINAL_CONV_KERNEL: 1 24 | STAGE1: 25 | NUM_MODULES: 1 26 | NUM_RANCHES: 1 27 | BLOCK: BOTTLENECK 28 | NUM_BLOCKS: 29 | - 4 30 | NUM_CHANNELS: 31 | - 64 32 | FUSE_METHOD: SUM 33 | STAGE2: 34 | NUM_MODULES: 1 35 | NUM_BRANCHES: 2 36 | BLOCK: BASIC 37 | NUM_BLOCKS: 38 | - 4 39 | - 4 40 | NUM_CHANNELS: 41 | - 48 42 | - 96 43 | FUSE_METHOD: SUM 44 | STAGE3: 45 | NUM_MODULES: 4 46 | NUM_BRANCHES: 3 47 | BLOCK: BASIC 48 | NUM_BLOCKS: 49 | - 4 50 | - 4 51 | - 4 52 | NUM_CHANNELS: 53 | - 48 54 | - 96 55 | - 192 56 | FUSE_METHOD: SUM 57 | STAGE4: 58 | NUM_MODULES: 3 59 | NUM_BRANCHES: 4 60 | BLOCK: BASIC 61 | NUM_BLOCKS: 62 | - 4 63 | - 4 64 | - 4 65 | - 4 66 | NUM_CHANNELS: 67 | - 48 68 | - 96 69 | - 192 70 | - 384 71 | FUSE_METHOD: SUM 72 | LOSS: 73 | USE_OHEM: false 74 | OHEMTHRES: 0.9 75 | OHEMKEEP: 131072 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | NONBACKBONE_KEYWORDS: ['last_layer'] 83 | NONBACKBONE_MULT: 10 84 | SHUFFLE: true 85 | BEGIN_EPOCH: 0 86 | END_EPOCH: 200 87 | RESUME: true 88 | OPTIMIZER: sgd 89 | LR: 0.001 90 | WD: 0.0001 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: -1 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 16 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- 
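Note: the experiment YAMLs above are not read on their own; they are yacs overlays meant to be merged onto the defaults in lib/config/default.py (dumped further below) through update_config(). A minimal sketch of that flow, assuming a hypothetical argparse-style namespace with cfg and opts attributes:

    # sketch: merge one of the experiment configs on top of the library defaults
    import argparse
    from lib.config import config, update_config

    args = argparse.Namespace(
        cfg='experiments/pascal_ctx/seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml',
        opts=[],  # optional KEY VALUE overrides, e.g. ['TRAIN.BATCH_SIZE_PER_GPU', '2']
    )
    update_config(config, args)        # defrost, merge_from_file, merge_from_list, freeze
    print(config.DATASET.NUM_CLASSES)  # 59 from the YAML, overriding the default of 19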
/HRNet-Semantic-Segmentation-HRNet-OCR/figures/OCR.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/AustNet-Inharmonious-Region-Localization/a02b53ae85b2991829bad84173d5334d2774dd02/HRNet-Semantic-Segmentation-HRNet-OCR/figures/OCR.PNG -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/figures/SegmentationTransformerOCR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/AustNet-Inharmonious-Region-Localization/a02b53ae85b2991829bad84173d5334d2774dd02/HRNet-Semantic-Segmentation-HRNet-OCR/figures/SegmentationTransformerOCR.png -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/figures/SegmentationTransformerOCR1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/AustNet-Inharmonious-Region-Localization/a02b53ae85b2991829bad84173d5334d2774dd02/HRNet-Semantic-Segmentation-HRNet-OCR/figures/SegmentationTransformerOCR1.png -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/figures/SegmentationTransformerOCR2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/AustNet-Inharmonious-Region-Localization/a02b53ae85b2991829bad84173d5334d2774dd02/HRNet-Semantic-Segmentation-HRNet-OCR/figures/SegmentationTransformerOCR2.png -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/figures/seg-hrnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/AustNet-Inharmonious-Region-Localization/a02b53ae85b2991829bad84173d5334d2774dd02/HRNet-Semantic-Segmentation-HRNet-OCR/figures/seg-hrnet.png -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/hubconf.py: -------------------------------------------------------------------------------- 1 | """File for accessing HRNet via PyTorch Hub https://pytorch.org/hub/ 2 | 3 | Usage: 4 | import torch 5 | model = torch.hub.load('<github-user>/<repo>', 'hrnet_w48_cityscapes', pretrained=False) # '<github-user>/<repo>' is a placeholder for wherever this hubconf.py lives 6 | """ 7 | 8 | dependencies = ['torch'] 9 | import torch 10 | from lib.models.seg_hrnet import get_seg_model 11 | from lib.config import config 12 | 13 | state_dict_url = 'https://github.com/huawei-noah/ghostnet/raw/master/pytorch/models/state_dict_93.98.pth' # NOTE: leftover placeholder URL; point this at an HRNet-W48 Cityscapes checkpoint before using pretrained=True 14 | 15 | 16 | def hrnet_w48_cityscapes(pretrained=False, **kwargs): 17 | """ # This docstring shows up in hub.help() 18 | HRNet-W48 segmentation model; merge a Cityscapes experiment YAML into `config` first so that MODEL.EXTRA is populated 19 | pretrained (bool): load pretrained weights into the model 20 | """ 21 | model = get_seg_model(config, **kwargs) # build HRNet from the yacs config rather than the leftover ghostnet() call 22 | if pretrained: 23 | state_dict = torch.hub.load_state_dict_from_url(state_dict_url, progress=True) 24 | model.load_state_dict(state_dict) 25 | return model -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT 
License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from .default import _C as config 11 | from .default import update_config 12 | from .models import MODEL_EXTRAS 13 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/config/default.py: -------------------------------------------------------------------------------- 1 | 2 | # ------------------------------------------------------------------------------ 3 | # Copyright (c) Microsoft 4 | # Licensed under the MIT License. 5 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | 14 | from yacs.config import CfgNode as CN 15 | 16 | 17 | _C = CN() 18 | 19 | _C.OUTPUT_DIR = '' 20 | _C.LOG_DIR = '' 21 | _C.GPUS = (0,) 22 | _C.WORKERS = 4 23 | _C.PRINT_FREQ = 20 24 | _C.AUTO_RESUME = False 25 | _C.PIN_MEMORY = True 26 | _C.RANK = 0 27 | 28 | # Cudnn related params 29 | _C.CUDNN = CN() 30 | _C.CUDNN.BENCHMARK = True 31 | _C.CUDNN.DETERMINISTIC = False 32 | _C.CUDNN.ENABLED = True 33 | 34 | # common params for NETWORK 35 | _C.MODEL = CN() 36 | _C.MODEL.NAME = 'seg_hrnet' 37 | _C.MODEL.PRETRAINED = '/home/wupenghao/transfuser/dirl/HRNet-Semantic-Segmentation-HRNet-OCR/hrnet_ocr_cocostuff_3965_torch04.pth' 38 | _C.MODEL.ALIGN_CORNERS = True 39 | _C.MODEL.NUM_OUTPUTS = 1 40 | _C.MODEL.EXTRA = CN(new_allowed=True) 41 | 42 | 43 | _C.MODEL.OCR = CN() 44 | _C.MODEL.OCR.MID_CHANNELS = 512 45 | _C.MODEL.OCR.KEY_CHANNELS = 256 46 | _C.MODEL.OCR.DROPOUT = 0.05 47 | _C.MODEL.OCR.SCALE = 1 48 | 49 | _C.LOSS = CN() 50 | _C.LOSS.USE_OHEM = False 51 | _C.LOSS.OHEMTHRES = 0.9 52 | _C.LOSS.OHEMKEEP = 100000 53 | _C.LOSS.CLASS_BALANCE = False 54 | _C.LOSS.BALANCE_WEIGHTS = [1] 55 | 56 | # DATASET related params 57 | _C.DATASET = CN() 58 | _C.DATASET.ROOT = '' 59 | _C.DATASET.DATASET = 'cityscapes' 60 | _C.DATASET.NUM_CLASSES = 19 61 | _C.DATASET.TRAIN_SET = 'list/cityscapes/train.lst' 62 | _C.DATASET.EXTRA_TRAIN_SET = '' 63 | _C.DATASET.TEST_SET = 'list/cityscapes/val.lst' 64 | 65 | # training 66 | _C.TRAIN = CN() 67 | 68 | _C.TRAIN.FREEZE_LAYERS = '' 69 | _C.TRAIN.FREEZE_EPOCHS = -1 70 | _C.TRAIN.NONBACKBONE_KEYWORDS = [] 71 | _C.TRAIN.NONBACKBONE_MULT = 10 72 | 73 | _C.TRAIN.IMAGE_SIZE = [1024, 512] # width * height 74 | _C.TRAIN.BASE_SIZE = 2048 75 | _C.TRAIN.DOWNSAMPLERATE = 1 76 | _C.TRAIN.FLIP = True 77 | _C.TRAIN.MULTI_SCALE = True 78 | _C.TRAIN.SCALE_FACTOR = 16 79 | 80 | _C.TRAIN.RANDOM_BRIGHTNESS = False 81 | _C.TRAIN.RANDOM_BRIGHTNESS_SHIFT_VALUE = 10 82 | 83 | _C.TRAIN.LR_FACTOR = 0.1 84 | _C.TRAIN.LR_STEP = [90, 110] 85 | _C.TRAIN.LR = 0.01 86 | _C.TRAIN.EXTRA_LR = 0.001 87 | 88 | _C.TRAIN.OPTIMIZER = 'sgd' 89 | _C.TRAIN.MOMENTUM = 0.9 90 | _C.TRAIN.WD = 0.0001 91 | _C.TRAIN.NESTEROV = False 92 | _C.TRAIN.IGNORE_LABEL = -1 93 | 94 | _C.TRAIN.BEGIN_EPOCH = 0 95 | _C.TRAIN.END_EPOCH = 484 96 | _C.TRAIN.EXTRA_EPOCH = 0 97 | 98 | _C.TRAIN.RESUME = False 99 | 100 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32 101 | _C.TRAIN.SHUFFLE = True 102 | # only using some training samples 103 | _C.TRAIN.NUM_SAMPLES = 0 104 | 105 | # testing 106 | _C.TEST = CN() 107 | 
108 | _C.TEST.IMAGE_SIZE = [2048, 1024] # width * height 109 | _C.TEST.BASE_SIZE = 2048 110 | 111 | _C.TEST.BATCH_SIZE_PER_GPU = 32 112 | # only testing some samples 113 | _C.TEST.NUM_SAMPLES = 0 114 | 115 | _C.TEST.MODEL_FILE = '../hrnet_ocr_cocostuff_3965_torch04.pth' 116 | _C.TEST.FLIP_TEST = False 117 | _C.TEST.MULTI_SCALE = False 118 | _C.TEST.SCALE_LIST = [1] 119 | 120 | _C.TEST.OUTPUT_INDEX = -1 121 | 122 | # debug 123 | _C.DEBUG = CN() 124 | _C.DEBUG.DEBUG = False 125 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 126 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 127 | _C.DEBUG.SAVE_HEATMAPS_GT = False 128 | _C.DEBUG.SAVE_HEATMAPS_PRED = False 129 | 130 | 131 | def update_config(cfg, args): 132 | cfg.defrost() 133 | 134 | cfg.merge_from_file(args.cfg) 135 | cfg.merge_from_list(args.opts) 136 | 137 | cfg.freeze() 138 | 139 | 140 | if __name__ == '__main__': 141 | import sys 142 | with open(sys.argv[1], 'w') as f: 143 | print(_C, file=f) 144 | 145 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/config/hrnet_config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Create by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Ke Sun (sunk@mail.ustc.edu.cn), Rainbowsecret (yuyua@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | from yacs.config import CfgNode as CN 13 | 14 | 15 | # configs for HRNet48 16 | HRNET_48 = CN() 17 | HRNET_48.FINAL_CONV_KERNEL = 1 18 | 19 | HRNET_48.STAGE1 = CN() 20 | HRNET_48.STAGE1.NUM_MODULES = 1 21 | HRNET_48.STAGE1.NUM_BRANCHES = 1 22 | HRNET_48.STAGE1.NUM_BLOCKS = [4] 23 | HRNET_48.STAGE1.NUM_CHANNELS = [64] 24 | HRNET_48.STAGE1.BLOCK = 'BOTTLENECK' 25 | HRNET_48.STAGE1.FUSE_METHOD = 'SUM' 26 | 27 | HRNET_48.STAGE2 = CN() 28 | HRNET_48.STAGE2.NUM_MODULES = 1 29 | HRNET_48.STAGE2.NUM_BRANCHES = 2 30 | HRNET_48.STAGE2.NUM_BLOCKS = [4, 4] 31 | HRNET_48.STAGE2.NUM_CHANNELS = [48, 96] 32 | HRNET_48.STAGE2.BLOCK = 'BASIC' 33 | HRNET_48.STAGE2.FUSE_METHOD = 'SUM' 34 | 35 | HRNET_48.STAGE3 = CN() 36 | HRNET_48.STAGE3.NUM_MODULES = 4 37 | HRNET_48.STAGE3.NUM_BRANCHES = 3 38 | HRNET_48.STAGE3.NUM_BLOCKS = [4, 4, 4] 39 | HRNET_48.STAGE3.NUM_CHANNELS = [48, 96, 192] 40 | HRNET_48.STAGE3.BLOCK = 'BASIC' 41 | HRNET_48.STAGE3.FUSE_METHOD = 'SUM' 42 | 43 | HRNET_48.STAGE4 = CN() 44 | HRNET_48.STAGE4.NUM_MODULES = 3 45 | HRNET_48.STAGE4.NUM_BRANCHES = 4 46 | HRNET_48.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 47 | HRNET_48.STAGE4.NUM_CHANNELS = [48, 96, 192, 384] 48 | HRNET_48.STAGE4.BLOCK = 'BASIC' 49 | HRNET_48.STAGE4.FUSE_METHOD = 'SUM' 50 | 51 | 52 | # configs for HRNet32 53 | HRNET_32 = CN() 54 | HRNET_32.FINAL_CONV_KERNEL = 1 55 | 56 | HRNET_32.STAGE1 = CN() 57 | HRNET_32.STAGE1.NUM_MODULES = 1 58 | HRNET_32.STAGE1.NUM_BRANCHES = 1 59 | HRNET_32.STAGE1.NUM_BLOCKS = [4] 60 | HRNET_32.STAGE1.NUM_CHANNELS = [64] 61 | HRNET_32.STAGE1.BLOCK = 'BOTTLENECK' 62 | HRNET_32.STAGE1.FUSE_METHOD = 'SUM' 63 | 64 | HRNET_32.STAGE2 = CN() 65 | HRNET_32.STAGE2.NUM_MODULES = 1 66 | HRNET_32.STAGE2.NUM_BRANCHES = 2 67 | HRNET_32.STAGE2.NUM_BLOCKS = [4, 4] 68 | HRNET_32.STAGE2.NUM_CHANNELS = [32, 64] 69 | HRNET_32.STAGE2.BLOCK = 'BASIC' 70 | 
HRNET_32.STAGE2.FUSE_METHOD = 'SUM' 71 | 72 | HRNET_32.STAGE3 = CN() 73 | HRNET_32.STAGE3.NUM_MODULES = 4 74 | HRNET_32.STAGE3.NUM_BRANCHES = 3 75 | HRNET_32.STAGE3.NUM_BLOCKS = [4, 4, 4] 76 | HRNET_32.STAGE3.NUM_CHANNELS = [32, 64, 128] 77 | HRNET_32.STAGE3.BLOCK = 'BASIC' 78 | HRNET_32.STAGE3.FUSE_METHOD = 'SUM' 79 | 80 | HRNET_32.STAGE4 = CN() 81 | HRNET_32.STAGE4.NUM_MODULES = 3 82 | HRNET_32.STAGE4.NUM_BRANCHES = 4 83 | HRNET_32.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 84 | HRNET_32.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 85 | HRNET_32.STAGE4.BLOCK = 'BASIC' 86 | HRNET_32.STAGE4.FUSE_METHOD = 'SUM' 87 | 88 | 89 | # configs for HRNet18 90 | HRNET_18 = CN() 91 | HRNET_18.FINAL_CONV_KERNEL = 1 92 | 93 | HRNET_18.STAGE1 = CN() 94 | HRNET_18.STAGE1.NUM_MODULES = 1 95 | HRNET_18.STAGE1.NUM_BRANCHES = 1 96 | HRNET_18.STAGE1.NUM_BLOCKS = [4] 97 | HRNET_18.STAGE1.NUM_CHANNELS = [64] 98 | HRNET_18.STAGE1.BLOCK = 'BOTTLENECK' 99 | HRNET_18.STAGE1.FUSE_METHOD = 'SUM' 100 | 101 | HRNET_18.STAGE2 = CN() 102 | HRNET_18.STAGE2.NUM_MODULES = 1 103 | HRNET_18.STAGE2.NUM_BRANCHES = 2 104 | HRNET_18.STAGE2.NUM_BLOCKS = [4, 4] 105 | HRNET_18.STAGE2.NUM_CHANNELS = [18, 36] 106 | HRNET_18.STAGE2.BLOCK = 'BASIC' 107 | HRNET_18.STAGE2.FUSE_METHOD = 'SUM' 108 | 109 | HRNET_18.STAGE3 = CN() 110 | HRNET_18.STAGE3.NUM_MODULES = 4 111 | HRNET_18.STAGE3.NUM_BRANCHES = 3 112 | HRNET_18.STAGE3.NUM_BLOCKS = [4, 4, 4] 113 | HRNET_18.STAGE3.NUM_CHANNELS = [18, 36, 72] 114 | HRNET_18.STAGE3.BLOCK = 'BASIC' 115 | HRNET_18.STAGE3.FUSE_METHOD = 'SUM' 116 | 117 | HRNET_18.STAGE4 = CN() 118 | HRNET_18.STAGE4.NUM_MODULES = 3 119 | HRNET_18.STAGE4.NUM_BRANCHES = 4 120 | HRNET_18.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 121 | HRNET_18.STAGE4.NUM_CHANNELS = [18, 36, 72, 144] 122 | HRNET_18.STAGE4.BLOCK = 'BASIC' 123 | HRNET_18.STAGE4.FUSE_METHOD = 'SUM' 124 | 125 | 126 | MODEL_CONFIGS = { 127 | 'hrnet18': HRNET_18, 128 | 'hrnet32': HRNET_32, 129 | 'hrnet48': HRNET_48, 130 | } -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | # high_resoluton_net related params for segmentation 14 | HIGH_RESOLUTION_NET = CN() 15 | HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 16 | HIGH_RESOLUTION_NET.STEM_INPLANES = 64 17 | HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 18 | HIGH_RESOLUTION_NET.WITH_HEAD = True 19 | 20 | HIGH_RESOLUTION_NET.STAGE2 = CN() 21 | HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 22 | HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 23 | HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 24 | HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64] 25 | HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC' 26 | HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 27 | 28 | HIGH_RESOLUTION_NET.STAGE3 = CN() 29 | HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 30 | HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 31 | HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 32 | HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128] 33 | HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC' 34 | HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 35 | 36 | HIGH_RESOLUTION_NET.STAGE4 = CN() 37 | HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 38 | HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 39 | HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 40 | HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 41 | HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC' 42 | HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 43 | 44 | MODEL_EXTRAS = { 45 | 'seg_hrnet': HIGH_RESOLUTION_NET, 46 | } 47 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/core/criterion.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import torch 8 | import torch.nn as nn 9 | from torch.nn import functional as F 10 | import logging 11 | from config import config 12 | 13 | 14 | class CrossEntropy(nn.Module): 15 | def __init__(self, ignore_label=-1, weight=None): 16 | super(CrossEntropy, self).__init__() 17 | self.ignore_label = ignore_label 18 | self.criterion = nn.CrossEntropyLoss( 19 | weight=weight, 20 | ignore_index=ignore_label 21 | ) 22 | 23 | def _forward(self, score, target): 24 | ph, pw = score.size(2), score.size(3) 25 | h, w = target.size(1), target.size(2) 26 | if ph != h or pw != w: 27 | score = F.interpolate(input=score, size=( 28 | h, w), mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS) 29 | 30 | loss = self.criterion(score, target) 31 | 32 | return loss 33 | 34 | def forward(self, score, target): 35 | 36 | if config.MODEL.NUM_OUTPUTS == 1: 37 | score = [score] 38 | 39 | weights = config.LOSS.BALANCE_WEIGHTS 40 | assert len(weights) == len(score) 41 | 42 | return sum([w * self._forward(x, target) for (w, x) in zip(weights, score)]) 43 | 44 | 45 | class OhemCrossEntropy(nn.Module): 46 | def __init__(self, ignore_label=-1, thres=0.7, 47 | min_kept=100000, weight=None): 48 | super(OhemCrossEntropy, self).__init__() 49 | self.thresh = thres 50 | self.min_kept = max(1, min_kept) 51 | self.ignore_label = ignore_label 52 | self.criterion = nn.CrossEntropyLoss( 53 | weight=weight, 54 | ignore_index=ignore_label, 55 | reduction='none' 56 | ) 57 | 58 | def _ce_forward(self, score, target): 59 | ph, pw = score.size(2), score.size(3) 60 | h, w = target.size(1), target.size(2) 61 | if ph != h or pw != w: 62 | score = F.interpolate(input=score, size=( 63 | h, w), mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS) 64 | 65 | loss = self.criterion(score, target) 66 | 67 | return loss 68 | 69 | def _ohem_forward(self, score, target, **kwargs): 70 | ph, pw = score.size(2), score.size(3) 71 | h, w = target.size(1), target.size(2) 72 | if ph != h or pw != w: 73 | score = F.interpolate(input=score, size=( 74 | h, w), mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS) 75 | pred = F.softmax(score, dim=1) 76 | pixel_losses = self.criterion(score, target).contiguous().view(-1) 77 | mask = target.contiguous().view(-1) != self.ignore_label 78 | 79 | tmp_target = target.clone() 80 | tmp_target[tmp_target == self.ignore_label] = 0 81 | pred = pred.gather(1, tmp_target.unsqueeze(1)) 82 | pred, ind = pred.contiguous().view(-1,)[mask].contiguous().sort() 83 | min_value = pred[min(self.min_kept, pred.numel() - 1)] 84 | threshold = max(min_value, self.thresh) 85 | 86 | pixel_losses = pixel_losses[mask][ind] 87 | pixel_losses = pixel_losses[pred < threshold] 88 | return pixel_losses.mean() 89 | 90 | def forward(self, score, target): 91 | 92 | if config.MODEL.NUM_OUTPUTS == 1: 93 | score = [score] 94 | 95 | weights = config.LOSS.BALANCE_WEIGHTS 96 | assert len(weights) == len(score) 97 | 98 | functions = [self._ce_forward] * \ 99 | (len(weights) - 1) + [self._ohem_forward] 100 | return sum([ 101 | w * func(x, target) 102 | for (w, x, func) in zip(weights, score, functions) 103 | ]) 104 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/core/function.py: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import logging 8 | import os 9 | import time 10 | 11 | import numpy as np 12 | import numpy.ma as ma 13 | from tqdm import tqdm 14 | 15 | import torch 16 | import torch.nn as nn 17 | from torch.nn import functional as F 18 | 19 | from utils.utils import AverageMeter 20 | from utils.utils import get_confusion_matrix 21 | from utils.utils import adjust_learning_rate 22 | 23 | import utils.distributed as dist 24 | 25 | 26 | def reduce_tensor(inp): 27 | """ 28 | Reduce the loss from all processes so that 29 | process with rank 0 has the averaged results. 30 | """ 31 | world_size = dist.get_world_size() 32 | if world_size < 2: 33 | return inp 34 | with torch.no_grad(): 35 | reduced_inp = inp 36 | torch.distributed.reduce(reduced_inp, dst=0) 37 | return reduced_inp / world_size 38 | 39 | 40 | def train(config, epoch, num_epoch, epoch_iters, base_lr, 41 | num_iters, trainloader, optimizer, model, writer_dict): 42 | # Training 43 | model.train() 44 | 45 | batch_time = AverageMeter() 46 | ave_loss = AverageMeter() 47 | tic = time.time() 48 | cur_iters = epoch*epoch_iters 49 | writer = writer_dict['writer'] 50 | global_steps = writer_dict['train_global_steps'] 51 | 52 | for i_iter, batch in enumerate(trainloader, 0): 53 | images, labels, _, _ = batch 54 | images = images.cuda() 55 | labels = labels.long().cuda() 56 | 57 | losses, _ = model(images, labels) 58 | loss = losses.mean() 59 | 60 | if dist.is_distributed(): 61 | reduced_loss = reduce_tensor(loss) 62 | else: 63 | reduced_loss = loss 64 | 65 | model.zero_grad() 66 | loss.backward() 67 | optimizer.step() 68 | 69 | # measure elapsed time 70 | batch_time.update(time.time() - tic) 71 | tic = time.time() 72 | 73 | # update average loss 74 | ave_loss.update(reduced_loss.item()) 75 | 76 | lr = adjust_learning_rate(optimizer, 77 | base_lr, 78 | num_iters, 79 | i_iter+cur_iters) 80 | 81 | if i_iter % config.PRINT_FREQ == 0 and dist.get_rank() == 0: 82 | msg = 'Epoch: [{}/{}] Iter:[{}/{}], Time: {:.2f}, ' \ 83 | 'lr: {}, Loss: {:.6f}' .format( 84 | epoch, num_epoch, i_iter, epoch_iters, 85 | batch_time.average(), [x['lr'] for x in optimizer.param_groups], ave_loss.average()) 86 | logging.info(msg) 87 | 88 | writer.add_scalar('train_loss', ave_loss.average(), global_steps) 89 | writer_dict['train_global_steps'] = global_steps + 1 90 | 91 | def validate(config, testloader, model, writer_dict): 92 | model.eval() 93 | ave_loss = AverageMeter() 94 | nums = config.MODEL.NUM_OUTPUTS 95 | confusion_matrix = np.zeros( 96 | (config.DATASET.NUM_CLASSES, config.DATASET.NUM_CLASSES, nums)) 97 | with torch.no_grad(): 98 | for idx, batch in enumerate(testloader): 99 | image, label, _, _ = batch 100 | size = label.size() 101 | image = image.cuda() 102 | label = label.long().cuda() 103 | 104 | losses, pred = model(image, label) 105 | if not isinstance(pred, (list, tuple)): 106 | pred = [pred] 107 | for i, x in enumerate(pred): 108 | x = F.interpolate( 109 | input=x, size=size[-2:], 110 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 111 | ) 112 | 113 | confusion_matrix[..., i] += get_confusion_matrix( 114 | label, 115 | x, 116 | size, 117 | config.DATASET.NUM_CLASSES, 118 | config.TRAIN.IGNORE_LABEL 119 | ) 120 | 121 | if idx % 10 == 0: 122 | print(idx) 123 | 124 | loss = 
losses.mean() 125 | if dist.is_distributed(): 126 | reduced_loss = reduce_tensor(loss) 127 | else: 128 | reduced_loss = loss 129 | ave_loss.update(reduced_loss.item()) 130 | 131 | if dist.is_distributed(): 132 | confusion_matrix = torch.from_numpy(confusion_matrix).cuda() 133 | reduced_confusion_matrix = reduce_tensor(confusion_matrix) 134 | confusion_matrix = reduced_confusion_matrix.cpu().numpy() 135 | 136 | for i in range(nums): 137 | pos = confusion_matrix[..., i].sum(1) 138 | res = confusion_matrix[..., i].sum(0) 139 | tp = np.diag(confusion_matrix[..., i]) 140 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 141 | mean_IoU = IoU_array.mean() 142 | if dist.get_rank() <= 0: 143 | logging.info('{} {} {}'.format(i, IoU_array, mean_IoU)) 144 | 145 | writer = writer_dict['writer'] 146 | global_steps = writer_dict['valid_global_steps'] 147 | writer.add_scalar('valid_loss', ave_loss.average(), global_steps) 148 | writer.add_scalar('valid_mIoU', mean_IoU, global_steps) 149 | writer_dict['valid_global_steps'] = global_steps + 1 150 | return ave_loss.average(), mean_IoU, IoU_array 151 | 152 | 153 | def testval(config, test_dataset, testloader, model, 154 | sv_dir='', sv_pred=False): 155 | model.eval() 156 | confusion_matrix = np.zeros( 157 | (config.DATASET.NUM_CLASSES, config.DATASET.NUM_CLASSES)) 158 | with torch.no_grad(): 159 | for index, batch in enumerate(tqdm(testloader)): 160 | image, label, _, name, *border_padding = batch 161 | size = label.size() 162 | pred = test_dataset.multi_scale_inference( 163 | config, 164 | model, 165 | image, 166 | scales=config.TEST.SCALE_LIST, 167 | flip=config.TEST.FLIP_TEST) 168 | 169 | if len(border_padding) > 0: 170 | border_padding = border_padding[0] 171 | pred = pred[:, :, 0:pred.size(2) - border_padding[0], 0:pred.size(3) - border_padding[1]] 172 | 173 | if pred.size()[-2] != size[-2] or pred.size()[-1] != size[-1]: 174 | pred = F.interpolate( 175 | pred, size[-2:], 176 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 177 | ) 178 | 179 | confusion_matrix += get_confusion_matrix( 180 | label, 181 | pred, 182 | size, 183 | config.DATASET.NUM_CLASSES, 184 | config.TRAIN.IGNORE_LABEL) 185 | 186 | if sv_pred: 187 | sv_path = os.path.join(sv_dir, 'test_results') 188 | if not os.path.exists(sv_path): 189 | os.mkdir(sv_path) 190 | test_dataset.save_pred(pred, sv_path, name) 191 | 192 | if index % 100 == 0: 193 | logging.info('processing: %d images' % index) 194 | pos = confusion_matrix.sum(1) 195 | res = confusion_matrix.sum(0) 196 | tp = np.diag(confusion_matrix) 197 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 198 | mean_IoU = IoU_array.mean() 199 | logging.info('mIoU: %.4f' % (mean_IoU)) 200 | 201 | pos = confusion_matrix.sum(1) 202 | res = confusion_matrix.sum(0) 203 | tp = np.diag(confusion_matrix) 204 | pixel_acc = tp.sum()/pos.sum() 205 | mean_acc = (tp/np.maximum(1.0, pos)).mean() 206 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 207 | mean_IoU = IoU_array.mean() 208 | 209 | return mean_IoU, IoU_array, pixel_acc, mean_acc 210 | 211 | 212 | def test(config, test_dataset, testloader, model, 213 | sv_dir='', sv_pred=True): 214 | model.eval() 215 | with torch.no_grad(): 216 | for _, batch in enumerate(tqdm(testloader)): 217 | image, size, name = batch 218 | size = size[0] 219 | pred = test_dataset.multi_scale_inference( 220 | config, 221 | model, 222 | image, 223 | scales=config.TEST.SCALE_LIST, 224 | flip=config.TEST.FLIP_TEST) 225 | 226 | if pred.size()[-2] != size[0] or pred.size()[-1] != size[1]: 227 | pred = 
F.interpolate( 228 | pred, size[-2:], 229 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 230 | ) 231 | 232 | if sv_pred: 233 | sv_path = os.path.join(sv_dir, 'test_results') 234 | if not os.path.exists(sv_path): 235 | os.mkdir(sv_path) 236 | test_dataset.save_pred(pred, sv_path, name) 237 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from .cityscapes import Cityscapes as cityscapes 12 | from .lip import LIP as lip 13 | from .pascal_ctx import PASCALContext as pascal_ctx 14 | from .ade20k import ADE20K as ade20k 15 | from .cocostuff import COCOStuff as cocostuff -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | 12 | import torch 13 | from torch.nn import functional as F 14 | from PIL import Image 15 | 16 | from .base_dataset import BaseDataset 17 | 18 | 19 | class ADE20K(BaseDataset): 20 | def __init__(self, 21 | root, 22 | list_path, 23 | num_samples=None, 24 | num_classes=150, 25 | multi_scale=True, 26 | flip=True, 27 | ignore_label=-1, 28 | base_size=520, 29 | crop_size=(520, 520), 30 | downsample_rate=1, 31 | scale_factor=11, 32 | mean=[0.485, 0.456, 0.406], 33 | std=[0.229, 0.224, 0.225]): 34 | 35 | super(ADE20K, self).__init__(ignore_label, base_size, 36 | crop_size, downsample_rate, scale_factor, mean, std) 37 | 38 | self.root = root 39 | self.num_classes = num_classes 40 | self.list_path = list_path 41 | self.class_weights = None 42 | 43 | self.multi_scale = multi_scale 44 | self.flip = flip 45 | self.img_list = [line.strip().split() for line in open(root+list_path)] 46 | 47 | self.files = self.read_files() 48 | if num_samples: 49 | self.files = self.files[:num_samples] 50 | 51 | def read_files(self): 52 | files = [] 53 | for item in self.img_list: 54 | image_path, label_path = item 55 | name = os.path.splitext(os.path.basename(label_path))[0] 56 | sample = { 57 | 'img': image_path, 58 | 'label': label_path, 59 | 'name': name 60 | } 61 | files.append(sample) 62 | return files 63 | 64 | def resize_image(self, image, label, size): 65 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) 66 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST) 67 | return image, label 68 | 69 | def __getitem__(self, index): 70 | item = self.files[index] 71 | name = item["name"] 72 | # image_path = os.path.join(self.root, 'ade20k', item['img']) 73 | # label_path = os.path.join(self.root, 'ade20k', item['label']) 74 | image_path = os.path.join(self.root, 
item['img']) 75 | label_path = os.path.join(self.root, item['label']) 76 | image = cv2.imread( 77 | image_path, 78 | cv2.IMREAD_COLOR 79 | ) 80 | label = np.array( 81 | Image.open(label_path).convert('P') 82 | ) 83 | label = self.reduce_zero_label(label) 84 | size = label.shape 85 | 86 | if 'testval' in self.list_path: 87 | image = self.resize_short_length( 88 | image, 89 | short_length=self.base_size, 90 | fit_stride=8 91 | ) 92 | image = self.input_transform(image) 93 | image = image.transpose((2, 0, 1)) 94 | 95 | return image.copy(), label.copy(), np.array(size), name 96 | 97 | if 'val' in self.list_path: 98 | image, label = self.resize_short_length( 99 | image, 100 | label=label, 101 | short_length=self.base_size, 102 | fit_stride=8 103 | ) 104 | image, label = self.rand_crop(image, label) 105 | image = self.input_transform(image) 106 | image = image.transpose((2, 0, 1)) 107 | 108 | return image.copy(), label.copy(), np.array(size), name 109 | 110 | image, label = self.resize_short_length(image, label, short_length=self.base_size) 111 | image, label = self.gen_sample(image, label, self.multi_scale, self.flip) 112 | 113 | return image.copy(), label.copy(), np.array(size), name -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | from PIL import Image 12 | 13 | import torch 14 | from torch.nn import functional as F 15 | 16 | from .base_dataset import BaseDataset 17 | 18 | class Cityscapes(BaseDataset): 19 | def __init__(self, 20 | root, 21 | list_path, 22 | num_samples=None, 23 | num_classes=19, 24 | multi_scale=True, 25 | flip=True, 26 | ignore_label=-1, 27 | base_size=2048, 28 | crop_size=(512, 1024), 29 | downsample_rate=1, 30 | scale_factor=16, 31 | mean=[0.485, 0.456, 0.406], 32 | std=[0.229, 0.224, 0.225]): 33 | 34 | super(Cityscapes, self).__init__(ignore_label, base_size, 35 | crop_size, downsample_rate, scale_factor, mean, std,) 36 | 37 | self.root = root 38 | self.list_path = list_path 39 | self.num_classes = num_classes 40 | 41 | self.multi_scale = multi_scale 42 | self.flip = flip 43 | 44 | self.img_list = [line.strip().split() for line in open(root+list_path)] 45 | 46 | self.files = self.read_files() 47 | if num_samples: 48 | self.files = self.files[:num_samples] 49 | 50 | self.label_mapping = {-1: ignore_label, 0: ignore_label, 51 | 1: ignore_label, 2: ignore_label, 52 | 3: ignore_label, 4: ignore_label, 53 | 5: ignore_label, 6: ignore_label, 54 | 7: 0, 8: 1, 9: ignore_label, 55 | 10: ignore_label, 11: 2, 12: 3, 56 | 13: 4, 14: ignore_label, 15: ignore_label, 57 | 16: ignore_label, 17: 5, 18: ignore_label, 58 | 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 59 | 25: 12, 26: 13, 27: 14, 28: 15, 60 | 29: ignore_label, 30: ignore_label, 61 | 31: 16, 32: 17, 33: 18} 62 | self.class_weights = torch.FloatTensor([0.8373, 0.918, 0.866, 1.0345, 63 | 1.0166, 0.9969, 0.9754, 1.0489, 64 | 0.8786, 1.0023, 0.9539, 0.9843, 65 | 1.1116, 0.9037, 1.0865, 1.0955, 66 | 1.0865, 1.1529, 1.0507]).cuda() 67 | 68 | def read_files(self): 69 | files = [] 70 | if 'test' in 
self.list_path: 71 | for item in self.img_list: 72 | image_path = item 73 | name = os.path.splitext(os.path.basename(image_path[0]))[0] 74 | files.append({ 75 | "img": image_path[0], 76 | "name": name, 77 | }) 78 | else: 79 | for item in self.img_list: 80 | image_path, label_path = item 81 | name = os.path.splitext(os.path.basename(label_path))[0] 82 | files.append({ 83 | "img": image_path, 84 | "label": label_path, 85 | "name": name, 86 | "weight": 1 87 | }) 88 | return files 89 | 90 | def convert_label(self, label, inverse=False): 91 | temp = label.copy() 92 | if inverse: 93 | for v, k in self.label_mapping.items(): 94 | label[temp == k] = v 95 | else: 96 | for k, v in self.label_mapping.items(): 97 | label[temp == k] = v 98 | return label 99 | 100 | def __getitem__(self, index): 101 | item = self.files[index] 102 | name = item["name"] 103 | # image = cv2.imread(os.path.join(self.root,'cityscapes',item["img"]), 104 | # cv2.IMREAD_COLOR) 105 | image = cv2.imread(os.path.join(self.root, item["img"]), 106 | cv2.IMREAD_COLOR) 107 | size = image.shape 108 | 109 | if 'test' in self.list_path: 110 | image = self.input_transform(image) 111 | image = image.transpose((2, 0, 1)) 112 | 113 | return image.copy(), np.array(size), name 114 | 115 | # label = cv2.imread(os.path.join(self.root,'cityscapes',item["label"]), 116 | # cv2.IMREAD_GRAYSCALE) 117 | label = cv2.imread(os.path.join(self.root, item["label"]), 118 | cv2.IMREAD_GRAYSCALE) 119 | label = self.convert_label(label) 120 | 121 | image, label = self.gen_sample(image, label, 122 | self.multi_scale, self.flip) 123 | 124 | return image.copy(), label.copy(), np.array(size), name 125 | 126 | def multi_scale_inference(self, config, model, image, scales=[1], flip=False): 127 | batch, _, ori_height, ori_width = image.size() 128 | assert batch == 1, "only supporting batchsize 1." 
129 | image = image.numpy()[0].transpose((1,2,0)).copy() 130 | stride_h = np.int(self.crop_size[0] * 1.0) 131 | stride_w = np.int(self.crop_size[1] * 1.0) 132 | final_pred = torch.zeros([1, self.num_classes, 133 | ori_height,ori_width]).cuda() 134 | for scale in scales: 135 | new_img = self.multi_scale_aug(image=image, 136 | rand_scale=scale, 137 | rand_crop=False) 138 | height, width = new_img.shape[:-1] 139 | 140 | if scale <= 1.0: 141 | new_img = new_img.transpose((2, 0, 1)) 142 | new_img = np.expand_dims(new_img, axis=0) 143 | new_img = torch.from_numpy(new_img) 144 | preds = self.inference(config, model, new_img, flip) 145 | preds = preds[:, :, 0:height, 0:width] 146 | else: 147 | new_h, new_w = new_img.shape[:-1] 148 | rows = np.int(np.ceil(1.0 * (new_h - 149 | self.crop_size[0]) / stride_h)) + 1 150 | cols = np.int(np.ceil(1.0 * (new_w - 151 | self.crop_size[1]) / stride_w)) + 1 152 | preds = torch.zeros([1, self.num_classes, 153 | new_h,new_w]).cuda() 154 | count = torch.zeros([1,1, new_h, new_w]).cuda() 155 | 156 | for r in range(rows): 157 | for c in range(cols): 158 | h0 = r * stride_h 159 | w0 = c * stride_w 160 | h1 = min(h0 + self.crop_size[0], new_h) 161 | w1 = min(w0 + self.crop_size[1], new_w) 162 | h0 = max(int(h1 - self.crop_size[0]), 0) 163 | w0 = max(int(w1 - self.crop_size[1]), 0) 164 | crop_img = new_img[h0:h1, w0:w1, :] 165 | crop_img = crop_img.transpose((2, 0, 1)) 166 | crop_img = np.expand_dims(crop_img, axis=0) 167 | crop_img = torch.from_numpy(crop_img) 168 | pred = self.inference(config, model, crop_img, flip) 169 | preds[:,:,h0:h1,w0:w1] += pred[:,:, 0:h1-h0, 0:w1-w0] 170 | count[:,:,h0:h1,w0:w1] += 1 171 | preds = preds / count 172 | preds = preds[:,:,:height,:width] 173 | 174 | preds = F.interpolate( 175 | preds, (ori_height, ori_width), 176 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 177 | ) 178 | final_pred += preds 179 | return final_pred 180 | 181 | def get_palette(self, n): 182 | palette = [0] * (n * 3) 183 | for j in range(0, n): 184 | lab = j 185 | palette[j * 3 + 0] = 0 186 | palette[j * 3 + 1] = 0 187 | palette[j * 3 + 2] = 0 188 | i = 0 189 | while lab: 190 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 191 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 192 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 193 | i += 1 194 | lab >>= 3 195 | return palette 196 | 197 | def save_pred(self, preds, sv_path, name): 198 | palette = self.get_palette(256) 199 | preds = np.asarray(np.argmax(preds.cpu(), axis=1), dtype=np.uint8) 200 | for i in range(preds.shape[0]): 201 | pred = self.convert_label(preds[i], inverse=True) 202 | save_img = Image.fromarray(pred) 203 | save_img.putpalette(palette) 204 | save_img.save(os.path.join(sv_path, name[i]+'.png')) 205 | 206 | 207 | 208 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/datasets/cocostuff.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | 12 | import torch 13 | from torch.nn import functional as F 14 | from PIL import Image 15 | 16 | from .base_dataset import BaseDataset 17 | 18 | 19 | class COCOStuff(BaseDataset): 20 | def __init__(self, 21 | root, 22 | list_path, 23 | num_samples=None, 24 | num_classes=171, 25 | multi_scale=True, 26 | flip=True, 27 | ignore_label=-1, 28 | base_size=520, 29 | crop_size=(520, 520), 30 | downsample_rate=1, 31 | scale_factor=11, 32 | mean=[0.485, 0.456, 0.406], 33 | std=[0.229, 0.224, 0.225]): 34 | 35 | super(COCOStuff, self).__init__(ignore_label, base_size, 36 | crop_size, downsample_rate, scale_factor, mean, std) 37 | 38 | self.root = root 39 | self.num_classes = num_classes 40 | self.list_path = list_path 41 | self.class_weights = None 42 | 43 | self.multi_scale = multi_scale 44 | self.flip = flip 45 | self.crop_size = crop_size 46 | self.img_list = [line.strip().split() for line in open(root+list_path)] 47 | 48 | self.files = self.read_files() 49 | if num_samples: 50 | self.files = self.files[:num_samples] 51 | self.mapping = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 52 | 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 53 | 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 54 | 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 55 | 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 56 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 57 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 58 | 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 59 | 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 60 | 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 61 | 177, 178, 179, 180, 181, 182] 62 | 63 | def read_files(self): 64 | files = [] 65 | for item in self.img_list: 66 | image_path, label_path = item 67 | name = os.path.splitext(os.path.basename(label_path))[0] 68 | sample = { 69 | 'img': image_path, 70 | 'label': label_path, 71 | 'name': name 72 | } 73 | files.append(sample) 74 | return files 75 | 76 | def encode_label(self, labelmap): 77 | ret = np.ones_like(labelmap) * 255 78 | for idx, label in enumerate(self.mapping): 79 | ret[labelmap == label] = idx 80 | 81 | return ret 82 | 83 | def resize_image(self, image, label, size): 84 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) 85 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST) 86 | return image, label 87 | 88 | def __getitem__(self, index): 89 | item = self.files[index] 90 | name = item["name"] 91 | image_path = os.path.join(self.root, item['img']) 92 | label_path = os.path.join(self.root, item['label']) 93 | image = cv2.imread( 94 | image_path, 95 | cv2.IMREAD_COLOR 96 | ) 97 | label = np.array( 98 | Image.open(label_path).convert('P') 99 | ) 100 | label = self.encode_label(label) 101 | label = self.reduce_zero_label(label) 102 | size = label.shape 103 | 104 | if 'testval' in self.list_path: 105 | image, border_padding = self.resize_short_length( 106 | image, 107 | short_length=self.base_size, 108 | fit_stride=8, 109 | return_padding=True 110 | ) 111 | image = self.input_transform(image) 112 | image = image.transpose((2, 0, 1)) 113 | 114 | return image.copy(), 
label.copy(), np.array(size), name, border_padding 115 | 116 | if 'val' in self.list_path: 117 | image, label = self.resize_short_length( 118 | image, 119 | label=label, 120 | short_length=self.base_size, 121 | fit_stride=8 122 | ) 123 | image, label = self.rand_crop(image, label) 124 | image = self.input_transform(image) 125 | image = image.transpose((2, 0, 1)) 126 | 127 | return image.copy(), label.copy(), np.array(size), name 128 | 129 | image, label = self.resize_short_length(image, label, short_length=self.base_size) 130 | image, label = self.gen_sample(image, label, self.multi_scale, self.flip) 131 | 132 | return image.copy(), label.copy(), np.array(size), name -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/datasets/lip.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | 12 | import torch 13 | from torch.nn import functional as F 14 | from PIL import Image 15 | 16 | from .base_dataset import BaseDataset 17 | 18 | 19 | class LIP(BaseDataset): 20 | def __init__(self, 21 | root, 22 | list_path, 23 | num_samples=None, 24 | num_classes=20, 25 | multi_scale=True, 26 | flip=True, 27 | ignore_label=-1, 28 | base_size=473, 29 | crop_size=(473, 473), 30 | downsample_rate=1, 31 | scale_factor=11, 32 | mean=[0.485, 0.456, 0.406], 33 | std=[0.229, 0.224, 0.225]): 34 | 35 | super(LIP, self).__init__(ignore_label, base_size, 36 | crop_size, downsample_rate, scale_factor, mean, std) 37 | 38 | self.root = root 39 | self.num_classes = num_classes 40 | self.list_path = list_path 41 | self.class_weights = None 42 | 43 | self.multi_scale = multi_scale 44 | self.flip = flip 45 | self.img_list = [line.strip().split() for line in open(root+list_path)] 46 | 47 | self.files = self.read_files() 48 | if num_samples: 49 | self.files = self.files[:num_samples] 50 | 51 | def read_files(self): 52 | files = [] 53 | for item in self.img_list: 54 | if 'train' in self.list_path: 55 | image_path, label_path, _ = item 56 | name = os.path.splitext(os.path.basename(label_path))[0] 57 | sample = {"img": image_path, 58 | "label": label_path, 59 | "name": name, } 60 | elif 'val' in self.list_path: 61 | image_path, label_path = item 62 | name = os.path.splitext(os.path.basename(label_path))[0] 63 | sample = {"img": image_path, 64 | "label": label_path, 65 | "name": name, } 66 | else: 67 | raise NotImplementedError('Unknown subset.') 68 | files.append(sample) 69 | return files 70 | 71 | def resize_image(self, image, label, size): 72 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) 73 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST) 74 | return image, label 75 | 76 | def __getitem__(self, index): 77 | item = self.files[index] 78 | name = item["name"] 79 | image_path = os.path.join(self.root, item['img']) 80 | label_path = os.path.join(self.root, item['label']) 81 | image = cv2.imread( 82 | image_path, 83 | cv2.IMREAD_COLOR 84 | ) 85 | label = np.array( 86 | Image.open(label_path).convert('P') 87 | ) 88 | 89 | size = label.shape 90 | if 'testval' in self.list_path: 91 | image = cv2.resize(image, self.crop_size, 92 | 
interpolation=cv2.INTER_LINEAR) 93 | image = self.input_transform(image) 94 | image = image.transpose((2, 0, 1)) 95 | 96 | return image.copy(), label.copy(), np.array(size), name 97 | 98 | if self.flip: 99 | flip = np.random.choice(2) * 2 - 1 100 | image = image[:, ::flip, :] 101 | label = label[:, ::flip] 102 | 103 | if flip == -1: 104 | right_idx = [15, 17, 19] 105 | left_idx = [14, 16, 18] 106 | for i in range(0, 3): 107 | right_pos = np.where(label == right_idx[i]) 108 | left_pos = np.where(label == left_idx[i]) 109 | label[right_pos[0], right_pos[1]] = left_idx[i] 110 | label[left_pos[0], left_pos[1]] = right_idx[i] 111 | 112 | image, label = self.resize_image(image, label, self.crop_size) 113 | image, label = self.gen_sample(image, label, 114 | self.multi_scale, False) 115 | 116 | return image.copy(), label.copy(), np.array(size), name 117 | 118 | def inference(self, config, model, image, flip): 119 | size = image.size() 120 | pred = model(image) 121 | if config.MODEL.NUM_OUTPUTS > 1: 122 | pred = pred[config.TEST.OUTPUT_INDEX] 123 | 124 | pred = F.interpolate( 125 | input=pred, size=size[-2:], 126 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 127 | ) 128 | 129 | if flip: 130 | flip_img = image.numpy()[:, :, :, ::-1] 131 | flip_output = model(torch.from_numpy(flip_img.copy())) 132 | 133 | if config.MODEL.NUM_OUTPUTS > 1: 134 | flip_output = flip_output[config.TEST.OUTPUT_INDEX] 135 | 136 | flip_output = F.interpolate( 137 | input=flip_output, size=size[-2:], 138 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 139 | ) 140 | 141 | flip_output = flip_output.cpu() 142 | flip_pred = flip_output.cpu().numpy().copy() 143 | flip_pred[:, 14, :, :] = flip_output[:, 15, :, :] 144 | flip_pred[:, 15, :, :] = flip_output[:, 14, :, :] 145 | flip_pred[:, 16, :, :] = flip_output[:, 17, :, :] 146 | flip_pred[:, 17, :, :] = flip_output[:, 16, :, :] 147 | flip_pred[:, 18, :, :] = flip_output[:, 19, :, :] 148 | flip_pred[:, 19, :, :] = flip_output[:, 18, :, :] 149 | flip_pred = torch.from_numpy( 150 | flip_pred[:, :, :, ::-1].copy()).cuda() 151 | pred += flip_pred 152 | pred = pred * 0.5 153 | return pred.exp() 154 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/datasets/pascal_ctx.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # Referring to the implementation in 6 | # https://github.com/zhanghang1989/PyTorch-Encoding 7 | # ------------------------------------------------------------------------------ 8 | 9 | import os 10 | 11 | import cv2 12 | import numpy as np 13 | 14 | import torch 15 | from torch.nn import functional as F 16 | from PIL import Image 17 | 18 | from .base_dataset import BaseDataset 19 | 20 | class PASCALContext(BaseDataset): 21 | def __init__(self, 22 | root, 23 | list_path, 24 | num_samples=None, 25 | num_classes=59, 26 | multi_scale=True, 27 | flip=True, 28 | ignore_label=-1, 29 | base_size=520, 30 | crop_size=(480, 480), 31 | downsample_rate=1, 32 | scale_factor=16, 33 | mean=[0.485, 0.456, 0.406], 34 | std=[0.229, 0.224, 0.225]): 35 | 36 | super(PASCALContext, self).__init__(ignore_label, base_size, 37 | crop_size, downsample_rate, scale_factor, mean, std) 38 | 39 | self.root = root 40 | self.num_classes = num_classes 41 | self.list_path = list_path 42 | self.class_weights = None 43 | 44 | self.multi_scale = multi_scale 45 | self.flip = flip 46 | self.crop_size = crop_size 47 | self.img_list = [line.strip().split() for line in open(root+list_path)] 48 | 49 | self.files = self.read_files() 50 | if num_samples: 51 | self.files = self.files[:num_samples] 52 | 53 | def read_files(self): 54 | files = [] 55 | for item in self.img_list: 56 | image_path, label_path = item 57 | name = os.path.splitext(os.path.basename(label_path))[0] 58 | sample = { 59 | 'img': image_path, 60 | 'label': label_path, 61 | 'name': name 62 | } 63 | files.append(sample) 64 | return files 65 | 66 | def resize_image(self, image, label, size): 67 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) 68 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST) 69 | return image, label 70 | 71 | def __getitem__(self, index): 72 | item = self.files[index] 73 | name = item["name"] 74 | image_path = os.path.join(self.root, item['img']) 75 | label_path = os.path.join(self.root, item['label']) 76 | image = cv2.imread( 77 | image_path, 78 | cv2.IMREAD_COLOR 79 | ) 80 | label = np.array( 81 | Image.open(label_path).convert('P') 82 | ) 83 | if self.num_classes == 59: 84 | label = self.reduce_zero_label(label) 85 | size = label.shape 86 | 87 | if 'testval' in self.list_path: 88 | image, border_padding = self.resize_short_length( 89 | image, 90 | short_length=self.base_size, 91 | fit_stride=8, 92 | return_padding=True 93 | ) 94 | image = self.input_transform(image) 95 | image = image.transpose((2, 0, 1)) 96 | 97 | return image.copy(), label.copy(), np.array(size), name, border_padding 98 | 99 | if 'val' in self.list_path: 100 | image, label = self.resize_short_length( 101 | image, 102 | label=label, 103 | short_length=self.base_size, 104 | fit_stride=8 105 | ) 106 | image, label = self.rand_crop(image, label) 107 | image = self.input_transform(image) 108 | image = image.transpose((2, 0, 1)) 109 | 110 | return image.copy(), label.copy(), np.array(size), name 111 | 112 | image, label = self.resize_short_length(image, label, short_length=self.base_size) 113 | image, label = self.gen_sample(image, label, self.multi_scale, self.flip) 114 | 115 | return image.copy(), label.copy(), np.array(size), name -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import models.seg_hrnet 12 | import models.seg_hrnet_ocr -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/models/bn_helper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import functools 3 | 4 | if torch.__version__.startswith('0'): 5 | from .sync_bn.inplace_abn.bn import InPlaceABNSync 6 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 7 | BatchNorm2d_class = InPlaceABNSync 8 | relu_inplace = False 9 | else: 10 | BatchNorm2d_class = BatchNorm2d = torch.nn.SyncBatchNorm 11 | relu_inplace = True -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/models/sync_bn/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | BSD 3-Clause License 3 | 4 | Copyright (c) 2017, mapillary 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/models/sync_bn/__init__.py: -------------------------------------------------------------------------------- 1 | from .inplace_abn import bn -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/models/sync_bn/inplace_abn/__init__.py: -------------------------------------------------------------------------------- 1 | from .bn import ABN, InPlaceABN, InPlaceABNSync 2 | from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE 3 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/models/sync_bn/inplace_abn/bn.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as functional 5 | 6 | try: 7 | from queue import Queue 8 | except ImportError: 9 | from Queue import Queue 10 | 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | sys.path.append(BASE_DIR) 13 | sys.path.append(os.path.join(BASE_DIR, '../src')) 14 | from functions import * 15 | 16 | 17 | class ABN(nn.Module): 18 | """Activated Batch Normalization 19 | 20 | This gathers a `BatchNorm2d` and an activation function in a single module 21 | """ 22 | 23 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 24 | """Creates an Activated Batch Normalization module 25 | 26 | Parameters 27 | ---------- 28 | num_features : int 29 | Number of feature channels in the input and output. 30 | eps : float 31 | Small constant to prevent numerical issues. 32 | momentum : float 33 | Momentum factor applied to compute running statistics as. 34 | affine : bool 35 | If `True` apply learned scale and shift transformation after normalization. 36 | activation : str 37 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 38 | slope : float 39 | Negative slope for the `leaky_relu` activation. 
40 | """ 41 | super(ABN, self).__init__() 42 | self.num_features = num_features 43 | self.affine = affine 44 | self.eps = eps 45 | self.momentum = momentum 46 | self.activation = activation 47 | self.slope = slope 48 | if self.affine: 49 | self.weight = nn.Parameter(torch.ones(num_features)) 50 | self.bias = nn.Parameter(torch.zeros(num_features)) 51 | else: 52 | self.register_parameter('weight', None) 53 | self.register_parameter('bias', None) 54 | self.register_buffer('running_mean', torch.zeros(num_features)) 55 | self.register_buffer('running_var', torch.ones(num_features)) 56 | self.reset_parameters() 57 | 58 | def reset_parameters(self): 59 | nn.init.constant_(self.running_mean, 0) 60 | nn.init.constant_(self.running_var, 1) 61 | if self.affine: 62 | nn.init.constant_(self.weight, 1) 63 | nn.init.constant_(self.bias, 0) 64 | 65 | def forward(self, x): 66 | x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, 67 | self.training, self.momentum, self.eps) 68 | 69 | if self.activation == ACT_RELU: 70 | return functional.relu(x, inplace=True) 71 | elif self.activation == ACT_LEAKY_RELU: 72 | return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) 73 | elif self.activation == ACT_ELU: 74 | return functional.elu(x, inplace=True) 75 | else: 76 | return x 77 | 78 | def __repr__(self): 79 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 80 | ' affine={affine}, activation={activation}' 81 | if self.activation == "leaky_relu": 82 | rep += ', slope={slope})' 83 | else: 84 | rep += ')' 85 | return rep.format(name=self.__class__.__name__, **self.__dict__) 86 | 87 | 88 | class InPlaceABN(ABN): 89 | """InPlace Activated Batch Normalization""" 90 | 91 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 92 | """Creates an InPlace Activated Batch Normalization module 93 | 94 | Parameters 95 | ---------- 96 | num_features : int 97 | Number of feature channels in the input and output. 98 | eps : float 99 | Small constant to prevent numerical issues. 100 | momentum : float 101 | Momentum factor applied to compute running statistics as. 102 | affine : bool 103 | If `True` apply learned scale and shift transformation after normalization. 104 | activation : str 105 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 106 | slope : float 107 | Negative slope for the `leaky_relu` activation. 108 | """ 109 | super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) 110 | 111 | def forward(self, x): 112 | return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, 113 | self.training, self.momentum, self.eps, self.activation, self.slope) 114 | 115 | 116 | class InPlaceABNSync(ABN): 117 | """InPlace Activated Batch Normalization with cross-GPU synchronization 118 | 119 | This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DataParallel`. 120 | """ 121 | 122 | def __init__(self, num_features, devices=None, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", 123 | slope=0.01): 124 | """Creates a synchronized, InPlace Activated Batch Normalization module 125 | 126 | Parameters 127 | ---------- 128 | num_features : int 129 | Number of feature channels in the input and output. 130 | devices : list of int or None 131 | IDs of the GPUs that will run the replicas of this module. 132 | eps : float 133 | Small constant to prevent numerical issues. 
134 | momentum : float 135 | Momentum factor applied to compute running statistics as. 136 | affine : bool 137 | If `True` apply learned scale and shift transformation after normalization. 138 | activation : str 139 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 140 | slope : float 141 | Negative slope for the `leaky_relu` activation. 142 | """ 143 | super(InPlaceABNSync, self).__init__(num_features, eps, momentum, affine, activation, slope) 144 | self.devices = devices if devices else list(range(torch.cuda.device_count())) 145 | 146 | # Initialize queues 147 | self.worker_ids = self.devices[1:] 148 | self.master_queue = Queue(len(self.worker_ids)) 149 | self.worker_queues = [Queue(1) for _ in self.worker_ids] 150 | 151 | def forward(self, x): 152 | if x.get_device() == self.devices[0]: 153 | # Master mode 154 | extra = { 155 | "is_master": True, 156 | "master_queue": self.master_queue, 157 | "worker_queues": self.worker_queues, 158 | "worker_ids": self.worker_ids 159 | } 160 | else: 161 | # Worker mode 162 | extra = { 163 | "is_master": False, 164 | "master_queue": self.master_queue, 165 | "worker_queue": self.worker_queues[self.worker_ids.index(x.get_device())] 166 | } 167 | 168 | return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, 169 | extra, self.training, self.momentum, self.eps, self.activation, self.slope) 170 | 171 | def __repr__(self): 172 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 173 | ' affine={affine}, devices={devices}, activation={activation}' 174 | if self.activation == "leaky_relu": 175 | rep += ', slope={slope})' 176 | else: 177 | rep += ')' 178 | return rep.format(name=self.__class__.__name__, **self.__dict__) 179 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/models/sync_bn/inplace_abn/src/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | /* 6 | * General settings 7 | */ 8 | const int WARP_SIZE = 32; 9 | const int MAX_BLOCK_SIZE = 512; 10 | 11 | template 12 | struct Pair { 13 | T v1, v2; 14 | __device__ Pair() {} 15 | __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {} 16 | __device__ Pair(T v) : v1(v), v2(v) {} 17 | __device__ Pair(int v) : v1(v), v2(v) {} 18 | __device__ Pair &operator+=(const Pair &a) { 19 | v1 += a.v1; 20 | v2 += a.v2; 21 | return *this; 22 | } 23 | }; 24 | 25 | /* 26 | * Utility functions 27 | */ 28 | template 29 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, 30 | unsigned int mask = 0xffffffff) { 31 | #if CUDART_VERSION >= 9000 32 | return __shfl_xor_sync(mask, value, laneMask, width); 33 | #else 34 | return __shfl_xor(value, laneMask, width); 35 | #endif 36 | } 37 | 38 | __device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } 39 | 40 | static int getNumThreads(int nElem) { 41 | int threadSizes[5] = {32, 64, 128, 256, MAX_BLOCK_SIZE}; 42 | for (int i = 0; i != 5; ++i) { 43 | if (nElem <= threadSizes[i]) { 44 | return threadSizes[i]; 45 | } 46 | } 47 | return MAX_BLOCK_SIZE; 48 | } 49 | 50 | template 51 | static __device__ __forceinline__ T warpSum(T val) { 52 | #if __CUDA_ARCH__ >= 300 53 | for (int i = 0; i < getMSB(WARP_SIZE); ++i) { 54 | val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); 55 | } 56 | #else 57 | __shared__ T values[MAX_BLOCK_SIZE]; 58 | values[threadIdx.x] = val; 59 | __threadfence_block(); 60 | const int base = (threadIdx.x / 
WARP_SIZE) * WARP_SIZE; 61 | for (int i = 1; i < WARP_SIZE; i++) { 62 | val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; 63 | } 64 | #endif 65 | return val; 66 | } 67 | 68 | template 69 | static __device__ __forceinline__ Pair warpSum(Pair value) { 70 | value.v1 = warpSum(value.v1); 71 | value.v2 = warpSum(value.v2); 72 | return value; 73 | } 74 | 75 | template 76 | __device__ T reduce(Op op, int plane, int N, int C, int S) { 77 | T sum = (T)0; 78 | for (int batch = 0; batch < N; ++batch) { 79 | for (int x = threadIdx.x; x < S; x += blockDim.x) { 80 | sum += op(batch, plane, x); 81 | } 82 | } 83 | 84 | // sum over NumThreads within a warp 85 | sum = warpSum(sum); 86 | 87 | // 'transpose', and reduce within warp again 88 | __shared__ T shared[32]; 89 | __syncthreads(); 90 | if (threadIdx.x % WARP_SIZE == 0) { 91 | shared[threadIdx.x / WARP_SIZE] = sum; 92 | } 93 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { 94 | // zero out the other entries in shared 95 | shared[threadIdx.x] = (T)0; 96 | } 97 | __syncthreads(); 98 | if (threadIdx.x / WARP_SIZE == 0) { 99 | sum = warpSum(shared[threadIdx.x]); 100 | if (threadIdx.x == 0) { 101 | shared[0] = sum; 102 | } 103 | } 104 | __syncthreads(); 105 | 106 | // Everyone picks it up, should be broadcast into the whole gradInput 107 | return shared[0]; 108 | } -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/models/sync_bn/inplace_abn/src/inplace_abn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "inplace_abn.h" 6 | 7 | std::vector mean_var(at::Tensor x) { 8 | if (x.is_cuda()) { 9 | return mean_var_cuda(x); 10 | } else { 11 | return mean_var_cpu(x); 12 | } 13 | } 14 | 15 | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 16 | bool affine, float eps) { 17 | if (x.is_cuda()) { 18 | return forward_cuda(x, mean, var, weight, bias, affine, eps); 19 | } else { 20 | return forward_cpu(x, mean, var, weight, bias, affine, eps); 21 | } 22 | } 23 | 24 | std::vector edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 25 | bool affine, float eps) { 26 | if (z.is_cuda()) { 27 | return edz_eydz_cuda(z, dz, weight, bias, affine, eps); 28 | } else { 29 | return edz_eydz_cpu(z, dz, weight, bias, affine, eps); 30 | } 31 | } 32 | 33 | std::vector backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 34 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 35 | if (z.is_cuda()) { 36 | return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); 37 | } else { 38 | return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); 39 | } 40 | } 41 | 42 | void leaky_relu_forward(at::Tensor z, float slope) { 43 | at::leaky_relu_(z, slope); 44 | } 45 | 46 | void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) { 47 | if (z.is_cuda()) { 48 | return leaky_relu_backward_cuda(z, dz, slope); 49 | } else { 50 | return leaky_relu_backward_cpu(z, dz, slope); 51 | } 52 | } 53 | 54 | void elu_forward(at::Tensor z) { 55 | at::elu_(z); 56 | } 57 | 58 | void elu_backward(at::Tensor z, at::Tensor dz) { 59 | if (z.is_cuda()) { 60 | return elu_backward_cuda(z, dz); 61 | } else { 62 | return elu_backward_cpu(z, dz); 63 | } 64 | } 65 | 66 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 67 | m.def("mean_var", &mean_var, "Mean and variance computation"); 68 | 
m.def("forward", &forward, "In-place forward computation"); 69 | m.def("edz_eydz", &edz_eydz, "First part of backward computation"); 70 | m.def("backward", &backward, "Second part of backward computation"); 71 | m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); 72 | m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); 73 | m.def("elu_forward", &elu_forward, "Elu forward computation"); 74 | m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); 75 | } -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/models/sync_bn/inplace_abn/src/inplace_abn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | std::vector mean_var_cpu(at::Tensor x); 8 | std::vector mean_var_cuda(at::Tensor x); 9 | 10 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 11 | bool affine, float eps); 12 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 13 | bool affine, float eps); 14 | 15 | std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 16 | bool affine, float eps); 17 | std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 18 | bool affine, float eps); 19 | 20 | std::vector backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 21 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 22 | std::vector backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 23 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 24 | 25 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); 26 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); 27 | 28 | void elu_backward_cpu(at::Tensor z, at::Tensor dz); 29 | void elu_backward_cuda(at::Tensor z, at::Tensor dz); -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/models/sync_bn/inplace_abn/src/inplace_abn_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "inplace_abn.h" 6 | 7 | at::Tensor reduce_sum(at::Tensor x) { 8 | if (x.ndimension() == 2) { 9 | return x.sum(0); 10 | } else { 11 | auto x_view = x.view({x.size(0), x.size(1), -1}); 12 | return x_view.sum(-1).sum(0); 13 | } 14 | } 15 | 16 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { 17 | if (x.ndimension() == 2) { 18 | return v; 19 | } else { 20 | std::vector broadcast_size = {1, -1}; 21 | for (int64_t i = 2; i < x.ndimension(); ++i) 22 | broadcast_size.push_back(1); 23 | 24 | return v.view(broadcast_size); 25 | } 26 | } 27 | 28 | int64_t count(at::Tensor x) { 29 | int64_t count = x.size(0); 30 | for (int64_t i = 2; i < x.ndimension(); ++i) 31 | count *= x.size(i); 32 | 33 | return count; 34 | } 35 | 36 | at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { 37 | if (affine) { 38 | return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); 39 | } else { 40 | return z; 41 | } 42 | } 43 | 44 | std::vector mean_var_cpu(at::Tensor x) { 45 | auto num = count(x); 46 | auto mean = reduce_sum(x) / num; 47 | auto diff = x - 
broadcast_to(mean, x); 48 | auto var = reduce_sum(diff.pow(2)) / num; 49 | 50 | return {mean, var}; 51 | } 52 | 53 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 54 | bool affine, float eps) { 55 | auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var); 56 | auto mul = at::rsqrt(var + eps) * gamma; 57 | 58 | x.sub_(broadcast_to(mean, x)); 59 | x.mul_(broadcast_to(mul, x)); 60 | if (affine) x.add_(broadcast_to(bias, x)); 61 | 62 | return x; 63 | } 64 | 65 | std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 66 | bool affine, float eps) { 67 | auto edz = reduce_sum(dz); 68 | auto y = invert_affine(z, weight, bias, affine, eps); 69 | auto eydz = reduce_sum(y * dz); 70 | 71 | return {edz, eydz}; 72 | } 73 | 74 | std::vector backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 75 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 76 | auto y = invert_affine(z, weight, bias, affine, eps); 77 | auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps); 78 | 79 | auto num = count(z); 80 | auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz); 81 | 82 | auto dweight = at::empty(z.type(), {0}); 83 | auto dbias = at::empty(z.type(), {0}); 84 | if (affine) { 85 | dweight = eydz * at::sign(weight); 86 | dbias = edz; 87 | } 88 | 89 | return {dx, dweight, dbias}; 90 | } 91 | 92 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) { 93 | AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] { 94 | int64_t count = z.numel(); 95 | auto *_z = z.data(); 96 | auto *_dz = dz.data(); 97 | 98 | for (int64_t i = 0; i < count; ++i) { 99 | if (_z[i] < 0) { 100 | _z[i] *= 1 / slope; 101 | _dz[i] *= slope; 102 | } 103 | } 104 | })); 105 | } 106 | 107 | void elu_backward_cpu(at::Tensor z, at::Tensor dz) { 108 | AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] { 109 | int64_t count = z.numel(); 110 | auto *_z = z.data(); 111 | auto *_dz = dz.data(); 112 | 113 | for (int64_t i = 0; i < count; ++i) { 114 | if (_z[i] < 0) { 115 | _z[i] = log1p(_z[i]); 116 | _dz[i] *= (_z[i] + 1.f); 117 | } 118 | } 119 | })); 120 | } -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/AustNet-Inharmonious-Region-Localization/a02b53ae85b2991829bad84173d5334d2774dd02/HRNet-Semantic-Segmentation-HRNet-OCR/lib/utils/__init__.py -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Jingyi Xie (hsfzxjy@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import torch 8 | import torch.distributed as torch_dist 9 | 10 | def is_distributed(): 11 | return torch_dist.is_initialized() 12 | 13 | def get_world_size(): 14 | if not torch_dist.is_initialized(): 15 | return 1 16 | return torch_dist.get_world_size() 17 | 18 | def get_rank(): 19 | if not torch_dist.is_initialized(): 20 | return 0 21 | return torch_dist.get_rank() -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/utils/modelsummary.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Ke Sun (sunk@mail.ustc.edu.cn) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | import logging 14 | from collections import namedtuple 15 | 16 | import torch 17 | import torch.nn as nn 18 | 19 | def get_model_summary(model, *input_tensors, item_length=26, verbose=False): 20 | """ 21 | :param model: 22 | :param input_tensors: 23 | :param item_length: 24 | :return: 25 | """ 26 | 27 | summary = [] 28 | 29 | ModuleDetails = namedtuple( 30 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"]) 31 | hooks = [] 32 | layer_instances = {} 33 | 34 | def add_hooks(module): 35 | 36 | def hook(module, input, output): 37 | class_name = str(module.__class__.__name__) 38 | 39 | instance_index = 1 40 | if class_name not in layer_instances: 41 | layer_instances[class_name] = instance_index 42 | else: 43 | instance_index = layer_instances[class_name] + 1 44 | layer_instances[class_name] = instance_index 45 | 46 | layer_name = class_name + "_" + str(instance_index) 47 | 48 | params = 0 49 | 50 | if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \ 51 | class_name.find("Linear") != -1: 52 | for param_ in module.parameters(): 53 | params += param_.view(-1).size(0) 54 | 55 | flops = "Not Available" 56 | if class_name.find("Conv") != -1 and hasattr(module, "weight"): 57 | flops = ( 58 | torch.prod( 59 | torch.LongTensor(list(module.weight.data.size()))) * 60 | torch.prod( 61 | torch.LongTensor(list(output.size())[2:]))).item() 62 | elif isinstance(module, nn.Linear): 63 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \ 64 | * input[0].size(1)).item() 65 | 66 | if isinstance(input[0], list): 67 | input = input[0] 68 | if isinstance(output, list): 69 | output = output[0] 70 | 71 | summary.append( 72 | ModuleDetails( 73 | name=layer_name, 74 | input_size=list(input[0].size()), 75 | output_size=list(output.size()), 76 | num_parameters=params, 77 | multiply_adds=flops) 78 | ) 79 | 80 | if not isinstance(module, nn.ModuleList) \ 81 | and not isinstance(module, nn.Sequential) \ 82 | and module != model: 83 | hooks.append(module.register_forward_hook(hook)) 84 | 85 | model.eval() 86 | model.apply(add_hooks) 87 | 88 | space_len = item_length 89 | 90 | model(*input_tensors) 91 | for hook in hooks: 92 | hook.remove() 93 | 94 | details = '' 95 | if verbose: 96 | details = "Model Summary" + \ 97 | os.linesep + \ 98 | "Name{}Input 
Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format( 99 | ' ' * (space_len - len("Name")), 100 | ' ' * (space_len - len("Input Size")), 101 | ' ' * (space_len - len("Output Size")), 102 | ' ' * (space_len - len("Parameters")), 103 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \ 104 | + os.linesep + '-' * space_len * 5 + os.linesep 105 | 106 | params_sum = 0 107 | flops_sum = 0 108 | for layer in summary: 109 | params_sum += layer.num_parameters 110 | if layer.multiply_adds != "Not Available": 111 | flops_sum += layer.multiply_adds 112 | if verbose: 113 | details += "{}{}{}{}{}{}{}{}{}{}".format( 114 | layer.name, 115 | ' ' * (space_len - len(layer.name)), 116 | layer.input_size, 117 | ' ' * (space_len - len(str(layer.input_size))), 118 | layer.output_size, 119 | ' ' * (space_len - len(str(layer.output_size))), 120 | layer.num_parameters, 121 | ' ' * (space_len - len(str(layer.num_parameters))), 122 | layer.multiply_adds, 123 | ' ' * (space_len - len(str(layer.multiply_adds)))) \ 124 | + os.linesep + '-' * space_len * 5 + os.linesep 125 | 126 | details += os.linesep \ 127 | + "Total Parameters: {:,}".format(params_sum) \ 128 | + os.linesep + '-' * space_len * 5 + os.linesep 129 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \ 130 | + os.linesep + '-' * space_len * 5 + os.linesep 131 | details += "Number of Layers" + os.linesep 132 | for layer in layer_instances: 133 | details += "{} : {} layers ".format(layer, layer_instances[layer]) 134 | 135 | return details -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from pathlib import Path 15 | 16 | import numpy as np 17 | 18 | import torch 19 | import torch.nn as nn 20 | 21 | class FullModel(nn.Module): 22 | """ 23 | Distribute the loss on multi-gpu to reduce 24 | the memory cost in the main gpu. 25 | You can check the following discussion. 
26 | https://discuss.pytorch.org/t/dataparallel-imbalanced-memory-usage/22551/21 27 | """ 28 | def __init__(self, model, loss): 29 | super(FullModel, self).__init__() 30 | self.model = model 31 | self.loss = loss 32 | 33 | def forward(self, inputs, labels, *args, **kwargs): 34 | outputs = self.model(inputs, *args, **kwargs) 35 | loss = self.loss(outputs, labels) 36 | return torch.unsqueeze(loss,0), outputs 37 | 38 | class AverageMeter(object): 39 | """Computes and stores the average and current value""" 40 | 41 | def __init__(self): 42 | self.initialized = False 43 | self.val = None 44 | self.avg = None 45 | self.sum = None 46 | self.count = None 47 | 48 | def initialize(self, val, weight): 49 | self.val = val 50 | self.avg = val 51 | self.sum = val * weight 52 | self.count = weight 53 | self.initialized = True 54 | 55 | def update(self, val, weight=1): 56 | if not self.initialized: 57 | self.initialize(val, weight) 58 | else: 59 | self.add(val, weight) 60 | 61 | def add(self, val, weight): 62 | self.val = val 63 | self.sum += val * weight 64 | self.count += weight 65 | self.avg = self.sum / self.count 66 | 67 | def value(self): 68 | return self.val 69 | 70 | def average(self): 71 | return self.avg 72 | 73 | def create_logger(cfg, cfg_name, phase='train'): 74 | root_output_dir = Path(cfg.OUTPUT_DIR) 75 | # set up logger 76 | if not root_output_dir.exists(): 77 | print('=> creating {}'.format(root_output_dir)) 78 | root_output_dir.mkdir() 79 | 80 | dataset = cfg.DATASET.DATASET 81 | model = cfg.MODEL.NAME 82 | cfg_name = os.path.basename(cfg_name).split('.')[0] 83 | 84 | final_output_dir = root_output_dir / dataset / cfg_name 85 | 86 | print('=> creating {}'.format(final_output_dir)) 87 | final_output_dir.mkdir(parents=True, exist_ok=True) 88 | 89 | time_str = time.strftime('%Y-%m-%d-%H-%M') 90 | log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase) 91 | final_log_file = final_output_dir / log_file 92 | head = '%(asctime)-15s %(message)s' 93 | logging.basicConfig(filename=str(final_log_file), 94 | format=head) 95 | logger = logging.getLogger() 96 | logger.setLevel(logging.INFO) 97 | console = logging.StreamHandler() 98 | logging.getLogger('').addHandler(console) 99 | 100 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \ 101 | (cfg_name + '_' + time_str) 102 | print('=> creating {}'.format(tensorboard_log_dir)) 103 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 104 | 105 | return logger, str(final_output_dir), str(tensorboard_log_dir) 106 | 107 | def get_confusion_matrix(label, pred, size, num_class, ignore=-1): 108 | """ 109 | Calcute the confusion matrix by given label and pred 110 | """ 111 | output = pred.cpu().numpy().transpose(0, 2, 3, 1) 112 | seg_pred = np.asarray(np.argmax(output, axis=3), dtype=np.uint8) 113 | seg_gt = np.asarray( 114 | label.cpu().numpy()[:, :size[-2], :size[-1]], dtype=np.int) 115 | 116 | ignore_index = seg_gt != ignore 117 | seg_gt = seg_gt[ignore_index] 118 | seg_pred = seg_pred[ignore_index] 119 | 120 | index = (seg_gt * num_class + seg_pred).astype('int32') 121 | label_count = np.bincount(index) 122 | confusion_matrix = np.zeros((num_class, num_class)) 123 | 124 | for i_label in range(num_class): 125 | for i_pred in range(num_class): 126 | cur_index = i_label * num_class + i_pred 127 | if cur_index < len(label_count): 128 | confusion_matrix[i_label, 129 | i_pred] = label_count[cur_index] 130 | return confusion_matrix 131 | 132 | def adjust_learning_rate(optimizer, base_lr, max_iters, 133 | cur_iters, power=0.9, nbb_mult=10): 134 
| lr = base_lr*((1-float(cur_iters)/max_iters)**(power)) 135 | optimizer.param_groups[0]['lr'] = lr 136 | if len(optimizer.param_groups) == 2: 137 | optimizer.param_groups[1]['lr'] = lr * nbb_mult 138 | return lr -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/requirements.txt: -------------------------------------------------------------------------------- 1 | EasyDict==1.7 2 | shapely 3 | Cython 4 | scipy 5 | pandas 6 | pyyaml 7 | json_tricks 8 | scikit-image 9 | yacs>=0.1.5 10 | tensorboardX>=1.6 11 | tqdm 12 | ninja 13 | 14 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/run_dist.sh: -------------------------------------------------------------------------------- 1 | PYTHON="/opt/conda/bin/python" 2 | GPU_NUM=$1 3 | CONFIG=$2 4 | 5 | $PYTHON -m pip install -r requirements.txt 6 | 7 | $PYTHON -m torch.distributed.launch \ 8 | --nproc_per_node=$GPU_NUM \ 9 | tools/train.py \ 10 | --cfg experiments/$CONFIG.yaml \ 11 | 2>&1 | tee local_log.txt 12 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/run_local.sh: -------------------------------------------------------------------------------- 1 | PYTHON="/data/anaconda/envs/pytorch1.7.1/bin/python" 2 | GPU_NUM=4 3 | CONFIG="seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200_paddle" 4 | 5 | $PYTHON -m pip install -r requirements.txt 6 | 7 | $PYTHON -m torch.distributed.launch \ 8 | --nproc_per_node=$GPU_NUM \ 9 | tools/train.py \ 10 | --cfg experiments/pascal_ctx/$CONFIG.yaml \ 11 | 2>&1 | tee local_log.txt 12 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os.path as osp 12 | import sys 13 | 14 | 15 | def add_path(path): 16 | if path not in sys.path: 17 | sys.path.insert(0, path) 18 | 19 | this_dir = osp.dirname(__file__) 20 | 21 | lib_path = osp.join(this_dir, '..', 'lib') 22 | add_path(lib_path) 23 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/tools/inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import pprint 4 | import shutil 5 | import tqdm 6 | import sys 7 | import cv2 8 | from numpy.core.fromnumeric import shape 9 | import torch 10 | from torchvision import transforms as T 11 | import torch.nn.functional as F 12 | 13 | import logging 14 | import time 15 | import timeit 16 | from pathlib import Path 17 | 18 | import numpy as np 19 | 20 | import torch 21 | import torch.nn as nn 22 | import torch.backends.cudnn as cudnn 23 | 24 | import _init_paths 25 | sys.path.append("/home/wph/voting_dirl/HRNet-Semantic-Segmentation-HRNet-OCR/lib") 26 | import models 27 | import datasets 28 | from config import config 29 | from config import update_config 30 | 31 | 32 | PALETTE = [[0, 192, 64], [0, 192, 64], [0, 64, 96], [128, 192, 192], 33 | [0, 64, 64], [0, 192, 224], [0, 192, 192], [128, 192, 64], 34 | [0, 192, 96], [128, 192, 64], [128, 32, 192], [0, 0, 224], 35 | [0, 0, 64], [0, 160, 192], [128, 0, 96], [128, 0, 192], 36 | [0, 32, 192], [128, 128, 224], [0, 0, 192], [128, 160, 192], 37 | [128, 128, 0], [128, 0, 32], [128, 32, 0], [128, 0, 128], 38 | [64, 128, 32], [0, 160, 0], [0, 0, 0], [192, 128, 160], 39 | [0, 32, 0], [0, 128, 128], [64, 128, 160], [128, 160, 0], 40 | [0, 128, 0], [192, 128, 32], [128, 96, 128], [0, 0, 128], 41 | [64, 0, 32], [0, 224, 128], [128, 0, 0], [192, 0, 160], 42 | [0, 96, 128], [128, 128, 128], [64, 0, 160], [128, 224, 128], 43 | [128, 128, 64], [192, 0, 32], [128, 96, 0], [128, 0, 192], 44 | [0, 128, 32], [64, 224, 0], [0, 0, 64], [128, 128, 160], 45 | [64, 96, 0], [0, 128, 192], [0, 128, 160], [192, 224, 0], 46 | [0, 128, 64], [128, 128, 32], [192, 32, 128], [0, 64, 192], 47 | [0, 0, 32], [64, 160, 128], [128, 64, 64], [128, 0, 160], 48 | [64, 32, 128], [128, 192, 192], [0, 0, 160], [192, 160, 128], 49 | [128, 192, 0], [128, 0, 96], [192, 32, 0], [128, 64, 128], 50 | [64, 128, 96], [64, 160, 0], [0, 64, 0], [192, 128, 224], 51 | [64, 32, 0], [0, 192, 128], [64, 128, 224], [192, 160, 0], 52 | [0, 192, 0], [192, 128, 96], [192, 96, 128], [0, 64, 128], 53 | [64, 0, 96], [64, 224, 128], [128, 64, 0], [192, 0, 224], 54 | [64, 96, 128], [128, 192, 128], [64, 0, 224], [192, 224, 128], 55 | [128, 192, 64], [192, 0, 96], [192, 96, 0], [128, 64, 192], 56 | [0, 128, 96], [0, 224, 0], [64, 64, 64], [128, 128, 224], 57 | [0, 96, 0], [64, 192, 192], [0, 128, 224], [128, 224, 0], 58 | [64, 192, 64], [128, 128, 96], [128, 32, 128], [64, 0, 192], 59 | [0, 64, 96], [0, 160, 128], [192, 0, 64], [128, 64, 224], 60 | [0, 32, 128], [192, 128, 192], [0, 64, 224], [128, 160, 128], 61 | [192, 128, 0], [128, 64, 32], [128, 32, 64], [192, 0, 128], 62 | [64, 192, 32], [0, 160, 64], [64, 0, 0], [192, 192, 160], 63 | [0, 32, 64], [64, 128, 128], [64, 192, 160], [128, 160, 64], 64 | [64, 128, 0], [192, 192, 32], [128, 
96, 192], [64, 0, 128], 65 | [64, 64, 32], [0, 224, 192], [192, 0, 0], [192, 64, 160], 66 | [0, 96, 192], [192, 128, 128], [64, 64, 160], [128, 224, 192], 67 | [192, 128, 64], [192, 64, 32], [128, 96, 64], [192, 0, 192], 68 | [0, 192, 32], [64, 224, 64], [64, 0, 64], [128, 192, 160], 69 | [64, 96, 64], [64, 128, 192], [0, 192, 160], [192, 224, 64], 70 | [64, 128, 64], [128, 192, 32], [192, 32, 192], [64, 64, 192], 71 | [0, 64, 32], [64, 160, 192], [192, 64, 64], [128, 64, 160], 72 | [64, 32, 192], [192, 192, 192], [0, 64, 160], [192, 160, 192], 73 | [192, 192, 0], [128, 64, 96], [192, 32, 64], [192, 64, 128], 74 | [64, 192, 96], [64, 160, 64], [64, 64, 0]] 75 | 76 | 77 | if __name__=='__main__': 78 | parser = argparse.ArgumentParser(description='Train segmentation network') 79 | args = parser.parse_args() 80 | args.cfg = "experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml" 81 | args.opts = [] 82 | update_config(config, args) 83 | 84 | module = eval('models.'+config.MODEL.NAME) 85 | module.BatchNorm2d_class = module.BatchNorm2d = torch.nn.BatchNorm2d 86 | model = eval('models.'+config.MODEL.NAME + 87 | '.get_seg_model')(config) 88 | model = model.cuda() 89 | model.eval() 90 | 91 | img_list = [] 92 | img_root = "/home/wph/DIRL/iHarmony4" 93 | with open("le50_test.txt") as f: 94 | img_list = f.readlines() 95 | img_list = [os.path.join(img_root, name.strip()) for name in img_list] 96 | 97 | for img_path in tqdm.tqdm(img_list): 98 | img = cv2.imread(img_path) 99 | img = cv2.resize(img, (224, 224)) 100 | origin_img = img.copy() 101 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 102 | 103 | transform = T.Compose([T.ToTensor(), T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) 104 | img = transform(img) 105 | img = img.unsqueeze(0).cuda() 106 | with torch.no_grad(): 107 | pred = model(img)[1] 108 | pred = F.interpolate( 109 | input=pred, size=origin_img.shape[:2], 110 | mode='bilinear', align_corners=True 111 | ) 112 | prob = F.softmax(pred, 1) 113 | result = prob.argmax(dim=1) 114 | result = result.cpu().numpy()[0] 115 | color_seg = np.zeros((result.shape[0], result.shape[1], 3), dtype=np.uint8) 116 | for label, color in enumerate(PALETTE): 117 | color_seg[result == label, :] = color 118 | 119 | color_seg = color_seg[..., ::-1] 120 | name = img_path.split(os.sep)[-1].replace(".jpg", "") 121 | cv2.imwrite(os.path.join("seg_results", name + "_seg_result.png"), color_seg) 122 | opacity = 0.5 123 | img = origin_img * (1 - opacity) + color_seg * opacity 124 | img = img.astype(np.uint8) 125 | cv2.imwrite(os.path.join("seg_results", name + "_seg_result_comp.png"), img) 126 | 127 | cv2.imwrite(os.path.join("seg_results", name + "_origin.png"), origin_img) 128 | -------------------------------------------------------------------------------- /HRNet-Semantic-Segmentation-HRNet-OCR/tools/test.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import argparse 8 | import os 9 | import pprint 10 | import shutil 11 | import sys 12 | 13 | import logging 14 | import time 15 | import timeit 16 | from pathlib import Path 17 | 18 | import numpy as np 19 | 20 | import torch 21 | import torch.nn as nn 22 | import torch.backends.cudnn as cudnn 23 | 24 | import _init_paths 25 | import models 26 | import datasets 27 | from config import config 28 | from config import update_config 29 | from core.function import testval, test 30 | from utils.modelsummary import get_model_summary 31 | from utils.utils import create_logger, FullModel 32 | 33 | def parse_args(): 34 | parser = argparse.ArgumentParser(description='Train segmentation network') 35 | 36 | parser.add_argument('--cfg', 37 | help='experiment configure file name', 38 | required=True, 39 | type=str) 40 | parser.add_argument('opts', 41 | help="Modify config options using the command-line", 42 | default=None, 43 | nargs=argparse.REMAINDER) 44 | 45 | args = parser.parse_args() 46 | update_config(config, args) 47 | 48 | return args 49 | 50 | def main(): 51 | args = parse_args() 52 | 53 | logger, final_output_dir, _ = create_logger( 54 | config, args.cfg, 'test') 55 | 56 | logger.info(pprint.pformat(args)) 57 | logger.info(pprint.pformat(config)) 58 | 59 | # cudnn related setting 60 | cudnn.benchmark = config.CUDNN.BENCHMARK 61 | cudnn.deterministic = config.CUDNN.DETERMINISTIC 62 | cudnn.enabled = config.CUDNN.ENABLED 63 | 64 | # build model 65 | if torch.__version__.startswith('1'): 66 | module = eval('models.'+config.MODEL.NAME) 67 | module.BatchNorm2d_class = module.BatchNorm2d = torch.nn.BatchNorm2d 68 | model = eval('models.'+config.MODEL.NAME + 69 | '.get_seg_model')(config) 70 | 71 | dump_input = torch.rand( 72 | (1, 3, config.TRAIN.IMAGE_SIZE[1], config.TRAIN.IMAGE_SIZE[0]) 73 | ) 74 | logger.info(get_model_summary(model.cuda(), dump_input.cuda())) 75 | 76 | if config.TEST.MODEL_FILE: 77 | model_state_file = config.TEST.MODEL_FILE 78 | else: 79 | model_state_file = os.path.join(final_output_dir, 'final_state.pth') 80 | logger.info('=> loading model from {}'.format(model_state_file)) 81 | 82 | pretrained_dict = torch.load(model_state_file) 83 | if 'state_dict' in pretrained_dict: 84 | pretrained_dict = pretrained_dict['state_dict'] 85 | model_dict = model.state_dict() 86 | pretrained_dict = {k[6:]: v for k, v in pretrained_dict.items() 87 | if k[6:] in model_dict.keys()} 88 | for k, _ in pretrained_dict.items(): 89 | logger.info( 90 | '=> loading {} from pretrained model'.format(k)) 91 | model_dict.update(pretrained_dict) 92 | model.load_state_dict(model_dict) 93 | 94 | gpus = list(config.GPUS) 95 | model = nn.DataParallel(model, device_ids=gpus).cuda() 96 | 97 | # prepare data 98 | test_size = (config.TEST.IMAGE_SIZE[1], config.TEST.IMAGE_SIZE[0]) 99 | test_dataset = eval('datasets.'+config.DATASET.DATASET)( 100 | root=config.DATASET.ROOT, 101 | list_path=config.DATASET.TEST_SET, 102 | num_samples=None, 103 | num_classes=config.DATASET.NUM_CLASSES, 104 | multi_scale=False, 105 | flip=False, 106 | ignore_label=config.TRAIN.IGNORE_LABEL, 107 | base_size=config.TEST.BASE_SIZE, 108 | crop_size=test_size, 109 | downsample_rate=1) 110 | 111 | testloader = torch.utils.data.DataLoader( 112 | test_dataset, 113 | batch_size=1, 114 | shuffle=False, 115 | num_workers=config.WORKERS, 116 | pin_memory=True) 117 | 118 | start = timeit.default_timer() 119 | if 
'val' in config.DATASET.TEST_SET: 120 | mean_IoU, IoU_array, pixel_acc, mean_acc = testval(config, 121 | test_dataset, 122 | testloader, 123 | model) 124 | 125 | msg = 'MeanIU: {: 4.4f}, Pixel_Acc: {: 4.4f}, \ 126 | Mean_Acc: {: 4.4f}, Class IoU: '.format(mean_IoU, 127 | pixel_acc, mean_acc) 128 | logging.info(msg) 129 | logging.info(IoU_array) 130 | elif 'test' in config.DATASET.TEST_SET: 131 | test(config, 132 | test_dataset, 133 | testloader, 134 | model, 135 | sv_dir=final_output_dir) 136 | 137 | end = timeit.default_timer() 138 | logger.info('Mins: %d' % int((end-start)/60)) 139 | logger.info('Done') 140 | 141 | 142 | if __name__ == '__main__': 143 | main() 144 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AustNet-Inharmonious-Region-Localization 2 | 3 | ![teaser](assets/pipeline.png) 4 | 5 | This is the official code of the paper: 6 | > Inharmonious Region Localization with Auxiliary Style Feature Penghao Wu, Li Niu, Liqing Zhang [arXiv Paper](https://arxiv.org/abs/2210.02029), BMVC 2022 9 | 10 | 11 | ## Install 12 | Clone this repo and build the environment: 13 | 14 | ``` 15 | git clone https://github.com/bcmi/AustNet-Inharmonious-Region-Localization.git 16 | cd AustNet-Inharmonious-Region-Localization 17 | conda env create -f environment.yml --name Austnet 18 | conda activate Austnet 19 | ``` 20 | 21 | Download the semantic segmentation network model weights from [Dropbox](https://www.dropbox.com/scl/fi/lby093g1d12aqbuhblztr/hrnet_ocr_cocostuff_3965_torch04.pth?rlkey=5bemk4eo9vla59b9rbaikba6j&st=q6otmhew&dl=0) or [Baidu Yun](https://pan.baidu.com/s/1SSRMI8QYCtRsG9E2zmOiOg) with code pfpy. Put the model weights in the HRNet-Semantic-Segmentation-HRNet-OCR folder. 22 | 23 | ## Dataset 24 | Please refer to [DIRL](https://github.com/bcmi/DIRL-Inharmonious-Region-Localization) to download the iHarmony4 dataset.
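A minimal sketch of the directory layout that `data.py` expects under `PATH_OF_THE_DATASET` (inferred from `iH4Dataset.get_sample`; `HCOCO` is shown only as an example iHarmony4 sub-dataset, and the `le50_*` split files are the ones used by DIRL):

```
PATH_OF_THE_DATASET/
├── le50_train.txt      # one composite-image path per line, e.g. HCOCO/composite_images/xxx_1_2.jpg
├── le50_val.txt
├── le50_test.txt
└── HCOCO/
    ├── composite_images/xxx_1_2.jpg
    ├── real_images/xxx.jpg    # composite path with composite_images -> real_images and the _*_* suffix dropped
    └── masks/xxx_1.png        # composite path with composite_images -> masks and the last _* suffix dropped
```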
25 | 26 | ## Training 27 | 28 | To train AustNet, run 29 | 30 | ``` 31 | python train_austnet.py --dataset_root PATH_OF_THE_DATASET --logdir austnet_training_log --gpus NUMBER_OF_GPUS 32 | ``` 33 | To train AustNet_S, run 34 | ``` 35 | python train_austnet_s.py --dataset_root PATH_OF_THE_DATASET --logdir austnet_s_training_log --gpus NUMBER_OF_GPUS 36 | ``` 37 | 38 | 39 | ## Pretrained Model 40 | 41 | |Model| Dropbox Link| Baidu Yun Link| 42 | |-----------|--------------|--------------| 43 | | Austnet | [Dropbox](https://www.dropbox.com/scl/fi/p4m7mwq3o4on0jmrbklrz/austnet.ckpt?rlkey=c7pp2qzwaxhy5h1n0i4bz3mnw&st=2i4ymgju&dl=0) | [Baidu Yun](https://pan.baidu.com/s/1Z7r6p4LgJKqekZaJ3ctPpQ) code: m8ku | 44 | | Austnet_s | [Dropbox](https://www.dropbox.com/scl/fi/bxd1y6ap35zu6vj8hledt/austnet_s.ckpt?rlkey=7f9hezvfdohldutdbmny1az9z&st=7axw25zx&dl=0) | [Baidu Yun](https://pan.baidu.com/s/1LwAWRiFCceoX_wcLOtS5vQ) code: jrdi| 45 | 46 | 47 | ## Evaluation 48 | 49 | To evaluate AustNet, run 50 | 51 | ``` 52 | python test_austnet.py --dataset_root PATH_OF_THE_DATASET --ckpt MODEL_WEIGHT_PATH 53 | ``` 54 | To evaluate AustNet_S, run 55 | ``` 56 | python test_austnet_s.py --dataset_root PATH_OF_THE_DATASET --ckpt MODEL_WEIGHT_PATH 57 | ``` 58 | 59 | ## Citation 60 | 61 | If you find our work or code helpful, please cite: 62 | ```` 63 | @inproceedings{Wu2022Inharmonious, 64 | title={Inharmonious Region Localization with Auxiliary Style Feature}, 65 | author={Penghao Wu and Li Niu and Liqing Zhang}, 66 | booktitle={BMVC}, 67 | year={2022} 68 | } 69 | ```` 70 | 71 | ## Acknowledgement 72 | Our code is based on the following repositories: 73 | - [DIRL](https://github.com/bcmi/DIRL-Inharmonious-Region-Localization) 74 | - [HRNet-OCR](https://github.com/HRNet/HRNet-Semantic-Segmentation) 75 | -------------------------------------------------------------------------------- /assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcmi/AustNet-Inharmonious-Region-Localization/a02b53ae85b2991829bad84173d5334d2774dd02/assets/pipeline.png -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch.utils.data as data 4 | 5 | import cv2 6 | 7 | from albumentations import HorizontalFlip, RandomResizedCrop, Compose, DualTransform 8 | import albumentations.augmentations.transforms as transforms 9 | from albumentations.augmentations.geometric.resize import Resize 10 | 11 | 12 | import os.path 13 | import os 14 | import cv2 15 | import numpy as np 16 | import torchvision.transforms as transforms 17 | import random 18 | import torch.nn.functional as F 19 | import copy 20 | 21 | 22 | class HCompose(Compose): 23 | def __init__(self, transforms, *args, additional_targets=None, no_nearest_for_masks=True, **kwargs): 24 | if additional_targets is None: 25 | additional_targets = { 26 | 'real': 'image', 27 | 'mask': 'mask', 28 | 'yuv': 'image', 29 | } 30 | self.additional_targets = additional_targets 31 | super().__init__(transforms, *args, additional_targets=additional_targets, **kwargs) 32 | if no_nearest_for_masks: 33 | for t in transforms: 34 | if isinstance(t, DualTransform): 35 | t._additional_targets['mask'] = 'image' 36 | # t._additional_targets['edge'] = 'image' 37 | 38 | 39 | def get_transform(opt, params=None, grayscale=False, convert=True): 40 | transform_list = [] 41 | if 
grayscale: 42 | transform_list.append(transforms.ToGray()) 43 | if opt.preprocess == 'resize_and_crop': 44 | if params is None: 45 | transform_list.append(RandomResizedCrop(opt.crop_size, opt.crop_size, scale=(0.9, 1.0))) # 0.5,1.0 46 | elif opt.preprocess == 'resize': 47 | transform_list.append(Resize(opt.crop_size, opt.crop_size)) 48 | elif opt.preprocess == 'none': 49 | return HCompose(transform_list) 50 | 51 | if not opt.no_flip: 52 | if params is None: 53 | # print("flip") 54 | transform_list.append(HorizontalFlip()) 55 | 56 | return HCompose(transform_list) 57 | 58 | 59 | class iH4Dataset(data.Dataset): 60 | def __init__(self, opt): 61 | self.opt = copy.copy(opt) 62 | self.root = opt.dataset_root 63 | 64 | self.image_paths = [] 65 | self.phase = opt.phase 66 | 67 | 68 | 69 | if opt.phase=='train': 70 | # print('loading training file: ') 71 | self.trainfile = os.path.join(opt.dataset_root,'le50_train.txt') 72 | self.keep_background_prob = 0.05 # 0.05 73 | with open(self.trainfile,'r') as f: 74 | for line in f.readlines(): 75 | self.image_paths.append(os.path.join(opt.dataset_root,line.rstrip())) 76 | elif opt.phase == 'val' or opt.phase == 'test': 77 | print('loading {} file'.format(opt.phase)) 78 | self.keep_background_prob = -1 79 | self.trainfile = os.path.join(opt.dataset_root,'le50_{}.txt'.format(opt.phase)) 80 | with open(self.trainfile,'r') as f: 81 | for line in f.readlines(): 82 | self.image_paths.append(os.path.join(opt.dataset_root,line.rstrip())) 83 | 84 | self.transform = get_transform(opt) 85 | self.input_transform = transforms.Compose([ 86 | transforms.ToTensor(), 87 | transforms.Normalize( 88 | (0.46962251, 0.4464104, 0.40718787), 89 | (0.27469736, 0.27012361, 0.28515933), 90 | ) 91 | ]) 92 | 93 | self.input_transform_yuv = transforms.Compose([ 94 | transforms.ToTensor(), 95 | ]) 96 | 97 | self.inharmonious_threshold = 1e-2 98 | self.fg_upper_bound = 0.5 99 | 100 | def __len__(self): 101 | """Return the total number of images in the dataset.""" 102 | return len(self.image_paths) 103 | 104 | def __getitem__(self, index): 105 | sample = self.get_sample(index) 106 | self.check_sample_types(sample) 107 | sample = self.augment_sample(sample) 108 | 109 | comp = self.input_transform(sample['image']) 110 | real = self.input_transform(sample['real']) 111 | mask = sample['mask'][np.newaxis, ...].astype(np.float32) 112 | mask_2 = np.expand_dims(cv2.resize(mask.transpose(1, 2, 0), dsize=(112, 112)), axis = 0) 113 | mask_4 = np.expand_dims(cv2.resize(mask.transpose(1, 2, 0), dsize=(56, 56)), axis = 0) 114 | mask_8 = np.expand_dims(cv2.resize(mask.transpose(1, 2, 0), dsize=(28, 28)), axis = 0) 115 | mask = np.where(mask > 0.5, 1, 0).astype(np.uint8) 116 | mask_2 = np.where(mask_2 > 0.5, 1, 0).astype(np.uint8) 117 | mask_4 = np.where(mask_4 > 0.5, 1, 0).astype(np.uint8) 118 | mask_8 = np.where(mask_8 > 0.5, 1, 0).astype(np.uint8) 119 | 120 | yuv = self.input_transform_yuv(sample['yuv']) 121 | 122 | output = { 123 | 'comp': comp, 124 | 'mask': mask, 125 | 'mask_2': mask_2, 126 | 'mask_4': mask_4, 127 | 'mask_8': mask_8, 128 | 'real': real, 129 | 'yuv': yuv, 130 | 'img_path':sample['img_path'] 131 | } 132 | return output 133 | 134 | 135 | 136 | def augment_sample(self, sample): 137 | if self.transform is None: 138 | return sample 139 | #print(self.transform.additional_targets.keys()) 140 | additional_targets = {target_name: sample[target_name] 141 | for target_name in self.transform.additional_targets.keys()} 142 | 143 | valid_augmentation = False 144 | while not valid_augmentation: 
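# Retry the augmentation until check_augmented_sample accepts it: during training a crop is
# only kept if the augmented mask retains more than 10 pixels (or, with probability
# keep_background_prob, regardless of the mask); for val/test every crop is accepted.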
145 | aug_output = self.transform(image=sample['comp'], **additional_targets) 146 | valid_augmentation = self.check_augmented_sample(sample, aug_output) 147 | 148 | for target_name, transformed_target in aug_output.items(): 149 | #print(target_name,transformed_target.shape) 150 | sample[target_name] = transformed_target 151 | 152 | return sample 153 | 154 | def check_augmented_sample(self, sample, aug_output): 155 | if self.keep_background_prob < 0.0 or random.random() < self.keep_background_prob: 156 | return True 157 | # return aug_output['mask'].sum() > 1.0 158 | return aug_output['mask'].sum() > 10 159 | 160 | def check_sample_types(self, sample): 161 | assert sample['comp'].dtype == 'uint8' 162 | if 'real' in sample: 163 | assert sample['real'].dtype == 'uint8' 164 | 165 | 166 | def get_sample(self, index): 167 | path = self.image_paths[index] 168 | 169 | name_parts=path.split('_') 170 | 171 | mask_path = self.image_paths[index].replace('composite_images','masks') 172 | mask_path = mask_path.replace(('_'+name_parts[-1]),'.png') 173 | target_path = self.image_paths[index].replace('composite_images','real_images') 174 | target_path = target_path.replace(('_'+name_parts[-2]+'_'+name_parts[-1]),'.jpg') 175 | 176 | comp = cv2.imread(path) 177 | comp = cv2.cvtColor(comp, cv2.COLOR_BGR2RGB) 178 | real = cv2.imread(target_path) 179 | real = cv2.cvtColor(real, cv2.COLOR_BGR2RGB) 180 | mask = cv2.imread(mask_path) 181 | mask = mask[:, :, 0].astype(np.float32) / 255. 182 | 183 | yuv = cv2.cvtColor(comp, cv2.COLOR_RGB2YCrCb) 184 | 185 | return {'comp': comp, 'mask': mask, 'real': real, 'yuv': yuv, 'img_path':path} -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: Austnet 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - _libgcc_mutex=0.1=main 8 | - _openmp_mutex=4.5=1_gnu 9 | - absl-py=0.15.0=pyhd3eb1b0_0 10 | - aiohttp=3.8.1=py37h7f8727e_0 11 | - aiosignal=1.2.0=pyhd3eb1b0_0 12 | - async-timeout=4.0.1=pyhd3eb1b0_0 13 | - asynctest=0.13.0=py_0 14 | - attrs=21.2.0=pyhd3eb1b0_0 15 | - blas=1.0=mkl 16 | - blinker=1.4=py37h06a4308_0 17 | - blosc=1.21.0=h8c45485_0 18 | - brotli=1.0.9=he6710b0_2 19 | - brotlipy=0.7.0=py37h27cfd23_1003 20 | - brunsli=0.1=h2531618_0 21 | - bzip2=1.0.8=h7b6447c_0 22 | - c-ares=1.17.1=h27cfd23_0 23 | - ca-certificates=2022.3.18=h06a4308_0 24 | - cachetools=4.2.2=pyhd3eb1b0_0 25 | - certifi=2021.10.8=py37h06a4308_2 26 | - cffi=1.14.6=py37h400218f_0 27 | - cfitsio=3.470=hf0d0db6_6 28 | - charls=2.2.0=h2531618_0 29 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 30 | - click=8.0.3=pyhd3eb1b0_0 31 | - cryptography=3.4.8=py37hd23ed53_0 32 | - cudatoolkit=11.3.1=h2bc3f7f_2 33 | - cycler=0.11.0=pyhd3eb1b0_0 34 | - cytoolz=0.11.0=py37h7b6447c_0 35 | - dask-core=2021.10.0=pyhd3eb1b0_0 36 | - efficientnet-pytorch=0.7.0=pyhd8ed1ab_0 37 | - ffmpeg=4.3=hf484d3e_0 38 | - fonttools=4.25.0=pyhd3eb1b0_0 39 | - freetype=2.11.0=h70c0345_0 40 | - frozenlist=1.2.0=py37h7f8727e_0 41 | - fsspec=2021.10.1=pyhd3eb1b0_0 42 | - future=0.18.2=py37_1 43 | - fvcore=0.1.2.post20201122=pyhd8ed1ab_0 44 | - giflib=5.2.1=h7b6447c_0 45 | - gmp=6.2.1=h2531618_2 46 | - gnutls=3.6.15=he1e5248_0 47 | - google-auth=1.33.0=pyhd3eb1b0_0 48 | - google-auth-oauthlib=0.4.4=pyhd3eb1b0_0 49 | - grpcio=1.42.0=py37hce63b2e_0 50 | - idna=3.3=pyhd3eb1b0_0 51 | - imagecodecs=2021.8.26=py37h4cda21f_0 52 | - imageio=2.9.0=pyhd3eb1b0_0 53 | - 
intel-openmp=2021.4.0=h06a4308_3561 54 | - jpeg=9d=h7f8727e_0 55 | - jxrlib=1.1=h7b6447c_2 56 | - kiwisolver=1.3.1=py37h2531618_0 57 | - krb5=1.19.2=hac12032_0 58 | - lame=3.100=h7b6447c_0 59 | - lcms2=2.12=h3be6417_0 60 | - ld_impl_linux-64=2.35.1=h7274673_9 61 | - lerc=3.0=h295c915_0 62 | - libaec=1.0.4=he6710b0_1 63 | - libcurl=7.80.0=h0b77cf5_0 64 | - libdeflate=1.8=h7f8727e_5 65 | - libedit=3.1.20210910=h7f8727e_0 66 | - libev=4.33=h7f8727e_1 67 | - libffi=3.3=he6710b0_2 68 | - libgcc-ng=9.3.0=h5101ec6_17 69 | - libgfortran-ng=7.5.0=ha8ba4b0_17 70 | - libgfortran4=7.5.0=ha8ba4b0_17 71 | - libgomp=9.3.0=h5101ec6_17 72 | - libiconv=1.15=h63c8f33_5 73 | - libidn2=2.3.2=h7f8727e_0 74 | - libnghttp2=1.46.0=hce63b2e_0 75 | - libpng=1.6.37=hbc83047_0 76 | - libprotobuf=3.17.2=h4ff587b_1 77 | - libssh2=1.9.0=h1ba5d50_1 78 | - libstdcxx-ng=9.3.0=hd4cf53a_17 79 | - libtasn1=4.16.0=h27cfd23_0 80 | - libtiff=4.2.0=h85742a9_0 81 | - libunistring=0.9.10=h27cfd23_0 82 | - libuv=1.40.0=h7b6447c_0 83 | - libwebp=1.2.0=h89dd481_0 84 | - libwebp-base=1.2.0=h27cfd23_0 85 | - libzopfli=1.0.3=he6710b0_0 86 | - locket=0.2.1=py37h06a4308_1 87 | - lz4-c=1.9.3=h295c915_1 88 | - markdown=3.3.4=py37h06a4308_0 89 | - matplotlib-base=3.5.0=py37h3ed280b_0 90 | - mkl=2020.2=256 91 | - mkl-service=2.3.0=py37he8ac12f_0 92 | - mkl_fft=1.3.0=py37h54f3939_0 93 | - mkl_random=1.1.1=py37h0573a6f_0 94 | - multidict=5.1.0=py37h27cfd23_2 95 | - munkres=1.1.4=py_0 96 | - ncurses=6.3=h7f8727e_2 97 | - nettle=3.7.3=hbbd107a_1 98 | - networkx=2.6.3=pyhd3eb1b0_0 99 | - ninja=1.10.2=py37hd09550d_3 100 | - numpy=1.19.2=py37h54aff64_0 101 | - numpy-base=1.19.2=py37hfa32c7d_0 102 | - oauthlib=3.1.1=pyhd3eb1b0_0 103 | - olefile=0.46=py37_0 104 | - openh264=2.1.1=h4ff587b_0 105 | - openjpeg=2.4.0=h3ad879b_0 106 | - openssl=1.1.1n=h7f8727e_0 107 | - packaging=21.3=pyhd3eb1b0_0 108 | - partd=1.2.0=pyhd3eb1b0_0 109 | - pillow=8.0.1=py37he98fc37_0 110 | - pip=21.0.1=py37h06a4308_0 111 | - portalocker=2.3.0=py37h06a4308_0 112 | - protobuf=3.17.2=py37h295c915_0 113 | - pyasn1=0.4.8=pyhd3eb1b0_0 114 | - pyasn1-modules=0.2.8=py_0 115 | - pycparser=2.21=pyhd3eb1b0_0 116 | - pyjwt=2.1.0=py37h06a4308_0 117 | - pyopenssl=21.0.0=pyhd3eb1b0_1 118 | - pyparsing=3.0.4=pyhd3eb1b0_0 119 | - pysocks=1.7.1=py37_1 120 | - python=3.7.10=h12debd9_4 121 | - python-dateutil=2.8.2=pyhd3eb1b0_0 122 | - pytorch=1.10.1=py3.7_cuda11.3_cudnn8.2.0_0 123 | - pytorch-lightning=1.2.5=pyhd8ed1ab_1 124 | - pytorch-mutex=1.0=cuda 125 | - pywavelets=1.1.1=py37h7b6447c_2 126 | - pyyaml=6.0=py37h7f8727e_1 127 | - readline=8.1=h27cfd23_0 128 | - requests=2.26.0=pyhd3eb1b0_0 129 | - requests-oauthlib=1.3.0=py_0 130 | - rsa=4.7.2=pyhd3eb1b0_1 131 | - scikit-image=0.18.1=py37ha9443f7_0 132 | - scipy=1.5.2=py37h0b6359f_0 133 | - setuptools=49.6.0=py37_0 134 | - six=1.16.0=pyhd3eb1b0_0 135 | - snappy=1.1.8=he6710b0_0 136 | - sqlite=3.36.0=hc218d9a_0 137 | - tabulate=0.8.9=py37h06a4308_0 138 | - tensorboard=2.4.0=pyhc547734_0 139 | - tensorboard-plugin-wit=1.6.0=py_0 140 | - termcolor=1.1.0=py37h06a4308_1 141 | - tifffile=2021.7.2=pyhd3eb1b0_2 142 | - tk=8.6.11=h1ccaba5_0 143 | - toolz=0.11.2=pyhd3eb1b0_0 144 | - torchmetrics=0.6.2=pyhd8ed1ab_0 145 | - torchvision=0.11.2=py37_cu113 146 | - tqdm=4.50.2=py_0 147 | - typing-extensions=3.10.0.2=hd3eb1b0_0 148 | - typing_extensions=3.10.0.2=pyh06a4308_0 149 | - urllib3=1.26.7=pyhd3eb1b0_0 150 | - werkzeug=2.0.2=pyhd3eb1b0_0 151 | - wheel=0.37.0=pyhd3eb1b0_1 152 | - xz=5.2.5=h7b6447c_0 153 | - yacs=0.1.6=pyhd3eb1b0_1 154 | - 
yaml=0.2.5=h7b6447c_0 155 | - yarl=1.6.3=py37h27cfd23_0 156 | - zfp=0.5.5=h2531618_6 157 | - zipp=3.6.0=pyhd3eb1b0_0 158 | - zlib=1.2.11=h7f8727e_4 159 | - zstd=1.4.9=haebb681_0 160 | - pip: 161 | - albumentations==1.1.0 162 | - antlr4-python3-runtime==4.8 163 | - argcomplete==1.12.3 164 | - argon2-cffi==21.3.0 165 | - argon2-cffi-bindings==21.2.0 166 | - backcall==0.2.0 167 | - black==21.12b0 168 | - bleach==4.1.0 169 | - cached-property==1.5.2 170 | - cloudpickle==1.3.0 171 | - cython==0.29.26 172 | - dataclasses==0.6 173 | - debugpy==1.5.1 174 | - decorator==4.4.2 175 | - defusedxml==0.7.1 176 | - descartes==1.1.0 177 | - dictor==0.1.7 178 | - docker-pycreds==0.4.0 179 | - einops==0.4.1 180 | - entrypoints==0.3 181 | - ephem==4.1.3 182 | - fire==0.4.0 183 | - flake8==4.0.1 184 | - gitdb==4.0.9 185 | - gitpython==3.1.27 186 | - gym==0.17.2 187 | - h5py==3.6.0 188 | - hydra-core==1.1.1 189 | - imageio-ffmpeg==0.4.5 190 | - imgaug==0.4.0 191 | - importlib-metadata==4.2.0 192 | - importlib-resources==5.4.0 193 | - iniconfig==1.1.1 194 | - ipykernel==6.6.0 195 | - ipython==7.30.1 196 | - ipython-genutils==0.2.0 197 | - ipywidgets==7.6.5 198 | - jedi==0.18.1 199 | - jinja2==3.0.3 200 | - joblib==1.1.0 201 | - jsonschema==4.3.2 202 | - jupyter==1.0.0 203 | - jupyter-client==7.1.0 204 | - jupyter-console==6.4.0 205 | - jupyter-core==4.9.1 206 | - jupyterlab-pygments==0.1.2 207 | - jupyterlab-widgets==1.0.2 208 | - lmdb==1.3.0 209 | - markupsafe==2.0.1 210 | - matplotlib-inline==0.1.3 211 | - mccabe==0.6.1 212 | - mistune==0.8.4 213 | - motmetrics==1.1.3 214 | - moviepy==1.0.3 215 | - mypy-extensions==0.4.3 216 | - nbclient==0.5.9 217 | - nbconvert==6.3.0 218 | - nbformat==5.1.3 219 | - nest-asyncio==1.5.4 220 | - notebook==6.4.6 221 | - nuscenes-devkit==1.1.0 222 | - omegaconf==2.1.1 223 | - opencv-python==4.5.1.48 224 | - opencv-python-headless==4.5.1.48 225 | - pandas==1.3.5 226 | - pandocfilters==1.5.0 227 | - parso==0.8.3 228 | - pathspec==0.9.0 229 | - pathtools==0.1.2 230 | - pexpect==4.8.0 231 | - pickleshare==0.7.5 232 | - platformdirs==2.4.1 233 | - plotly==5.5.0 234 | - pluggy==1.0.0 235 | - proglog==0.1.9 236 | - prometheus-client==0.12.0 237 | - promise==2.3 238 | - prompt-toolkit==3.0.24 239 | - psutil==5.9.0 240 | - ptyprocess==0.7.0 241 | - py==1.11.0 242 | - py-trees==0.8.1 243 | - pycocotools==2.0.3 244 | - pycodestyle==2.8.0 245 | - pydot==1.4.2 246 | - pyflakes==2.4.0 247 | - pyglet==1.5.0 248 | - pygments==2.10.0 249 | - pyquaternion==0.9.9 250 | - pyrsistent==0.18.0 251 | - pytest==6.2.5 252 | - pytz==2021.3 253 | - pyzmq==22.3.0 254 | - qtconsole==5.2.2 255 | - qtpy==2.0.0 256 | - qudida==0.0.4 257 | - scikit-learn==1.0.2 258 | - send2trash==1.8.0 259 | - sentry-sdk==1.5.8 260 | - setproctitle==1.2.2 261 | - shapely==1.8.0 262 | - shortuuid==1.0.8 263 | - smmap==5.0.0 264 | - stable-baselines3==0.8.0 265 | - tenacity==8.0.1 266 | - terminado==0.12.1 267 | - testpath==0.5.0 268 | - threadpoolctl==3.0.0 269 | - timm==0.5.4 270 | - toml==0.10.2 271 | - tomli==1.2.3 272 | - tornado==6.1 273 | - traitlets==5.1.1 274 | - typed-ast==1.5.1 275 | - wandb==0.12.11 276 | - wcwidth==0.2.5 277 | - webencodings==0.5.1 278 | - widgetsnbextension==3.5.2 279 | - yaspin==2.1.0 280 | -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bcmi/AustNet-Inharmonious-Region-Localization/a02b53ae85b2991829bad84173d5334d2774dd02/evaluation/__init__.py -------------------------------------------------------------------------------- /evaluation/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from sklearn.metrics import average_precision_score 4 | 5 | def normPRED(d, eps=1e-2): 6 | ma = torch.max(d) 7 | mi = torch.min(d) 8 | 9 | if ma - mi < eps: 10 | dn = d - mi 11 | else: 12 | dn = (d - mi)/(ma - mi) 13 | return dn 14 | 15 | def compute_mAP(pred, gt): 16 | # NOTE: original implementation lost in extraction; reconstructed here as per-sample average precision (assumed) 17 | aps = [] 18 | for i in range(pred.shape[0]): 19 | gt_i = gt[i].view(-1).cpu().numpy().astype(np.uint8) 20 | pred_i = pred[i].view(-1).cpu().numpy() 21 | aps.append(average_precision_score(gt_i, pred_i)) 22 | return float(np.mean(aps)) 23 | 24 | def compute_IoU(pred, gt, threshold=0.5, eps=1e-6): 25 | pred = torch.where(pred > threshold, torch.ones_like(pred), torch.zeros_like(pred)).to(pred.device) 26 | intersection = (pred * gt).sum(dim=[1,2,3]) 27 | union = pred.sum(dim=[1,2,3]) + gt.sum(dim=[1,2,3]) - intersection 28 | return (intersection / (union+eps)).mean().item() 29 | 30 | def MAE(pred, gt): 31 | if isinstance(pred, torch.Tensor): 32 | return torch.mean(torch.abs(pred - gt)) 33 | elif isinstance(pred, np.ndarray): 34 | return np.mean(np.abs(pred-gt)) 35 | 36 | def FScore(pred, gt, beta2=1.0, threshold=0.5, eps=1e-6, reduce_dims=[1,2,3]): 37 | if isinstance(pred, torch.Tensor): 38 | if threshold == -1: threshold = pred.mean().item() * 2 39 | ones = torch.ones_like(pred).to(pred.device) 40 | zeros = torch.zeros_like(pred).to(pred.device) 41 | pred_ = torch.where(pred > threshold, ones, zeros) 42 | gt = torch.where(gt>threshold, ones, zeros) 43 | total_num = pred.nelement() 44 | 45 | TP = (pred_ * gt).sum(dim=reduce_dims) 46 | NumPrecision = pred_.sum(dim=reduce_dims) 47 | NumRecall = gt.sum(dim=reduce_dims) 48 | 49 | precision = TP / (NumPrecision+eps) 50 | recall = TP / (NumRecall+eps) 51 | F_beta = (1+beta2)*(precision * recall) / (beta2*precision + recall + eps) 52 | F_beta = F_beta.mean() 53 | 54 | elif isinstance(pred, np.ndarray): 55 | if threshold == -1: threshold = pred.mean()* 2 56 | pred_ = np.where(pred > threshold, 1.0, 0.0) 57 | gt = np.where(gt > threshold, 1.0, 0.0) 58 | total_num = np.prod(pred_.shape) 59 | 60 | TP = (pred_ * gt).sum() 61 | NumPrecision = pred_.sum() 62 | NumRecall = gt.sum() 63 | 64 | precision = TP / (NumPrecision+eps) 65 | recall = TP / (NumRecall+eps) 66 | F_beta = (1+beta2)*(precision * recall) / (beta2*precision + recall + eps) 67 | 68 | return F_beta 69 | 70 | if __name__ == "__main__": 71 | gt = torch.ones((1,1,3,3)) 72 | gt[0][0][1][1] = 0 73 | pred = torch.ones((1,1,3,3)) 74 | pred[0][0][1][2] = 0 75 | pred[0][0][1][0] = 0 76 | print(compute_IoU(pred, gt)) -------------------------------------------------------------------------------- /model/austnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .encoder_decoder import resnet_encoder, aust_decoder 5 | 6 | def feature_loss(feature, mask_small): 7 | batch, dim, h, w = feature.shape 8 | feature = F.normalize(feature, p=2, dim=1) 9 | feature = feature.view(batch, dim, h*w) 10 | mask = mask_small.view(batch, 1, h*w) 11 | sim_matrix = torch.matmul(feature.transpose(1,2), feature) 12 | sim_matrix = sim_matrix.view(batch, 1, h, w, h, w) 13 | mask_map = torch.where(mask == 0, torch.tensor(-1).to(mask.device).type_as(mask), mask).view(batch, 1, h*w) 14 | mask_map = torch.matmul(mask_map.transpose(1,2), mask_map) 15 | mask_map = mask_map.view(batch, 1, h, w, h, w) 16 | 17 | sim_matrix = torch.clamp(sim_matrix, -1, 1) 18 | inner_pair = (mask_map > 0) 19 | inter_pair = (mask_map < 0) 20 | if torch.sum(inner_pair) > 0: 21 | inner_loss = torch.sum(inner_pair*sim_matrix) / 
torch.sum(inner_pair) 22 | else: 23 | inner_loss = 0 24 | if torch.sum(inter_pair) > 0 : 25 | inter_loss = torch.sum(inter_pair*sim_matrix) / torch.sum(inter_pair) 26 | else: 27 | inter_loss = 0 28 | return inner_loss, inter_loss 29 | 30 | class AustNet(nn.Module): 31 | 32 | def __init__(self): 33 | super(AustNet, self).__init__() 34 | self.color_transfer_encoder = nn.Sequential( 35 | nn.Conv2d(3, 64, kernel_size=3, padding=1), 36 | nn.ReLU(inplace=True), 37 | nn.Conv2d(64, 64, kernel_size=7, padding=3), 38 | nn.ReLU(inplace=True), 39 | nn.Conv2d(64, 64, kernel_size=7, padding=3), 40 | nn.ReLU(inplace=True), 41 | nn.Conv2d(64, 6, kernel_size=3, padding=1), 42 | ) 43 | 44 | self.yuv_encoder = resnet_encoder(stage=4) 45 | self.rgb_encoder = resnet_encoder(stage=4) 46 | 47 | self.decoder = aust_decoder() 48 | 49 | self.H = 28 50 | self.W = 28 51 | 52 | self.relu = nn.ReLU(inplace=True) 53 | 54 | self.pooling = nn.AdaptiveAvgPool2d((1,1)) 55 | 56 | def calculate_distance_map(self): 57 | H = self.H 58 | W = self.W 59 | # mesh grid 60 | xx = torch.arange(0, W).view(1,-1).repeat(H,1) 61 | yy = torch.arange(0, H).view(-1,1).repeat(1,W) 62 | xx = xx.view(1,H,W) 63 | yy = yy.view(1,H,W) 64 | grid = torch.cat((yy,xx),0).float() 65 | 66 | distance_map = torch.zeros([H,W,H,W]) 67 | for i in range(H): 68 | for j in range(W): 69 | current_position = torch.tensor([i,j]).view(2,1,1) 70 | distance_map[i][j] = torch.sum(torch.abs(current_position - grid ), axis = 0) 71 | 72 | 73 | distance_map /= torch.max(distance_map) 74 | 75 | return distance_map.view(1,self.H, self.W, self.H, self.W) 76 | 77 | def forward(self, rgb, yuv=None): 78 | if yuv is None: 79 | yuv = rgb 80 | rgb_features = self.rgb_encoder(rgb) 81 | transfer_parameter = self.color_transfer_encoder(yuv) 82 | transfered_yuv = yuv * self.relu(transfer_parameter[:, 0:3]) + transfer_parameter[:, 3:] 83 | 84 | yuv_features = self.yuv_encoder(transfered_yuv) 85 | 86 | yuv_feature_norm = self.cal_yuv_similarity(yuv_features[-1]) 87 | 88 | out, aux_list, final_score, init_score= self.decoder(rgb_features, yuv_features, yuv_feature_norm) 89 | 90 | return out, aux_list, init_score, final_score, yuv_features[-1] 91 | 92 | 93 | def cal_yuv_similarity(self, yuv_feature): 94 | B, _, _, _ = yuv_feature.shape 95 | H, W = self.H, self.W 96 | 97 | yuv_feature = yuv_feature.permute(0, 2, 3, 1).view(B, self.H*self.W, -1) 98 | yuv_feature_norm = F.normalize(yuv_feature, p=2, dim=2) 99 | yuv_similarity_map = yuv_feature_norm @ yuv_feature_norm.permute(0,-1,-2) 100 | yuv_similarity_map = yuv_similarity_map.view(B, H, W, H, W) 101 | 102 | return yuv_feature_norm.permute(0, 2, 1).view(B, -1, H, W) 103 | -------------------------------------------------------------------------------- /model/austnet_s.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from .encoder_decoder import resnet_encoder, austs_decoder 5 | 6 | def feature_loss(feature, mask_small): 7 | batch, dim, h, w = feature.shape 8 | feature = F.normalize(feature, p=2, dim=1) 9 | feature = feature.view(batch, dim, h*w) 10 | mask = mask_small.view(batch, 1, h*w) 11 | sim_matrix = torch.matmul(feature.transpose(1,2), feature) 12 | sim_matrix = sim_matrix.view(batch, 1, h, w, h, w) 13 | mask_map = torch.where(mask == 0, torch.tensor(-1).to(mask.device).type_as(mask), mask).view(batch, 1, h*w) 14 | mask_map = torch.matmul(mask_map.transpose(1,2), mask_map) 15 | mask_map = mask_map.view(batch, 1, h, 
w, h, w) 16 | 17 | sim_matrix = torch.clamp(sim_matrix, -1, 1) 18 | inner_pair = (mask_map > 0) 19 | inter_pair = (mask_map < 0) 20 | if torch.sum(inner_pair) > 0: 21 | inner_loss = torch.sum(inner_pair*sim_matrix) / torch.sum(inner_pair) 22 | else: 23 | inner_loss = 0 24 | if torch.sum(inter_pair) > 0 : 25 | inter_loss = torch.sum(inter_pair*sim_matrix) / torch.sum(inter_pair) 26 | else: 27 | inter_loss = 0 28 | return inner_loss, inter_loss 29 | 30 | class AustNet_S(nn.Module): 31 | 32 | def __init__(self): 33 | super(AustNet_S, self).__init__() 34 | self.color_transfer_encoder = nn.Sequential( 35 | nn.Conv2d(3, 64, kernel_size=3, padding=1), 36 | nn.ReLU(inplace=True), 37 | nn.Conv2d(64, 64, kernel_size=7, padding=3), 38 | nn.ReLU(inplace=True), 39 | nn.Conv2d(64, 64, kernel_size=7, padding=3), 40 | nn.ReLU(inplace=True), 41 | nn.Conv2d(64, 6, kernel_size=3, padding=1), 42 | ) 43 | 44 | self.yuv_encoder = resnet_encoder(stage=4) 45 | self.rgb_encoder = resnet_encoder(stage=4) 46 | 47 | self.decoder = austs_decoder() 48 | 49 | self.H = 28 50 | self.W = 28 51 | 52 | self.relu = nn.ReLU(inplace=True) 53 | 54 | self.distance_map = self.calculate_distance_map() 55 | 56 | def calculate_distance_map(self): 57 | H = self.H 58 | W = self.W 59 | # mesh grid 60 | xx = torch.arange(0, W).view(1,-1).repeat(H,1) 61 | yy = torch.arange(0, H).view(-1,1).repeat(1,W) 62 | xx = xx.view(1,H,W) 63 | yy = yy.view(1,H,W) 64 | grid = torch.cat((yy,xx),0).float() 65 | 66 | distance_map = torch.zeros([H,W,H,W]) 67 | for i in range(H): 68 | for j in range(W): 69 | current_position = torch.tensor([i,j]).view(2,1,1) 70 | distance_map[i][j] = torch.sum(torch.abs(current_position - grid ), axis = 0) 71 | 72 | 73 | distance_map /= torch.max(distance_map) 74 | 75 | return distance_map.view(1,self.H, self.W, self.H, self.W) 76 | 77 | 78 | def forward(self, rgb, yuv=None, semantic_features=None): 79 | semantic_features = F.interpolate(semantic_features, (self.H, self.W), mode='bilinear') 80 | rgb_features = self.rgb_encoder(rgb) 81 | transfer_parameter = self.color_transfer_encoder(yuv) 82 | transfered_yuv = yuv * self.relu(transfer_parameter[:, 0:3]) + transfer_parameter[:, 3:] 83 | yuv_features = self.yuv_encoder(transfered_yuv) 84 | _, semantic_similarity, yuv_similarity = self.aggregate_yuv_features(semantic_features, yuv_features[-1]) 85 | 86 | out, aux_list, final_score, init_score = self.decoder(rgb_features, yuv_features, semantic_similarity, yuv_similarity) 87 | 88 | return out, aux_list, init_score, final_score, yuv_features[-1], transfered_yuv 89 | 90 | 91 | def aggregate_yuv_features(self, semantic_feature, yuv_feature): 92 | B, _, _, _ = semantic_feature.shape 93 | H, W = self.H, self.W 94 | semantic_feature = semantic_feature.permute(0, 2, 3, 1) 95 | semantic_feature = semantic_feature.view(B, self.H*self.W, -1) 96 | semantic_feature = F.normalize(semantic_feature, p=2, dim=2) 97 | similarity_map = semantic_feature @ semantic_feature.permute(0,-1,-2) 98 | similarity_map = similarity_map.view(B, H, W, H, W) 99 | if self.distance_map.device != similarity_map.device: 100 | self.distance_map = self.distance_map.to(similarity_map.device) 101 | score_map = self.distance_map * similarity_map 102 | score_map /= torch.sum(score_map, dim=(3,4), keepdim=True) 103 | 104 | yuv_feature = yuv_feature.permute(0, 2, 3, 1).view(B, self.H*self.W, -1) 105 | yuv_feature_norm = F.normalize(yuv_feature, p=2, dim=2) 106 | yuv_similarity_map = yuv_feature_norm @ yuv_feature_norm.permute(0,-1,-2) 107 | yuv_similarity_map = 
yuv_similarity_map.view(B, H, W, H, W) 108 | 109 | 110 | score_map = (score_map*yuv_similarity_map) 111 | score_map = torch.sum(score_map, dim = (3,4), keepdim=True).view(B, H, W, 1).permute(0, -1, 1, 2) 112 | return score_map.view(B, 1, H, W), similarity_map, yuv_similarity_map -------------------------------------------------------------------------------- /options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def ArgsParser(): 4 | parser = argparse.ArgumentParser() 5 | # Datasets 6 | parser.add_argument('--dataset_root', type=str, default="iHarmony4/", help='dataset path') 7 | parser.add_argument('--logdir', type=str, default='./checkpoints', help='models are saved here') 8 | parser.add_argument('--preprocess', type=str, default='resize_and_crop', help='scaling and cropping of images at load time [resize_and_crop | none]') 9 | parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation') 10 | 11 | parser.add_argument('--batch_size', type=int, default=8, help='input batch size') 12 | parser.add_argument('--load_size', type=int, default=256, help='scale images to this size') 13 | parser.add_argument('--crop_size', type=int, default=224, help='then crop to this size') 14 | 15 | parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') 16 | 17 | # training parameters 18 | parser.add_argument('--nepochs', type=int, default=250, help='# of training epochs') 19 | parser.add_argument('--gpus', type=int, default=4, help='# of GPUs used for training') 20 | parser.add_argument('--lr', type=float, default=0.0001, help='initial learning rate for adam') 21 | 22 | parser.add_argument('--ckpt', type=str, default="", help='model ckpt for testing') 23 | 24 | parser = parser.parse_args() 25 | return parser 26 | -------------------------------------------------------------------------------- /pytorch_iou/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.autograd import Variable 4 | import numpy as np 5 | 6 | def _iou(pred, target, size_average = True): 7 | 8 | b = pred.shape[0] 9 | IoU = 0.0 10 | for i in range(0,b): 11 | #compute the IoU of the foreground 12 | Iand1 = torch.sum(target[i,:,:,:]*pred[i,:,:,:]) 13 | Ior1 = torch.sum(target[i,:,:,:]) + torch.sum(pred[i,:,:,:])-Iand1 14 | IoU1 = Iand1/Ior1 15 | 16 | #IoU loss is (1-IoU1) 17 | IoU = IoU + (1-IoU1) 18 | 19 | return IoU/b 20 | 21 | class IOU(torch.nn.Module): 22 | def __init__(self, size_average = True): 23 | super(IOU, self).__init__() 24 | self.size_average = size_average 25 | 26 | def forward(self, pred, target): 27 | 28 | return _iou(pred, target, self.size_average) 29 | -------------------------------------------------------------------------------- /pytorch_ssim/__init__.py: -------------------------------------------------------------------------------- 1 | # https://github.com/Po-Hsun-Su/pytorch-ssim/blob/master/pytorch_ssim/__init__.py 2 | import torch 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | from math import exp 7 | 8 | def gaussian(window_size, sigma): 9 | gauss = torch.Tensor([exp(-(x - window_size//2)**2/float(2*sigma**2)) for x in range(window_size)]) 10 | return gauss/gauss.sum() 11 | 12 | def create_window(window_size, channel): 13 | _1D_window = gaussian(window_size, 1.5).unsqueeze(1) 14 | _2D_window = 
_1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0) 15 | window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous()) 16 | return window 17 | 18 | def _ssim(img1, img2, window, window_size, channel, size_average = True): 19 | mu1 = F.conv2d(img1, window, padding = window_size//2, groups = channel) 20 | mu2 = F.conv2d(img2, window, padding = window_size//2, groups = channel) 21 | 22 | mu1_sq = mu1.pow(2) 23 | mu2_sq = mu2.pow(2) 24 | mu1_mu2 = mu1*mu2 25 | 26 | sigma1_sq = F.conv2d(img1*img1, window, padding = window_size//2, groups = channel) - mu1_sq 27 | sigma2_sq = F.conv2d(img2*img2, window, padding = window_size//2, groups = channel) - mu2_sq 28 | sigma12 = F.conv2d(img1*img2, window, padding = window_size//2, groups = channel) - mu1_mu2 29 | 30 | C1 = 0.01**2 31 | C2 = 0.03**2 32 | 33 | ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2)) 34 | 35 | if size_average: 36 | return ssim_map.mean() 37 | else: 38 | return ssim_map.mean(1).mean(1).mean(1) 39 | 40 | class SSIM(torch.nn.Module): 41 | def __init__(self, window_size = 11, size_average = True): 42 | super(SSIM, self).__init__() 43 | self.window_size = window_size 44 | self.size_average = size_average 45 | self.channel = 1 46 | self.window = create_window(window_size, self.channel) 47 | 48 | def forward(self, img1, img2): 49 | (_, channel, _, _) = img1.size() 50 | 51 | if channel == self.channel and self.window.data.type() == img1.data.type(): 52 | window = self.window 53 | else: 54 | window = create_window(self.window_size, channel) 55 | 56 | if img1.is_cuda: 57 | window = window.cuda(img1.get_device()) 58 | window = window.type_as(img1) 59 | 60 | self.window = window 61 | self.channel = channel 62 | 63 | 64 | return _ssim(img1, img2, window, self.window_size, channel, self.size_average) 65 | 66 | def _logssim(img1, img2, window, window_size, channel, size_average = True): 67 | mu1 = F.conv2d(img1, window, padding = window_size//2, groups = channel) 68 | mu2 = F.conv2d(img2, window, padding = window_size//2, groups = channel) 69 | 70 | mu1_sq = mu1.pow(2) 71 | mu2_sq = mu2.pow(2) 72 | mu1_mu2 = mu1*mu2 73 | 74 | sigma1_sq = F.conv2d(img1*img1, window, padding = window_size//2, groups = channel) - mu1_sq 75 | sigma2_sq = F.conv2d(img2*img2, window, padding = window_size//2, groups = channel) - mu2_sq 76 | sigma12 = F.conv2d(img1*img2, window, padding = window_size//2, groups = channel) - mu1_mu2 77 | 78 | C1 = 0.01**2 79 | C2 = 0.03**2 80 | 81 | ssim_map = ((2*mu1_mu2 + C1)*(2*sigma12 + C2))/((mu1_sq + mu2_sq + C1)*(sigma1_sq + sigma2_sq + C2)) 82 | ssim_map = (ssim_map - torch.min(ssim_map))/(torch.max(ssim_map)-torch.min(ssim_map)) 83 | ssim_map = -torch.log(ssim_map + 1e-8) 84 | 85 | if size_average: 86 | return ssim_map.mean() 87 | else: 88 | return ssim_map.mean(1).mean(1).mean(1) 89 | 90 | class LOGSSIM(torch.nn.Module): 91 | def __init__(self, window_size = 11, size_average = True): 92 | super(LOGSSIM, self).__init__() 93 | self.window_size = window_size 94 | self.size_average = size_average 95 | self.channel = 1 96 | self.window = create_window(window_size, self.channel) 97 | 98 | def forward(self, img1, img2): 99 | (_, channel, _, _) = img1.size() 100 | 101 | if channel == self.channel and self.window.data.type() == img1.data.type(): 102 | window = self.window 103 | else: 104 | window = create_window(self.window_size, channel) 105 | 106 | if img1.is_cuda: 107 | window = window.cuda(img1.get_device()) 108 | window = window.type_as(img1) 
109 | 110 | self.window = window 111 | self.channel = channel 112 | 113 | 114 | return _logssim(img1, img2, window, self.window_size, channel, self.size_average) 115 | 116 | 117 | def ssim(img1, img2, window_size = 11, size_average = True): 118 | (_, channel, _, _) = img1.size() 119 | window = create_window(window_size, channel) 120 | 121 | if img1.is_cuda: 122 | window = window.cuda(img1.get_device()) 123 | window = window.type_as(img1) 124 | 125 | return _ssim(img1, img2, window, window_size, channel, size_average) 126 | -------------------------------------------------------------------------------- /test_austnet.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import pytorch_lightning as pl 4 | 5 | from torch.utils.data import DataLoader 6 | 7 | import numpy as np 8 | import tqdm 9 | 10 | from evaluation.metrics import FScore, normPRED, compute_mAP, compute_IoU 11 | from options import ArgsParser 12 | 13 | 14 | from data import iH4Dataset 15 | from train_austnet import Engine 16 | 17 | 18 | def denormalize(x, isMask=False): 19 | if isMask: 20 | mean = 0 21 | std=1 22 | x = x.numpy().transpose(0,2,3,1) 23 | # x = np.where(x>0.5, 1, 0) 24 | x = (x*std + mean)*255 25 | x = x.astype(np.uint8) 26 | else: 27 | mean = torch.zeros_like(x) 28 | mean[0,:,:] = .46962251 29 | mean[1,:,:] = .4464104 30 | mean[2,:,:] = .40718787 31 | std = torch.zeros_like(x) 32 | std[0,:,:] = 0.27469736 33 | std[1,:,:] = 0.27012361 34 | std[2,:,:] = 0.28515933 35 | x = (x*std + mean)*255 36 | x = x.numpy().transpose(0,2,3,1).astype(np.uint8) 37 | return x 38 | 39 | class AverageMeter(object): 40 | """Computes and stores the average and current value""" 41 | def __init__(self): 42 | self.reset() 43 | 44 | def reset(self): 45 | self.val = 0 46 | self.avg = 0 47 | self.sum = 0 48 | self.count = 0 49 | 50 | def update(self, val, n=1): 51 | self.val = val 52 | self.sum += val * n 53 | self.count += n 54 | self.avg = self.sum / self.count 55 | 56 | 57 | 58 | if __name__ == '__main__': 59 | opt = ArgsParser() 60 | pl.seed_everything(42) 61 | 62 | # Data 63 | opt.no_flip = True 64 | opt.phase = "val" 65 | opt.preprocess = 'resize' 66 | val_set = iH4Dataset(opt) 67 | 68 | dataloader_val = DataLoader(val_set, batch_size=1, shuffle=False, num_workers=8) 69 | 70 | engine = Engine(opt) 71 | engine = Engine.load_from_checkpoint(opt.ckpt, opt = opt) 72 | 73 | engine.eval() 74 | engine.freeze() 75 | 76 | engine = engine.cuda() 77 | total_number = 0 78 | 79 | mAPMeter = AverageMeter() 80 | F1Meter = AverageMeter() 81 | FbMeter = AverageMeter() 82 | IoUMeter = AverageMeter() 83 | innerMeter = AverageMeter() 84 | interMeter = AverageMeter() 85 | 86 | total_number = 0 87 | total_time = 0 88 | 89 | for b_idx, batch in tqdm.tqdm(enumerate(dataloader_val), total = len(dataloader_val)): 90 | 91 | comp = batch["comp"].to("cuda") 92 | mask = batch["mask"].type(torch.FloatTensor).to("cuda") 93 | yuv = batch['yuv'].to("cuda") 94 | with torch.no_grad(): 95 | out, aux_list, init_score, final_score, feature = engine.model(comp, yuv) 96 | mask = torch.clamp(mask, 0, 1) 97 | out = torch.clamp(out, 0, 1) 98 | 99 | inharmonious_pred = normPRED(out) 100 | mask_gt = normPRED(mask) 101 | 102 | pred = inharmonious_pred 103 | 104 | label = mask_gt 105 | 106 | F1 = FScore(pred, label).item() 107 | FBeta = FScore(pred, label, threshold=-1, beta2=0.3) 108 | 109 | mAP = compute_mAP(pred, label) 110 | 111 | IoUMeter.update(compute_IoU(pred, label), label.size(0)) 112 | 
mAPMeter.update(mAP, inharmonious_pred.size(0)) 113 | F1Meter.update(F1, inharmonious_pred.size(0)) 114 | FbMeter.update(FBeta, inharmonious_pred.size(0)) 115 | 116 | print(mAPMeter.avg) 117 | print(F1Meter.avg) 118 | print(IoUMeter.avg) 119 | 120 | -------------------------------------------------------------------------------- /test_austnet_s.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import pytorch_lightning as pl 4 | 5 | from torch.utils.data import DataLoader 6 | 7 | import numpy as np 8 | import tqdm 9 | 10 | from evaluation.metrics import FScore, normPRED, compute_mAP, compute_IoU 11 | from options import ArgsParser 12 | 13 | from data import iH4Dataset 14 | from train_austnet_s import Engine 15 | 16 | def denormalize(x, isMask=False): 17 | if isMask: 18 | mean = 0 19 | std=1 20 | x = x.numpy().transpose(0,2,3,1) 21 | # x = np.where(x>0.5, 1, 0) 22 | x = (x*std + mean)*255 23 | x = x.astype(np.uint8) 24 | else: 25 | mean = torch.zeros_like(x) 26 | mean[0,:,:] = .46962251 27 | mean[1,:,:] = .4464104 28 | mean[2,:,:] = .40718787 29 | std = torch.zeros_like(x) 30 | std[0,:,:] = 0.27469736 31 | std[1,:,:] = 0.27012361 32 | std[2,:,:] = 0.28515933 33 | x = (x*std + mean)*255 34 | x = x.numpy().transpose(0,2,3,1).astype(np.uint8) 35 | return x 36 | 37 | 38 | class AverageMeter(object): 39 | """Computes and stores the average and current value""" 40 | def __init__(self): 41 | self.reset() 42 | 43 | def reset(self): 44 | self.val = 0 45 | self.avg = 0 46 | self.sum = 0 47 | self.count = 0 48 | 49 | def update(self, val, n=1): 50 | self.val = val 51 | self.sum += val * n 52 | self.count += n 53 | self.avg = self.sum / self.count 54 | 55 | 56 | if __name__ == '__main__': 57 | opt = ArgsParser() 58 | pl.seed_everything(42) 59 | 60 | # Data 61 | opt.no_flip = True 62 | opt.phase = "val" 63 | opt.preprocess = 'resize' 64 | val_set = iH4Dataset(opt) 65 | 66 | dataloader_val = DataLoader(val_set, batch_size=1, shuffle=False, num_workers=8) 67 | 68 | engine = Engine(opt) 69 | 70 | engine = Engine.load_from_checkpoint(opt.ckpt, opt = opt) 71 | 72 | engine.eval() 73 | engine.freeze() 74 | 75 | engine = engine.cuda() 76 | total_number = 0 77 | 78 | mAPMeter = AverageMeter() 79 | F1Meter = AverageMeter() 80 | FbMeter = AverageMeter() 81 | IoUMeter = AverageMeter() 82 | innerMeter = AverageMeter() 83 | interMeter = AverageMeter() 84 | 85 | total_number = 0 86 | total_time = 0 87 | 88 | for b_idx, batch in tqdm.tqdm(enumerate(dataloader_val), total = len(dataloader_val)): 89 | 90 | comp = batch["comp"].to("cuda") 91 | mask = batch["mask"].type(torch.FloatTensor).to("cuda") 92 | yuv = batch['yuv'].to("cuda") 93 | with torch.no_grad(): 94 | semantic_feature, semantic_out = engine.semantic_model(comp) 95 | out, aux_list, init_score, final_score, feature, transfered_yuv = engine.model(comp, yuv, semantic_feature) 96 | 97 | mask = torch.clamp(mask, 0, 1) 98 | out = torch.clamp(out, 0, 1) 99 | 100 | inharmonious_pred = normPRED(out) 101 | mask_gt = normPRED(mask) 102 | 103 | pred = inharmonious_pred 104 | 105 | label = mask_gt 106 | 107 | F1 = FScore(pred, label).item() 108 | FBeta = FScore(pred, label, threshold=-1, beta2=0.3) 109 | 110 | mAP = compute_mAP(pred, label) 111 | 112 | IoUMeter.update(compute_IoU(pred, label), label.size(0)) 113 | mAPMeter.update(mAP, inharmonious_pred.size(0)) 114 | F1Meter.update(F1, inharmonious_pred.size(0)) 115 | FbMeter.update(FBeta, inharmonious_pred.size(0)) 116 | 117 | print(mAPMeter.avg) 118 | 
print(F1Meter.avg) 119 | print(IoUMeter.avg) 120 | 121 | -------------------------------------------------------------------------------- /train_austnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import pytorch_lightning as pl 4 | from pytorch_lightning.callbacks import ModelCheckpoint 5 | from pytorch_lightning.plugins import DDPPlugin 6 | from torch.utils.data import DataLoader 7 | import torch.optim as optim 8 | 9 | from data import iH4Dataset 10 | import pytorch_iou 11 | import pytorch_ssim 12 | from evaluation.metrics import FScore, normPRED, compute_mAP, compute_IoU 13 | from model.austnet import AustNet, feature_loss 14 | from options import ArgsParser 15 | 16 | 17 | class Engine(pl.LightningModule): 18 | def __init__(self, opt): 19 | super().__init__() 20 | self.opt = opt 21 | self.model = AustNet() 22 | 23 | self.bce_loss = nn.BCELoss(size_average=True) 24 | self.ssim_loss = pytorch_ssim.SSIM(window_size=11,size_average=True) 25 | self.iou_loss = pytorch_iou.IOU(size_average=True) 26 | 27 | 28 | def forward(self, batch): 29 | comp = batch["comp"] 30 | yuv = batch['yuv'] 31 | out, aux_list, init_score, final_score, feature = self.model(comp, yuv) 32 | return out 33 | 34 | 35 | 36 | def training_step(self, batch, batch_idx): 37 | comp = batch["comp"] 38 | real = batch["real"] 39 | mask = batch["mask"].type(torch.FloatTensor).to(comp.device) 40 | mask_2 = batch["mask_2"].type(torch.FloatTensor).to(comp.device) 41 | mask_4 = batch["mask_4"].type(torch.FloatTensor).to(comp.device) 42 | mask_8 = batch["mask_8"].type(torch.FloatTensor).to(comp.device) 43 | yuv = batch['yuv'] 44 | 45 | out, aux_list, init_score, final_score, feature = self.model(comp, yuv) 46 | aux1, aux2, aux3 = aux_list[0], aux_list[1], aux_list[2] 47 | 48 | out = torch.clamp(out, 0, 1) 49 | aux1 = torch.clamp(aux1, 0, 1) 50 | aux2 = torch.clamp(aux2, 0, 1) 51 | aux3 = torch.clamp(aux3, 0, 1) 52 | mask = torch.clamp(mask, 0, 1) 53 | mask_2 = torch.clamp(mask_2, 0, 1) 54 | mask_4 = torch.clamp(mask_4, 0, 1) 55 | mask_8 = torch.clamp(mask_8, 0, 1) 56 | init_score = torch.clamp(1- (1 + init_score)/2, 0, 1) 57 | final_score = torch.clamp(1- (1 + final_score)/2, 0, 1) 58 | 59 | bce_loss = self.bce_loss(out, mask) 60 | ssim_loss = 1 - self.ssim_loss(out, mask) 61 | iou_loss = self.iou_loss(out,mask) 62 | 63 | bce_loss_aux1 = self.bce_loss(aux1, mask_4) 64 | ssim_loss_aux1 = 1 - self.ssim_loss(aux1, mask_4) 65 | iou_loss_aux1 = self.iou_loss(aux1,mask_4) 66 | 67 | bce_loss_aux2 = self.bce_loss(aux2, mask_2) 68 | ssim_loss_aux2 = 1 - self.ssim_loss(aux2, mask_2) 69 | iou_loss_aux2 = self.iou_loss(aux2,mask_2) 70 | 71 | bce_loss_aux3 = self.bce_loss(aux3, mask) 72 | ssim_loss_aux3 = 1 - self.ssim_loss(aux3, mask) 73 | iou_loss_aux3 = self.iou_loss(aux3,mask) 74 | 75 | loss_inner, loss_inter = feature_loss(feature, mask_8) 76 | 77 | loss = bce_loss + ssim_loss + iou_loss 78 | 79 | aux_loss = bce_loss_aux1 + ssim_loss_aux1 + iou_loss_aux1 + bce_loss_aux2 + ssim_loss_aux2 + iou_loss_aux2 + bce_loss_aux3 + ssim_loss_aux3 + iou_loss_aux3 80 | loss += aux_loss/3 81 | loss += max(0, loss_inter - loss_inner + 0.5) 82 | 83 | self.log('train_loss', loss.item()) 84 | self.log('bce_loss', bce_loss.item()) 85 | self.log('ssim_loss', ssim_loss.item()) 86 | self.log('iou_loss', iou_loss.item()) 87 | self.log('aux_loss', aux_loss.item()) 88 | self.log('loss_inner', loss_inner.item()) 89 | self.log('loss_inter', loss_inter.item()) 90 | 91 | return loss 92 | 
def configure_optimizers(self): 93 | optimizer = optim.Adam(self.model.parameters(), lr=self.opt.lr, betas=(0.9,0.999), weight_decay=1e-4) 94 | lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20) 95 | return [optimizer], [lr_scheduler] 96 | 97 | def validation_step(self, batch, batch_idx): 98 | comp = batch["comp"] 99 | real = batch["real"] 100 | mask = batch["mask"].type(torch.FloatTensor).to(comp.device) 101 | yuv = batch['yuv'] 102 | out, _, _,_,_ = self.model(comp, yuv) 103 | 104 | out = normPRED(out) 105 | mask = normPRED(mask) 106 | 107 | out = torch.clamp(out, 0, 1) 108 | mask = torch.clamp(mask, 0, 1) 109 | 110 | F1 = FScore(out, mask) 111 | mAP = compute_mAP(out, mask) 112 | 113 | IoU = compute_IoU(out, mask) 114 | 115 | self.log('F1', F1, sync_dist=True) 116 | self.log('mAP', mAP, sync_dist=True) 117 | self.log("IoU", IoU, sync_dist=True) 118 | 119 | if __name__ == '__main__': 120 | opt = ArgsParser() 121 | pl.seed_everything(42) 122 | 123 | # Data 124 | opt.phase = "train" 125 | train_set = iH4Dataset(opt) 126 | opt.no_flip = True 127 | opt.phase = "val" 128 | opt.preprocess = 'resize' 129 | val_set = iH4Dataset(opt) 130 | opt.phase = "train" 131 | 132 | dataloader_train = DataLoader(train_set, batch_size=opt.batch_size, shuffle=True, num_workers=8) 133 | dataloader_val = DataLoader(val_set, batch_size=opt.batch_size, shuffle=False, num_workers=8) 134 | 135 | engine = Engine(opt) 136 | 137 | checkpoint_callback = ModelCheckpoint(save_weights_only=False, mode="max", monitor="mAP", save_top_k=2, save_last=True, 138 | dirpath=opt.logdir, filename="best_{epoch:02d}-{mAP:.3f}") 139 | checkpoint_callback.CHECKPOINT_NAME_LAST = "{epoch}-last" 140 | trainer = pl.Trainer.from_argparse_args(opt, 141 | default_root_dir=opt.logdir, 142 | sync_batchnorm=True, 143 | gpus = opt.gpus , 144 | accelerator='ddp', 145 | profiler='simple', 146 | benchmark=True, 147 | log_every_n_steps=1, 148 | plugins=DDPPlugin(find_unused_parameters=False), 149 | flush_logs_every_n_steps=5, 150 | callbacks=[checkpoint_callback, 151 | ], 152 | check_val_every_n_epoch = 3, 153 | max_epochs = opt.nepochs 154 | ) 155 | 156 | trainer.fit(engine, dataloader_train, dataloader_val) -------------------------------------------------------------------------------- /train_austnet_s.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import torch.nn as nn 4 | import pytorch_lightning as pl 5 | from pytorch_lightning.callbacks import ModelCheckpoint 6 | from pytorch_lightning.plugins import DDPPlugin 7 | from torch.utils.data import DataLoader 8 | import torch.optim as optim 9 | import numpy as np 10 | 11 | 12 | from data import iH4Dataset 13 | import pytorch_iou 14 | import pytorch_ssim 15 | from evaluation.metrics import FScore, normPRED, compute_mAP, compute_IoU 16 | from model.austnet_s import AustNet_S, feature_loss 17 | from options import ArgsParser 18 | 19 | sys.path.append("HRNet-Semantic-Segmentation-HRNet-OCR/lib") 20 | import models 21 | from config import config 22 | from config import update_config 23 | 24 | class Engine(pl.LightningModule): 25 | def __init__(self, opt): 26 | super().__init__() 27 | self.opt = opt 28 | self.model = AustNet_S() 29 | self.semantic_model = self.setup_semantic_model() 30 | 31 | self.semantic_model.eval() 32 | 33 | self.bce_loss = nn.BCELoss(size_average=True) 34 | self.ssim_loss = pytorch_ssim.SSIM(window_size=11,size_average=True) 35 | self.iou_loss = pytorch_iou.IOU(size_average=True) 36 | 37 | 
def setup_semantic_model(self): 38 | args = self.opt 39 | args.cfg = "HRNet-Semantic-Segmentation-HRNet-OCR/experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml" 40 | args.opts = [] 41 | update_config(config, args) 42 | 43 | module = eval('models.'+config.MODEL.NAME) 44 | module.BatchNorm2d_class = module.BatchNorm2d = torch.nn.BatchNorm2d 45 | model = eval('models.'+config.MODEL.NAME + 46 | '.get_seg_model')(config) 47 | model.eval() 48 | 49 | return model 50 | 51 | 52 | def forward(self, batch): 53 | comp = batch["comp"] 54 | yuv = batch['yuv'] 55 | self.semantic_model.eval() 56 | with torch.no_grad(): 57 | semantic_feature, semantic_out = self.semantic_model(comp) 58 | out, aux_list, init_score, final_score, feature, transfered_yuv = self.model(comp, yuv, semantic_feature.detach()) 59 | return out 60 | 61 | def denormalize(self, x): 62 | mean = np.zeros_like(x) 63 | 64 | mean[0,:,:] = .46962251 65 | mean[1,:,:] = .4464104 66 | mean[2,:,:] = .40718787 67 | std = np.zeros_like(x) 68 | 69 | std[0,:,:] = 0.27469736 70 | std[1,:,:] = 0.27012361 71 | std[2,:,:] = 0.28515933 72 | x = (x*std + mean) 73 | return x 74 | 75 | def training_step(self, batch, batch_idx): 76 | comp = batch["comp"] 77 | real = batch["real"] 78 | mask = batch["mask"].type(torch.FloatTensor).to(comp.device) 79 | mask_2 = batch["mask_2"].type(torch.FloatTensor).to(comp.device) 80 | mask_4 = batch["mask_4"].type(torch.FloatTensor).to(comp.device) 81 | mask_8 = batch["mask_8"].type(torch.FloatTensor).to(comp.device) 82 | yuv = batch['yuv'] 83 | self.semantic_model.eval() 84 | with torch.no_grad(): 85 | semantic_feature, semantic_out = self.semantic_model(comp) 86 | out, aux_list, init_score, final_score, feature, transfered_yuv = self.model(comp, yuv, semantic_feature.detach()) 87 | aux1, aux2, aux3 = aux_list[0], aux_list[1], aux_list[2] 88 | 89 | out = torch.clamp(out, 0, 1) 90 | aux1 = torch.clamp(aux1, 0, 1) 91 | aux2 = torch.clamp(aux2, 0, 1) 92 | aux3 = torch.clamp(aux3, 0, 1) 93 | mask = torch.clamp(mask, 0, 1) 94 | mask_2 = torch.clamp(mask_2, 0, 1) 95 | mask_4 = torch.clamp(mask_4, 0, 1) 96 | mask_8 = torch.clamp(mask_8, 0, 1) 97 | init_score = torch.clamp(1-init_score, 0, 1) 98 | final_score = torch.clamp(1-final_score, 0, 1) 99 | 100 | bce_loss = self.bce_loss(out, mask) 101 | ssim_loss = 1 - self.ssim_loss(out, mask) 102 | iou_loss = self.iou_loss(out,mask) 103 | 104 | bce_loss_aux1 = self.bce_loss(aux1, mask_4) 105 | ssim_loss_aux1 = 1 - self.ssim_loss(aux1, mask_4) 106 | iou_loss_aux1 = self.iou_loss(aux1,mask_4) 107 | 108 | bce_loss_aux2 = self.bce_loss(aux2, mask_2) 109 | ssim_loss_aux2 = 1 - self.ssim_loss(aux2, mask_2) 110 | iou_loss_aux2 = self.iou_loss(aux2,mask_2) 111 | 112 | bce_loss_aux3 = self.bce_loss(aux3, mask) 113 | ssim_loss_aux3 = 1 - self.ssim_loss(aux3, mask) 114 | iou_loss_aux3 = self.iou_loss(aux3,mask) 115 | 116 | loss_inner, loss_inter = feature_loss(feature, mask_8) 117 | 118 | loss = bce_loss + ssim_loss + iou_loss 119 | 120 | aux_loss = bce_loss_aux1 + ssim_loss_aux1 + iou_loss_aux1 + bce_loss_aux2 + ssim_loss_aux2 + iou_loss_aux2 + bce_loss_aux3 + ssim_loss_aux3 + iou_loss_aux3 121 | loss += aux_loss/3 122 | loss += max(0, loss_inter - loss_inner + 0.5) 123 | 124 | 125 | self.log('train_loss', loss.item()) 126 | self.log('bce_loss', bce_loss.item()) 127 | self.log('ssim_loss', ssim_loss.item()) 128 | self.log('iou_loss', iou_loss.item()) 129 | self.log('aux_loss', aux_loss.item()) 130 | self.log('loss_inner', loss_inner.item()) 131 | self.log('loss_inter', 
loss_inter.item()) 132 | 133 | return loss 134 | 135 | def configure_optimizers(self): 136 | optimizer = optim.Adam(self.model.parameters(), lr=self.opt.lr, betas=(0.9,0.999), weight_decay=1e-4) 137 | lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20) 138 | return [optimizer], [lr_scheduler] 139 | 140 | def validation_step(self, batch, batch_idx): 141 | comp = batch["comp"] 142 | real = batch["real"] 143 | mask = batch["mask"].type(torch.FloatTensor).to(comp.device) 144 | yuv = batch['yuv'] 145 | self.semantic_model.eval() 146 | semantic_feature, semantic_out = self.semantic_model(comp) 147 | out, _, _,_,_,_ = self.model(comp, yuv, semantic_feature.detach()) 148 | 149 | out = normPRED(out) 150 | mask = normPRED(mask) 151 | 152 | out = torch.clamp(out, 0, 1) 153 | mask = torch.clamp(mask, 0, 1) 154 | 155 | F1 = FScore(out, mask) 156 | mAP = compute_mAP(out, mask) 157 | 158 | IoU = compute_IoU(out, mask) 159 | 160 | self.log('F1', F1, sync_dist=True) 161 | self.log('mAP', mAP, sync_dist=True) 162 | self.log("IoU", IoU, sync_dist=True) 163 | 164 | if __name__ == '__main__': 165 | opt = ArgsParser() 166 | pl.seed_everything(42) 167 | 168 | # Data 169 | opt.phase = "train" 170 | train_set = iH4Dataset(opt) 171 | opt.no_flip = True 172 | opt.phase = "val" 173 | opt.preprocess = 'resize' 174 | val_set = iH4Dataset(opt) 175 | opt.phase = "train" 176 | 177 | dataloader_train = DataLoader(train_set, batch_size=opt.batch_size, shuffle=True, num_workers=8) 178 | dataloader_val = DataLoader(val_set, batch_size=opt.batch_size, shuffle=False, num_workers=8) 179 | 180 | engine = Engine(opt) 181 | 182 | checkpoint_callback = ModelCheckpoint(save_weights_only=False, mode="max", monitor="mAP", save_top_k=2, save_last=True, 183 | dirpath=opt.logdir, filename="best_{epoch:02d}-{mAP:.3f}") 184 | checkpoint_callback.CHECKPOINT_NAME_LAST = "{epoch}-last" 185 | trainer = pl.Trainer.from_argparse_args(opt, 186 | default_root_dir=opt.logdir, 187 | sync_batchnorm=True, 188 | gpus = opt.gpus , 189 | accelerator='ddp', 190 | profiler='simple', 191 | benchmark=True, 192 | log_every_n_steps=1, 193 | plugins=DDPPlugin(find_unused_parameters=True), 194 | flush_logs_every_n_steps=5, 195 | callbacks=[checkpoint_callback, 196 | ], 197 | check_val_every_n_epoch = 3, 198 | max_epochs = opt.nepochs 199 | ) 200 | 201 | trainer.fit(engine, dataloader_train, dataloader_val) --------------------------------------------------------------------------------
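
A minimal usage sketch, not taken from the repository itself: the Lightning scripts above are driven by the arguments in options.py, but the model and metrics can also be exercised directly as a quick sanity check. The snippet assumes the repository root is on PYTHONPATH, that model/encoder_decoder.py constructs resnet_encoder/aust_decoder with no extra arguments, and uses random tensors in place of real iHarmony4 data (the 224x224 size follows crop_size in options.py; the full-resolution single-channel output shape is implied by the BCE loss against the full mask in train_austnet.py).

import torch
from model.austnet import AustNet
from evaluation.metrics import FScore, compute_IoU, normPRED

model = AustNet().eval()
rgb = torch.rand(1, 3, 224, 224)  # dummy composite image in RGB
yuv = torch.rand(1, 3, 224, 224)  # YCrCb version of the same image, as produced by data.py
with torch.no_grad():
    out, aux_list, init_score, final_score, feature = model(rgb, yuv)

pred = normPRED(torch.clamp(out, 0, 1))           # min-max normalize the prediction
gt = (torch.rand(1, 1, 224, 224) > 0.5).float()   # dummy inharmonious-region mask
print(FScore(pred, gt).item(), compute_IoU(pred, gt))

For real data, train_austnet.py and test_austnet.py are the intended entry points, e.g. python train_austnet.py --dataset_root iHarmony4/ --gpus 4 and python test_austnet.py --ckpt path/to/checkpoint.ckpt, where both paths are placeholders; see options.py for the full argument list.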