├── .gitignore
├── LICENSE
├── README.md
├── data
│   └── list
│       ├── ade20k
│       │   ├── testval.lst
│       │   ├── train.lst
│       │   ├── trainval.lst
│       │   └── val.lst
│       ├── cityscapes
│       │   ├── test.lst
│       │   ├── train.lst
│       │   ├── trainval.lst
│       │   └── val.lst
│       └── cocostuff
│           ├── testval.lst
│           ├── train.lst
│           ├── trainval.lst
│           └── val.lst
├── experiments
│   ├── ade20k
│   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml
│   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml
│   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml
│   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml
│   │   └── seg_hrnet_w48_520x520_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml
│   ├── cityscapes
│   │   ├── seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   ├── seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml
│   │   ├── seg_hrnet_ocr_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   ├── seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   ├── seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484_paddle.yaml
│   │   ├── seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml
│   │   ├── seg_hrnet_w48_train_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   ├── seg_hrnet_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml
│   │   └── seg_hrnet_w48_trainval_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml
│   ├── cocostuff
│   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml
│   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml
│   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml
│   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml
│   │   └── seg_hrnet_w48_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml
│   ├── lip
│   │   ├── seg_hrnet_ocr_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml
│   │   ├── seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml
│   │   └── seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150_paddle.yaml
│   └── pascal_ctx
│       ├── seg_hrnet_ocr_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml
│       ├── seg_hrnet_ocr_w48_cls60_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml
│       ├── seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml
│       └── seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200_paddle.yaml
├── figures
│   ├── OCR.PNG
│   ├── SegmentationTransformerOCR.png
│   ├── SegmentationTransformerOCR1.png
│   ├── SegmentationTransformerOCR2.png
│   └── seg-hrnet.png
├── hubconf.py
├── lib
│   ├── config
│   │   ├── __init__.py
│   │   ├── default.py
│   │   ├── hrnet_config.py
│   │   └── models.py
│   ├── core
│   │   ├── criterion.py
│   │   └── function.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── ade20k.py
│   │   ├── base_dataset.py
│   │   ├── cityscapes.py
│   │   ├── cocostuff.py
│   │   ├── lip.py
│   │   └── pascal_ctx.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── bn_helper.py
│   │   ├── hrnet.py
│   │   ├── seg_hrnet.py
│   │   ├── seg_hrnet_ocr.py
│   │   └── sync_bn
│   │       ├── LICENSE
│   │       ├── __init__.py
│   │       └── inplace_abn
│   │           ├── __init__.py
│   │           ├── bn.py
│   │           ├── functions.py
│   │           └── src
│   │               ├── common.h
│   │               ├── inplace_abn.cpp
│   │               ├── inplace_abn.h
│   │               ├── inplace_abn_cpu.cpp
│   │               └── inplace_abn_cuda.cu
│   └── utils
│       ├── __init__.py
│       ├── distributed.py
│       ├── modelsummary.py
│       └── utils.py
├── local_log.txt
├── requirements.txt
├── run_dist.sh
├── run_local.sh
└── tools
    ├── _init_paths.py
    ├── test.py
    └── train.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | __pycache__/
3 | *.py[co]
4 | data/
5 | log/
6 | output/
7 | pretrained_models
8 | scripts/
9 | detail-api/
10 | data/list
11 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) [2019] [Microsoft]
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | =======================================================================================
24 | 3-clause BSD licenses
25 | =======================================================================================
26 | 1. syncbn - For details, see lib/models/sync_bn/LICENSE
27 | Copyright (c) 2017 mapillary
28 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # High-resolution networks and Segmentation Transformer for Semantic Segmentation
2 | ## Branches
3 | - This is the implementation for HRNet + OCR.
4 | - The PyTorch 1.1 version is available [here](https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/pytorch-v1.1).
5 | - The PyTorch 0.4.1 version is available [here](https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/master).
6 | 
7 | ## News
8 | - [2021/05/04] We rephrase the OCR approach as **Segmentation Transformer** [pdf](https://arxiv.org/pdf/1909.11065.pdf). We will provide the updated implementation soon.
9 | - [2021/02/16] Based on the [PaddleClas](https://github.com/PaddlePaddle/PaddleClas) ImageNet pretrained weights, we achieve **83.22%** on Cityscapes val, **59.62%** on PASCAL-Context val (**new SOTA**), **45.20%** on COCO-Stuff val (**new SOTA**), **58.21%** on LIP val and **47.98%** on ADE20K val. Please check out [openseg.pytorch](https://github.com/openseg-group/openseg.pytorch/tree/pytorch-1.7) for more details.
10 | - [2020/08/16] [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) now supports our HRNet + OCR.
11 | - [2020/07/20] The researchers from AInnovation have achieved **Rank#1** on the [ADE20K Leaderboard](http://sceneparsing.csail.mit.edu/) by training our HRNet + OCR with a semi-supervised learning scheme. More details are in their [Technical Report](https://arxiv.org/pdf/2007.10591.pdf).
12 | - [2020/07/09] Our paper is accepted by ECCV 2020: [Object-Contextual Representations for Semantic Segmentation](https://arxiv.org/pdf/1909.11065.pdf).
Notably, the researchers from NVIDIA set a new state-of-the-art performance on the Cityscapes leaderboard, [85.4%](https://www.cityscapes-dataset.com/method-details/?submissionID=7836), by combining our HRNet + OCR with a new [hierarchical multi-scale attention scheme](https://arxiv.org/abs/2005.10821).
13 | - [2020/03/13] Our paper is accepted by TPAMI: [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/pdf/1908.07919.pdf).
14 | - HRNet + OCR + SegFix: Rank \#1 (84.5) in the [Cityscapes leaderboard](https://www.cityscapes-dataset.com/benchmarks/). OCR: object-contextual representations [pdf](https://arxiv.org/pdf/1909.11065.pdf). ***HRNet + OCR is reproduced [here](https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/HRNet-OCR)***.
15 | - Thanks to Google and UIUC researchers: a modified HRNet combined with semantic and instance multi-scale context achieves a SOTA panoptic segmentation result on the Mapillary Vistas challenge. See [the paper](https://arxiv.org/pdf/1910.04751.pdf).
16 | - Small HRNet models for Cityscapes segmentation. Superior to MobileNetV2Plus ....
17 | - Rank \#1 (83.7) in the [Cityscapes leaderboard](https://www.cityscapes-dataset.com/benchmarks/): HRNet combined with an extension of [object context](https://arxiv.org/pdf/1809.00916.pdf).
18 | 
19 | - PyTorch 1.1 and the official Sync-BN are supported. We have reproduced the Cityscapes results on the new codebase. Please check the [pytorch-v1.1 branch](https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/pytorch-v1.1).
20 | 
21 | ## Introduction
22 | This is the official code of [high-resolution representations for Semantic Segmentation](https://arxiv.org/abs/1904.04514).
23 | We augment the HRNet with a very simple segmentation head, shown in the figure below. We aggregate the output representations at four different resolutions and then use a 1x1 convolution to fuse these representations. The output representation is fed into the classifier. We evaluate our methods on three datasets, Cityscapes, PASCAL-Context and LIP.
24 | 
25 | 
![hrnet](figures/seg-hrnet.png)
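
For orientation, this head amounts to only a few lines of PyTorch. Below is a minimal, illustrative sketch, not the repo's code (the real implementation is in `lib/models/seg_hrnet.py`): the class name `SegHead` is ours, the channel widths follow the W48 configs under `experiments/`, and plain `BatchNorm2d` stands in for the sync-bn variants used in practice.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class SegHead(nn.Module):
    """Illustrative HRNetV2 segmentation head: upsample the four branch
    outputs to the highest resolution, concatenate them, fuse with a 1x1
    convolution, and classify (FINAL_CONV_KERNEL: 1 in the configs)."""

    def __init__(self, branch_channels=(48, 96, 192, 384), num_classes=19):
        super().__init__()
        total = sum(branch_channels)  # 720 channels for HRNetV2-W48
        self.fuse = nn.Sequential(
            nn.Conv2d(total, total, kernel_size=1),
            nn.BatchNorm2d(total),
            nn.ReLU(inplace=True),
        )
        self.classifier = nn.Conv2d(total, num_classes, kernel_size=1)

    def forward(self, feats):  # feats: list of 4 tensors, high to low resolution
        h, w = feats[0].shape[2:]
        # align_corners=True, matching the note under "Segmentation models" below
        ups = [feats[0]] + [
            F.interpolate(f, size=(h, w), mode='bilinear', align_corners=True)
            for f in feats[1:]
        ]
        return self.classifier(self.fuse(torch.cat(ups, dim=1)))
```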
31 | 
32 | Besides, we further combine HRNet with [Object-Contextual Representations](https://arxiv.org/pdf/1909.11065.pdf) and achieve higher performance on these datasets. The code of HRNet+OCR is contained in this branch. We illustrate the overall framework of OCR in the figure below, together with the equivalent Transformer pipelines:
33 | 
34 | 
![OCR](figures/OCR.PNG)
![Segmentation Transformer](figures/SegmentationTransformerOCR.png)
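
In code terms, OCR (i) predicts K soft object regions with an auxiliary head, (ii) pools one representation per region, and (iii) lets every pixel attend to those region representations — which is exactly the cross-attention reading in the Transformer-style figures above, with region vectors as keys/values and pixels as queries. The following is a deliberately condensed sketch under those assumptions, not the repo's module (that lives in `lib/models/seg_hrnet_ocr.py` and adds 1x1 projections, scaling, BN and dropout; the function name `ocr_context` is ours):

```python
import torch
import torch.nn.functional as F

def ocr_context(pixels, soft_regions):
    """pixels:       (B, C, H, W) backbone features
    soft_regions: (B, K, H, W) coarse logits from the auxiliary head
    Returns features augmented with object-contextual info, (B, 2C, H, W)."""
    B, C, H, W = pixels.shape
    x = pixels.flatten(2)                             # (B, C, HW)
    # (i)-(ii) object region representations: attention-weighted pixel averages
    m = F.softmax(soft_regions.flatten(2), dim=2)     # (B, K, HW)
    regions = torch.bmm(m, x.transpose(1, 2))         # (B, K, C)
    # (iii) pixel-region relation: each pixel attends to the K region vectors
    rel = F.softmax(torch.bmm(x.transpose(1, 2),      # (B, HW, K)
                              regions.transpose(1, 2)), dim=2)
    context = torch.bmm(rel, regions)                 # (B, HW, C)
    context = context.transpose(1, 2).reshape(B, C, H, W)
    return torch.cat([context, pixels], dim=1)
```

This two-headed structure is also why the OCR configs below set `NUM_OUTPUTS: 2` and `BALANCE_WEIGHTS: [0.4, 1]`: the auxiliary region prediction is supervised jointly with the final segmentation, at 0.4x the weight.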
43 | 
44 | ## Segmentation models
45 | The models are initialized with weights pretrained on ImageNet. "Paddle" means the results are based on PaddleClas-pretrained HRNet models.
46 | You can download the pretrained models from https://github.com/HRNet/HRNet-Image-Classification. *Slightly different from the original implementation, we use align_corners=True for upsampling in HRNet.*
47 | 
48 | 1. Performance on the Cityscapes dataset. The models are trained and tested with the input size of 512x1024 and 1024x2048 respectively.
49 | If multi-scale testing is used, we adopt scales: 0.5,0.75,1.0,1.25,1.5,1.75.
50 | 
51 | | model | Train Set | Test Set | OHEM | Multi-scale | Flip | mIoU | Link |
52 | | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: |
53 | | HRNetV2-W48 | Train | Val | No | No | No | 80.9 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_cs_8090_torch11.pth)/[BaiduYun(Access Code:pmix)](https://pan.baidu.com/s/1KyiOUOR0SYxKtJfIlD5o-w)|
54 | | HRNetV2-W48 + OCR | Train | Val | No | No | No | 81.6 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_cs_8162_torch11.pth)/[BaiduYun(Access Code:fa6i)](https://pan.baidu.com/s/1BGNt4Xmx3yfXUS8yjde0hQ)|
55 | | HRNetV2-W48 + OCR | Train + Val | Test | No | Yes | Yes | 82.3 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_cs_trainval_8227_torch11.pth)/[BaiduYun(Access Code:ycrk)](https://pan.baidu.com/s/16mD81UnGzjUBD-haDQfzIQ)|
56 | | HRNetV2-W48 (Paddle) | Train | Val | No | No | No | 81.6 | ---|
57 | | HRNetV2-W48 + OCR (Paddle) | Train | Val | No | No | No | --- | ---|
58 | | HRNetV2-W48 + OCR (Paddle) | Train + Val | Test | No | Yes | Yes | --- | ---|
59 | 
60 | 2. Performance on the LIP dataset. The models are trained and tested with the input size of 473x473.
61 | 
62 | | model | OHEM | Multi-scale | Flip | mIoU | Link |
63 | | :--: | :--: | :--: | :--: | :--: | :--: |
64 | | HRNetV2-W48 | No | No | Yes | 55.83 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_lip_5583_torch04.pth)/[BaiduYun(Access Code:fahi)](https://pan.baidu.com/s/15DamFiGEoxwDDF1TwuZdnA)|
65 | | HRNetV2-W48 + OCR | No | No | Yes | 56.48 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_lip_5648_torch04.pth)/[BaiduYun(Access Code:xex2)](https://pan.baidu.com/s/1dFYSR2bahRnvpIOdh88kOQ)|
66 | | HRNetV2-W48 (Paddle) | No | No | Yes | --- | --- |
67 | | HRNetV2-W48 + OCR (Paddle) | No | No | Yes | --- | ---|
68 | 
69 | 
70 | **Note:** Currently we can only reproduce HRNet+OCR results on the LIP dataset with PyTorch 0.4.1.
71 | 
72 | 3. Performance on the PASCAL-Context dataset. The models are trained and tested with the input size of 520x520.
73 | If multi-scale testing is used, we adopt scales: 0.5,0.75,1.0,1.25,1.5,1.75,2.0 (the same as EncNet, DANet etc.).
74 | 
75 | | model | num classes | OHEM | Multi-scale | Flip | mIoU | Link |
76 | | :--: | :--: | :--: | :--: | :--: | :--: | :--: |
77 | | HRNetV2-W48 | 59 classes | No | Yes | Yes | 54.1 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_pascal_ctx_5410_torch11.pth)/[BaiduYun(Access Code:wz6v)](https://pan.baidu.com/s/1m0MqpHSk0SX380EYEMawSA)|
78 | | HRNetV2-W48 + OCR | 59 classes | No | Yes | Yes | 56.2 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_pascal_ctx_5618_torch11.pth)/[BaiduYun(Access Code:yyxh)](https://pan.baidu.com/s/1XYP54gr3XB76tHmCcKdU9g)|
79 | | HRNetV2-W48 | 60 classes | No | Yes | Yes | 48.3 | [OneDrive](https://1drv.ms/u/s!Aus8VCZ_C_33gQEHDQrZCiv4R5mf)/[BaiduYun(Access Code:9uf8)](https://pan.baidu.com/s/1pgYt8P8ht2HOOzcA0F7Kag)|
80 | | HRNetV2-W48 + OCR | 60 classes | No | Yes | Yes | 50.1 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_pascal_ctx_5410_torch11.pth)/[BaiduYun(Access Code:gtkb)](https://pan.baidu.com/s/13AYjwzh1LJSlipJwNpJ3Uw)|
81 | | HRNetV2-W48 (Paddle) | 59 classes | No | Yes | Yes | --- | ---|
82 | | HRNetV2-W48 (Paddle) | 60 classes | No | Yes | Yes | --- | ---|
83 | | HRNetV2-W48 + OCR (Paddle) | 59 classes | No | Yes | Yes | --- | ---|
84 | | HRNetV2-W48 + OCR (Paddle) | 60 classes | No | Yes | Yes | --- | ---|
85 | 
86 | 4. Performance on the COCO-Stuff dataset. The models are trained and tested with the input size of 520x520.
87 | If multi-scale testing is used, we adopt scales: 0.5,0.75,1.0,1.25,1.5,1.75,2.0 (the same as EncNet, DANet etc.).
88 | 
89 | | model | OHEM | Multi-scale | Flip | mIoU | Link |
90 | | :--: | :--: | :--: | :--: | :--: | :--: |
91 | | HRNetV2-W48 | Yes | No | No | 36.2 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_cocostuff_3617_torch04.pth)/[BaiduYun(Access Code:92gw)](https://pan.baidu.com/s/1VAV6KThH1Irzv9HZgLWE2Q)|
92 | | HRNetV2-W48 + OCR | Yes | No | No | 39.7 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_cocostuff_3965_torch04.pth)/[BaiduYun(Access Code:sjc4)](https://pan.baidu.com/s/1HFSYyVwKBG3E6y76gcPjDA)|
93 | | HRNetV2-W48 | Yes | Yes | Yes | 37.9 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_cocostuff_3617_torch04.pth)/[BaiduYun(Access Code:92gw)](https://pan.baidu.com/s/1VAV6KThH1Irzv9HZgLWE2Q) |
94 | | HRNetV2-W48 + OCR | Yes | Yes | Yes | 40.6 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_cocostuff_3965_torch04.pth)/[BaiduYun(Access Code:sjc4)](https://pan.baidu.com/s/1HFSYyVwKBG3E6y76gcPjDA) |
95 | | HRNetV2-W48 (Paddle) | Yes | No | No | --- | ---|
96 | | HRNetV2-W48 + OCR (Paddle) | Yes | No | No | --- | ---|
97 | | HRNetV2-W48 (Paddle) | Yes | Yes | Yes | --- | ---|
98 | | HRNetV2-W48 + OCR (Paddle) | Yes | Yes | Yes | --- | ---|
99 | 
100 | 
101 | 
102 | 5. Performance on the ADE20K dataset. The models are trained and tested with the input size of 520x520.
103 | If multi-scale testing is used, we adopt scales: 0.5,0.75,1.0,1.25,1.5,1.75,2.0 (the same as EncNet, DANet etc.).
104 | 
105 | | model | OHEM | Multi-scale | Flip | mIoU | Link |
106 | | :--: | :--: | :--: | :--: | :--: | :--: |
107 | | HRNetV2-W48 | Yes | No | No | 43.1 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ade20k_4312_torch04.pth)/[BaiduYun(Access Code:f6xf)](https://pan.baidu.com/s/11neVkzxx27qS2-mPFW9dfg)|
108 | | HRNetV2-W48 + OCR | Yes | No | No | 44.5 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_ade20k_4451_torch04.pth)/[BaiduYun(Access Code:peg4)](https://pan.baidu.com/s/1HLhjiLIdgaOHs0SzEtkgkQ)|
109 | | HRNetV2-W48 | Yes | Yes | Yes | 44.2 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ade20k_4312_torch04.pth)/[BaiduYun(Access Code:f6xf)](https://pan.baidu.com/s/11neVkzxx27qS2-mPFW9dfg) |
110 | | HRNetV2-W48 + OCR | Yes | Yes | Yes | 45.5 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_ade20k_4451_torch04.pth)/[BaiduYun(Access Code:peg4)](https://pan.baidu.com/s/1HLhjiLIdgaOHs0SzEtkgkQ) |
111 | | HRNetV2-W48 (Paddle) | Yes | No | No | --- | ---|
112 | | HRNetV2-W48 + OCR (Paddle) | Yes | No | No | --- | ---|
113 | | HRNetV2-W48 (Paddle) | Yes | Yes | Yes | --- | ---|
114 | | HRNetV2-W48 + OCR (Paddle) | Yes | Yes | Yes | --- | ---|
115 | 
116 | 
117 | 
118 | ## Quick start
119 | ### Install
120 | 1. For the LIP dataset, install PyTorch 0.4.1 following the [official instructions](https://pytorch.org/). For Cityscapes and PASCAL-Context, we use PyTorch 1.1.0.
121 | 2. `git clone https://github.com/HRNet/HRNet-Semantic-Segmentation $SEG_ROOT`
122 | 3. Install dependencies: `pip install -r requirements.txt`
123 | 
124 | If you want to train and evaluate our models on PASCAL-Context, you need to install the [detail API](https://github.com/zhanghang1989/detail-api).
125 | ````bash
126 | pip install git+https://github.com/zhanghang1989/detail-api.git#subdirectory=PythonAPI
127 | ````
128 | 
129 | ### Data preparation
130 | You need to download the [Cityscapes](https://www.cityscapes-dataset.com/), [LIP](http://sysu-hcp.net/lip/) and [PASCAL-Context](https://cs.stanford.edu/~roozbeh/pascal-context/) datasets.
131 | 
132 | Your directory tree should look like this:
133 | ````bash
134 | $SEG_ROOT/data
135 | ├── cityscapes
136 | │   ├── gtFine
137 | │   │   ├── test
138 | │   │   ├── train
139 | │   │   └── val
140 | │   └── leftImg8bit
141 | │       ├── test
142 | │       ├── train
143 | │       └── val
144 | ├── lip
145 | │   ├── TrainVal_images
146 | │   │   ├── train_images
147 | │   │   └── val_images
148 | │   └── TrainVal_parsing_annotations
149 | │       ├── train_segmentations
150 | │       ├── train_segmentations_reversed
151 | │       └── val_segmentations
152 | ├── pascal_ctx
153 | │   ├── common
154 | │   ├── PythonAPI
155 | │   ├── res
156 | │   └── VOCdevkit
157 | │       └── VOC2010
158 | ├── cocostuff
159 | │   ├── train
160 | │   │   ├── image
161 | │   │   └── label
162 | │   └── val
163 | │       ├── image
164 | │       └── label
165 | ├── ade20k
166 | │   ├── train
167 | │   │   ├── image
168 | │   │   └── label
169 | │   └── val
170 | │       ├── image
171 | │       └── label
172 | ├── list
173 | │   ├── cityscapes
174 | │   │   ├── test.lst
175 | │   │   ├── trainval.lst
176 | │   │   └── val.lst
177 | │   ├── lip
178 | │   │   ├── testvalList.txt
179 | │   │   ├── trainList.txt
180 | │   │   └── valList.txt
181 | ````
182 | 
183 | ### Train and Test
184 | 
185 | #### PyTorch Version Differences
186 | 
187 | Note that the codebase supports both PyTorch 0.4.1 and 1.1.0, and they use different commands for training. In what follows, we use `$PY_CMD` to denote the appropriate startup command.
188 | 
189 | ```bash
190 | # For PyTorch 0.4.1
191 | PY_CMD="python"
192 | # For PyTorch 1.1.0
193 | PY_CMD="python -m torch.distributed.launch --nproc_per_node=4"
194 | ```
195 | 
196 | For example, when training on Cityscapes we use PyTorch 1.1.0, so the command
197 | ````bash
198 | $PY_CMD tools/train.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
199 | ````
200 | stands for
201 | ````bash
202 | python -m torch.distributed.launch --nproc_per_node=4 tools/train.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
203 | ````
204 | #### Training
205 | 
206 | Just specify the configuration file for `tools/train.py`.
207 | 
208 | For example, train HRNet-W48 on Cityscapes with a batch size of 12 on 4 GPUs:
209 | ````bash
210 | $PY_CMD tools/train.py --cfg experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
211 | ````
212 | Similarly, train HRNet-W48 + OCR on Cityscapes with a batch size of 12 on 4 GPUs:
213 | ````bash
214 | $PY_CMD tools/train.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
215 | ````
216 | 
217 | Note that we can only reproduce HRNet+OCR results on the LIP dataset with PyTorch 0.4.1, so we recommend using PyTorch 0.4.1 if you want to train on LIP.
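
Configs with `ohem` in their names additionally enable online hard example mining (`LOSS.USE_OHEM: true`): the loss is averaged only over pixels whose predicted probability for the ground-truth class falls below `OHEMTHRES` (0.9), while at least `OHEMKEEP` (131072) pixels are always kept. A simplified, self-contained sketch of that selection logic follows; the repo's actual criterion lives in `lib/core/criterion.py` and handles further details such as class weighting, and the function name here is hypothetical.

```python
import torch
import torch.nn.functional as F

def ohem_cross_entropy(logits, target, thres=0.9, min_kept=131072, ignore_label=255):
    """Pixel-wise OHEM cross-entropy (sketch).
    logits: (B, C, H, W); target: (B, H, W), ignore_label marks void pixels."""
    pixel_losses = F.cross_entropy(
        logits, target, ignore_index=ignore_label, reduction='none').flatten()
    valid = (target != ignore_label).flatten()

    # Confidence each pixel assigns to its ground-truth class.
    prob = F.softmax(logits, dim=1)
    safe_target = target.clone()
    safe_target[safe_target == ignore_label] = 0      # avoid out-of-range gather
    pt = prob.gather(1, safe_target.unsqueeze(1)).flatten()

    losses, pt = pixel_losses[valid], pt[valid]
    if losses.numel() == 0:                           # batch of pure void pixels
        return logits.sum() * 0.0
    # The threshold never drops below `thres` and is raised until at least
    # min(min_kept, #valid) low-confidence pixels survive.
    n_kept = min(min_kept, losses.numel())
    pt_sorted, order = torch.sort(pt)
    threshold = max(pt_sorted[n_kept - 1].item(), thres)
    return losses[order[pt_sorted <= threshold]].mean()
```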
218 | 
219 | #### Testing
220 | 
221 | For example, evaluating HRNet+OCR on the Cityscapes validation set with multi-scale and flip testing:
222 | ````bash
223 | python tools/test.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml \
224 |                      TEST.MODEL_FILE hrnet_ocr_cs_8162_torch11.pth \
225 |                      TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75 \
226 |                      TEST.FLIP_TEST True
227 | ````
228 | Evaluating HRNet+OCR on the Cityscapes test set with multi-scale and flip testing:
229 | ````bash
230 | python tools/test.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml \
231 |                      DATASET.TEST_SET list/cityscapes/test.lst \
232 |                      TEST.MODEL_FILE hrnet_ocr_trainval_cs_8227_torch11.pth \
233 |                      TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75 \
234 |                      TEST.FLIP_TEST True
235 | ````
236 | Evaluating HRNet+OCR on the PASCAL-Context validation set with multi-scale and flip testing:
237 | ````bash
238 | python tools/test.py --cfg experiments/pascal_ctx/seg_hrnet_ocr_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml \
239 |                      DATASET.TEST_SET testval \
240 |                      TEST.MODEL_FILE hrnet_ocr_pascal_ctx_5618_torch11.pth \
241 |                      TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75,2.0 \
242 |                      TEST.FLIP_TEST True
243 | ````
244 | Evaluating HRNet+OCR on the LIP validation set with flip testing:
245 | ````bash
246 | python tools/test.py --cfg experiments/lip/seg_hrnet_ocr_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml \
247 |                      DATASET.TEST_SET list/lip/testvalList.txt \
248 |                      TEST.MODEL_FILE hrnet_ocr_lip_5648_torch04.pth \
249 |                      TEST.FLIP_TEST True \
250 |                      TEST.NUM_SAMPLES 0
251 | ````
252 | Evaluating HRNet+OCR on the COCO-Stuff validation set with multi-scale and flip testing:
253 | ````bash
254 | python tools/test.py --cfg experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml \
255 |                      DATASET.TEST_SET list/cocostuff/testval.lst \
256 |                      TEST.MODEL_FILE hrnet_ocr_cocostuff_3965_torch04.pth \
257 |                      TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75,2.0 \
258 |                      TEST.MULTI_SCALE True TEST.FLIP_TEST True
259 | ````
260 | Evaluating HRNet+OCR on the ADE20K validation set with multi-scale and flip testing:
261 | ````bash
262 | python tools/test.py --cfg experiments/ade20k/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml \
263 |                      DATASET.TEST_SET list/ade20k/testval.lst \
264 |                      TEST.MODEL_FILE hrnet_ocr_ade20k_4451_torch04.pth \
265 |                      TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75,2.0 \
266 |                      TEST.MULTI_SCALE True TEST.FLIP_TEST True
267 | ````
268 | 
269 | ## Other applications of HRNet
270 | * [Human pose estimation](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch)
271 | * [Image Classification](https://github.com/HRNet/HRNet-Image-Classification)
272 | * [Object detection](https://github.com/HRNet/HRNet-Object-Detection)
273 | * [Facial landmark detection](https://github.com/HRNet/HRNet-Facial-Landmark-Detection)
274 | 
275 | ## Citation
276 | If you find this work or code helpful in your research, please cite:
277 | ````
278 | @inproceedings{SunXLW19,
279 |   title={Deep High-Resolution Representation Learning for Human Pose Estimation},
280 |   author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang},
281 |   booktitle={CVPR},
282 |   year={2019}
283 | }
284 | 
285 | @article{WangSCJDZLMTWLX19,
286 |   title={Deep High-Resolution Representation Learning for Visual Recognition},
287 |   author={Jingdong Wang and Ke Sun and Tianheng Cheng and
288 |           Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
289 |           Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
290 |   journal={TPAMI},
291 |   year={2019}
292 | }
293 | 
294 | @inproceedings{YuanCW19,
295 |   title={Object-Contextual Representations for Semantic Segmentation},
296 |   author={Yuhui Yuan and Xilin Chen and Jingdong Wang},
297 |   booktitle={ECCV},
298 |   year={2020}
299 | }
300 | ````
301 | 
302 | ## Reference
303 | [1] Deep High-Resolution Representation Learning for Visual Recognition. Jingdong Wang, Ke Sun, Tianheng Cheng,
304 | Borui Jiang, Chaorui Deng, Yang Zhao, Dong Liu, Yadong Mu, Mingkui Tan, Xinggang Wang, Wenyu Liu, Bin Xiao. Accepted by TPAMI. [download](https://arxiv.org/pdf/1908.07919.pdf)
305 | 
306 | [2] Object-Contextual Representations for Semantic Segmentation. Yuhui Yuan, Xilin Chen, Jingdong Wang. [download](https://arxiv.org/pdf/1909.11065.pdf)
307 | 
308 | ## Acknowledgement
309 | We adopt sync-bn implemented by [InplaceABN](https://github.com/mapillary/inplace_abn) for the PyTorch 0.4.1 experiments and the official
310 | sync-bn provided by PyTorch for the PyTorch 1.1 experiments.
311 | 
312 | We adopt the data preprocessing for the PASCAL-Context dataset implemented by the [PASCAL API](https://github.com/zhanghang1989/detail-api).
313 | 
--------------------------------------------------------------------------------
/experiments/ade20k/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 |   BENCHMARK: true
3 |   DETERMINISTIC: false
4 |   ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 | 
11 | DATASET:
12 |   DATASET: ade20k
13 |   ROOT: 'data/'
14 |   TEST_SET: 'list/ade20k/val.lst'
15 |   TRAIN_SET: 'list/ade20k/train.lst'
16 |   NUM_CLASSES: 150
17 | MODEL:
18 |   NAME: seg_hrnet_ocr
19 |   NUM_OUTPUTS: 2
20 |   PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
21 |   EXTRA:
22 |     FINAL_CONV_KERNEL: 1
23 |     STAGE1:
24 |       NUM_MODULES: 1
25 |       NUM_BRANCHES: 1
26 |       BLOCK: BOTTLENECK
27 |       NUM_BLOCKS:
28 |       - 4
29 |       NUM_CHANNELS:
30 |       - 64
31 |       FUSE_METHOD: SUM
32 |     STAGE2:
33 |       NUM_MODULES: 1
34 |       NUM_BRANCHES: 2
35 |       BLOCK: BASIC
36 |       NUM_BLOCKS:
37 |       - 4
38 |       - 4
39 |       NUM_CHANNELS:
40 |       - 48
41 |       - 96
42 |       FUSE_METHOD: SUM
43 |     STAGE3:
44 |       NUM_MODULES: 4
45 |       NUM_BRANCHES: 3
46 |       BLOCK: BASIC
47 |       NUM_BLOCKS:
48 |       - 4
49 |       - 4
50 |       - 4
51 |       NUM_CHANNELS:
52 |       - 48
53 |       - 96
54 |       - 192
55 |       FUSE_METHOD: SUM
56 |     STAGE4:
57 |       NUM_MODULES: 3
58 |       NUM_BRANCHES: 4
59 |       BLOCK: BASIC
60 |       NUM_BLOCKS:
61 |       - 4
62 |       - 4
63 |       - 4
64 |       - 4
65 |       NUM_CHANNELS:
66 |       - 48
67 |       - 96
68 |       - 192
69 |       - 384
70 |       FUSE_METHOD: SUM
71 | LOSS:
72 |   USE_OHEM: true
73 |   OHEMTHRES: 0.9
74 |   OHEMKEEP: 131072
75 |   BALANCE_WEIGHTS: [0.4, 1]
76 | TRAIN:
77 |   IMAGE_SIZE:
78 |   - 520
79 |   - 520
80 |   BASE_SIZE: 520
81 |   BATCH_SIZE_PER_GPU: 4
82 |   SHUFFLE: true
83 |   BEGIN_EPOCH: 0
84 |   END_EPOCH: 120
85 |   RESUME: true
86 |   OPTIMIZER: sgd
87 |   LR: 0.02
88 |   WD: 0.0001
89 |   MOMENTUM: 0.9
90 |   NESTEROV: false
91 |   FLIP: true
92 |   MULTI_SCALE: true
93 |   DOWNSAMPLERATE: 1
94 |   IGNORE_LABEL: 255
95 |   SCALE_FACTOR: 16
96 | TEST:
97 |   IMAGE_SIZE:
98 |   - 520
99 |   - 520
100 |   BASE_SIZE: 520
101 |   BATCH_SIZE_PER_GPU: 1
102 |   NUM_SAMPLES: 200
103 |   FLIP_TEST: false
104 |   MULTI_SCALE: false
105 | 
--------------------------------------------------------------------------------
/experiments/ade20k/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml:
-------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: '../../../../dataset/ade20k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 2 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 120 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.02 88 | WD: 0.0001 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 520 99 | - 520 100 | BASE_SIZE: 520 101 | BATCH_SIZE_PER_GPU: 1 102 | NUM_SAMPLES: 200 103 | FLIP_TEST: false 104 | MULTI_SCALE: false 105 | -------------------------------------------------------------------------------- /experiments/ade20k/seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: 'data/' 14 | TEST_SET: 'list/ade20k/val.lst' 15 | TRAIN_SET: 'list/ade20k/train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 
79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 4 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 120 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.02 87 | WD: 0.0001 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 520 98 | - 520 99 | BASE_SIZE: 520 100 | BATCH_SIZE_PER_GPU: 1 101 | NUM_SAMPLES: 200 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/ade20k/seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: '../../../../dataset/ade20k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 2 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 120 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.02 87 | WD: 0.0001 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 520 98 | - 520 99 | BASE_SIZE: 520 100 | BATCH_SIZE_PER_GPU: 1 101 | NUM_SAMPLES: 200 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/ade20k/seg_hrnet_w48_520x520_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: 'data/' 14 | TEST_SET: 'list/ade20k/val.lst' 15 | TRAIN_SET: 'list/ade20k/train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | 
BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 4 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 120 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.02 87 | WD: 0.0001 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 520 98 | - 520 99 | BASE_SIZE: 520 100 | BATCH_SIZE_PER_GPU: 1 101 | NUM_SAMPLES: 200 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: data/ 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: "pretrained_models/hrnetv2_w48_imagenet_pretrained.pth" 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 3 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.01 88 | WD: 0.0005 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 2048 99 | - 1024 100 | BASE_SIZE: 2048 101 | BATCH_SIZE_PER_GPU: 4 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | 
DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: '../../../../dataset/original_cityscapes/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 2 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.01 88 | WD: 0.0005 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 2048 99 | - 1024 100 | BASE_SIZE: 2048 101 | BATCH_SIZE_PER_GPU: 2 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_ocr_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: data/ 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/trainval.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: "pretrained_models/hrnetv2_w48_imagenet_pretrained.pth" 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 3 82 | 
SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.01 88 | WD: 0.0005 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 2048 99 | - 1024 100 | BASE_SIZE: 2048 101 | BATCH_SIZE_PER_GPU: 4 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: '../../../../dataset/original_cityscapes/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: '../../../../dataset/pretrained_models/hrnetv2_w48_imagenet_pretrained_top1_21.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 1024 78 | - 512 79 | BASE_SIZE: 2048 80 | BATCH_SIZE_PER_GPU: 3 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 484 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.01 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 16 95 | TEST: 96 | IMAGE_SIZE: 97 | - 2048 98 | - 1024 99 | BASE_SIZE: 2048 100 | BATCH_SIZE_PER_GPU: 4 101 | FLIP_TEST: false 102 | MULTI_SCALE: false 103 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: '../../../../dataset/original_cityscapes/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: 
BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 1024 78 | - 512 79 | BASE_SIZE: 2048 80 | BATCH_SIZE_PER_GPU: 3 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 484 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.01 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 16 95 | TEST: 96 | IMAGE_SIZE: 97 | - 2048 98 | - 1024 99 | BASE_SIZE: 2048 100 | BATCH_SIZE_PER_GPU: 4 101 | FLIP_TEST: false 102 | MULTI_SCALE: false 103 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: '../../../../dataset/original_cityscapes/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 1024 78 | - 512 79 | BASE_SIZE: 2048 80 | BATCH_SIZE_PER_GPU: 2 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 484 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.01 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 16 95 | TEST: 96 | IMAGE_SIZE: 97 | - 2048 98 | - 1024 99 | BASE_SIZE: 2048 100 | BATCH_SIZE_PER_GPU: 4 101 | FLIP_TEST: false 102 | MULTI_SCALE: false -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_w48_train_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: 
(0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 1024 78 | - 512 79 | BASE_SIZE: 2048 80 | BATCH_SIZE_PER_GPU: 3 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 484 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.01 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 16 95 | TEST: 96 | IMAGE_SIZE: 97 | - 2048 98 | - 1024 99 | BASE_SIZE: 2048 100 | BATCH_SIZE_PER_GPU: 4 101 | FLIP_TEST: false 102 | MULTI_SCALE: false 103 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/train.lst' 16 | EXTRA_TRAIN_SET: 'list/cityscapes/trainval.lst' 17 | NUM_CLASSES: 19 18 | MODEL: 19 | NAME: seg_hrnet 20 | ALIGN_CORNERS: False 21 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 22 | EXTRA: 23 | FINAL_CONV_KERNEL: 1 24 | STAGE1: 25 | NUM_MODULES: 1 26 | NUM_RANCHES: 1 27 | BLOCK: BOTTLENECK 28 | NUM_BLOCKS: 29 | - 4 30 | NUM_CHANNELS: 31 | - 64 32 | FUSE_METHOD: SUM 33 | STAGE2: 34 | NUM_MODULES: 1 35 | NUM_BRANCHES: 2 36 | BLOCK: BASIC 37 | NUM_BLOCKS: 38 | - 4 39 | - 4 40 | NUM_CHANNELS: 41 | - 48 42 | - 96 43 | FUSE_METHOD: SUM 44 | STAGE3: 45 | NUM_MODULES: 4 46 | NUM_BRANCHES: 3 47 | BLOCK: BASIC 48 | NUM_BLOCKS: 49 | - 4 50 | - 4 51 | - 4 52 | NUM_CHANNELS: 53 | - 48 54 | - 96 55 | - 192 56 | FUSE_METHOD: SUM 57 | STAGE4: 58 | NUM_MODULES: 3 59 | NUM_BRANCHES: 4 60 | BLOCK: BASIC 61 | NUM_BLOCKS: 62 | - 4 63 | - 4 64 | - 4 65 | - 4 66 | NUM_CHANNELS: 67 | - 48 68 | - 96 69 | - 192 70 | - 384 71 | FUSE_METHOD: SUM 72 | LOSS: 73 | USE_OHEM: false 74 | OHEMTHRES: 0.9 75 | OHEMKEEP: 131072 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 3 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | EXTRA_EPOCH: 484 86 | 
RESUME: true 87 | OPTIMIZER: sgd 88 | LR: 0.01 89 | EXTRA_LR: 0.001 90 | WD: 0.0005 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: 255 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 2048 101 | - 1024 102 | BASE_SIZE: 2048 103 | BATCH_SIZE_PER_GPU: 4 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_w48_trainval_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/train.lst' 16 | EXTRA_TRAIN_SET: 'list/cityscapes/trainval.lst' 17 | NUM_CLASSES: 19 18 | MODEL: 19 | NAME: seg_hrnet 20 | ALIGN_CORNERS: False 21 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 22 | EXTRA: 23 | FINAL_CONV_KERNEL: 1 24 | STAGE1: 25 | NUM_MODULES: 1 26 | NUM_RANCHES: 1 27 | BLOCK: BOTTLENECK 28 | NUM_BLOCKS: 29 | - 4 30 | NUM_CHANNELS: 31 | - 64 32 | FUSE_METHOD: SUM 33 | STAGE2: 34 | NUM_MODULES: 1 35 | NUM_BRANCHES: 2 36 | BLOCK: BASIC 37 | NUM_BLOCKS: 38 | - 4 39 | - 4 40 | NUM_CHANNELS: 41 | - 48 42 | - 96 43 | FUSE_METHOD: SUM 44 | STAGE3: 45 | NUM_MODULES: 4 46 | NUM_BRANCHES: 3 47 | BLOCK: BASIC 48 | NUM_BLOCKS: 49 | - 4 50 | - 4 51 | - 4 52 | NUM_CHANNELS: 53 | - 48 54 | - 96 55 | - 192 56 | FUSE_METHOD: SUM 57 | STAGE4: 58 | NUM_MODULES: 3 59 | NUM_BRANCHES: 4 60 | BLOCK: BASIC 61 | NUM_BLOCKS: 62 | - 4 63 | - 4 64 | - 4 65 | - 4 66 | NUM_CHANNELS: 67 | - 48 68 | - 96 69 | - 192 70 | - 384 71 | FUSE_METHOD: SUM 72 | LOSS: 73 | USE_OHEM: true 74 | OHEMTHRES: 0.9 75 | OHEMKEEP: 131072 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 3 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | EXTRA_EPOCH: 484 86 | RESUME: true 87 | OPTIMIZER: sgd 88 | LR: 0.01 89 | EXTRA_LR: 0.001 90 | WD: 0.0005 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: 255 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 2048 101 | - 1024 102 | BASE_SIZE: 2048 103 | BATCH_SIZE_PER_GPU: 4 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cocostuff/val.lst' 15 | TRAIN_SET: 'list/cocostuff/train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 
| BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 110 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.001 88 | WD: 0.0001 89 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr'] 90 | NONBACKBONE_MULT: 10 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: 255 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 1 104 | NUM_SAMPLES: 200 105 | FLIP_TEST: false 106 | MULTI_SCALE: false 107 | -------------------------------------------------------------------------------- /experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: '../../../../dataset/coco_stuff_10k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 2 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 110 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.001 88 | WD: 0.0001 89 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr'] 90 | NONBACKBONE_MULT: 10 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: 255 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 1 104 | NUM_SAMPLES: 200 105 | FLIP_TEST: false 106 | MULTI_SCALE: false 107 | 
-------------------------------------------------------------------------------- /experiments/cocostuff/seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: '../../../../dataset/coco_stuff_10k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 4 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 110 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.001 87 | WD: 0.0001 88 | NONBACKBONE_KEYWORDS: ['last_layer'] 89 | NONBACKBONE_MULT: 10 90 | MOMENTUM: 0.9 91 | NESTEROV: false 92 | FLIP: true 93 | MULTI_SCALE: true 94 | DOWNSAMPLERATE: 1 95 | IGNORE_LABEL: 255 96 | SCALE_FACTOR: 16 97 | TEST: 98 | IMAGE_SIZE: 99 | - 520 100 | - 520 101 | BASE_SIZE: 520 102 | BATCH_SIZE_PER_GPU: 1 103 | NUM_SAMPLES: 200 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/cocostuff/seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: '../../../../dataset/coco_stuff_10k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 
62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 2 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 110 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.001 87 | WD: 0.0001 88 | NONBACKBONE_KEYWORDS: ['last_layer'] 89 | NONBACKBONE_MULT: 10 90 | MOMENTUM: 0.9 91 | NESTEROV: false 92 | FLIP: true 93 | MULTI_SCALE: true 94 | DOWNSAMPLERATE: 1 95 | IGNORE_LABEL: 255 96 | SCALE_FACTOR: 16 97 | TEST: 98 | IMAGE_SIZE: 99 | - 520 100 | - 520 101 | BASE_SIZE: 520 102 | BATCH_SIZE_PER_GPU: 1 103 | NUM_SAMPLES: 200 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/cocostuff/seg_hrnet_w48_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cocostuff/val.lst' 15 | TRAIN_SET: 'list/cocostuff/train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 4 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 110 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.001 87 | WD: 0.0001 88 | NONBACKBONE_KEYWORDS: ['last_layer'] 89 | NONBACKBONE_MULT: 10 90 | MOMENTUM: 0.9 91 | NESTEROV: false 92 | FLIP: true 93 | MULTI_SCALE: true 94 | DOWNSAMPLERATE: 1 95 | IGNORE_LABEL: 255 96 | SCALE_FACTOR: 16 97 | TEST: 98 | IMAGE_SIZE: 99 | - 520 100 | - 520 101 | BASE_SIZE: 520 102 | BATCH_SIZE_PER_GPU: 1 103 | NUM_SAMPLES: 200 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/lip/seg_hrnet_ocr_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: lip 13 | ROOT: 'data/' 14 | TEST_SET: 'list/lip/valList.txt' 15 | TRAIN_SET: 'list/lip/trainList.txt' 16 | NUM_CLASSES: 20 17 | MODEL: 18 | NAME: 
seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained_2.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 473 79 | - 473 80 | BASE_SIZE: 473 81 | BATCH_SIZE_PER_GPU: 10 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 150 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.007 88 | WD: 0.0005 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 11 96 | TEST: 97 | IMAGE_SIZE: 98 | - 473 99 | - 473 100 | BASE_SIZE: 473 101 | BATCH_SIZE_PER_GPU: 10 102 | NUM_SAMPLES: 2000 103 | FLIP_TEST: false 104 | MULTI_SCALE: false 105 | -------------------------------------------------------------------------------- /experiments/lip/seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: lip 13 | ROOT: 'data/' 14 | TEST_SET: 'list/lip/valList.txt' 15 | TRAIN_SET: 'list/lip/trainList.txt' 16 | NUM_CLASSES: 20 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 473 78 | - 473 79 | BASE_SIZE: 473 80 | BATCH_SIZE_PER_GPU: 10 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 150 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.007 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 473 98 | - 473 99 | BASE_SIZE: 473 100 | BATCH_SIZE_PER_GPU: 16 101 | NUM_SAMPLES: 2000 102 | 
FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/lip/seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: lip 13 | ROOT: '../../../../dataset/lip/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 20 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 473 78 | - 473 79 | BASE_SIZE: 473 80 | BATCH_SIZE_PER_GPU: 10 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 150 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.007 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 473 98 | - 473 99 | BASE_SIZE: 473 100 | BATCH_SIZE_PER_GPU: 8 101 | NUM_SAMPLES: 2000 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/pascal_ctx/seg_hrnet_ocr_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: pascal_ctx 13 | ROOT: 'data/' 14 | TEST_SET: 'val' 15 | TRAIN_SET: 'train' 16 | NUM_CLASSES: 59 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 
69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr'] 83 | NONBACKBONE_MULT: 10 84 | SHUFFLE: true 85 | BEGIN_EPOCH: 0 86 | END_EPOCH: 200 87 | RESUME: true 88 | OPTIMIZER: sgd 89 | LR: 0.001 90 | WD: 0.0001 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: -1 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 16 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/pascal_ctx/seg_hrnet_ocr_w48_cls60_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: pascal_ctx 13 | ROOT: 'data/' 14 | TEST_SET: 'val' 15 | TRAIN_SET: 'train' 16 | NUM_CLASSES: 60 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr'] 83 | NONBACKBONE_MULT: 10 84 | SHUFFLE: true 85 | BEGIN_EPOCH: 0 86 | END_EPOCH: 200 87 | RESUME: true 88 | OPTIMIZER: sgd 89 | LR: 0.001 90 | WD: 0.0001 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: -1 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 16 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/pascal_ctx/seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: pascal_ctx 13 | ROOT: 'data/' 14 | TEST_SET: 'val' 15 | TRAIN_SET: 'train' 16 | NUM_CLASSES: 59 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | NUM_OUTPUTS: 1 21 | PRETRAINED: 
'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 22 | EXTRA: 23 | FINAL_CONV_KERNEL: 1 24 | STAGE1: 25 | NUM_MODULES: 1 26 | NUM_RANCHES: 1 27 | BLOCK: BOTTLENECK 28 | NUM_BLOCKS: 29 | - 4 30 | NUM_CHANNELS: 31 | - 64 32 | FUSE_METHOD: SUM 33 | STAGE2: 34 | NUM_MODULES: 1 35 | NUM_BRANCHES: 2 36 | BLOCK: BASIC 37 | NUM_BLOCKS: 38 | - 4 39 | - 4 40 | NUM_CHANNELS: 41 | - 48 42 | - 96 43 | FUSE_METHOD: SUM 44 | STAGE3: 45 | NUM_MODULES: 4 46 | NUM_BRANCHES: 3 47 | BLOCK: BASIC 48 | NUM_BLOCKS: 49 | - 4 50 | - 4 51 | - 4 52 | NUM_CHANNELS: 53 | - 48 54 | - 96 55 | - 192 56 | FUSE_METHOD: SUM 57 | STAGE4: 58 | NUM_MODULES: 3 59 | NUM_BRANCHES: 4 60 | BLOCK: BASIC 61 | NUM_BLOCKS: 62 | - 4 63 | - 4 64 | - 4 65 | - 4 66 | NUM_CHANNELS: 67 | - 48 68 | - 96 69 | - 192 70 | - 384 71 | FUSE_METHOD: SUM 72 | LOSS: 73 | USE_OHEM: false 74 | OHEMTHRES: 0.9 75 | OHEMKEEP: 131072 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | NONBACKBONE_KEYWORDS: ['last_layer'] 83 | NONBACKBONE_MULT: 10 84 | SHUFFLE: true 85 | BEGIN_EPOCH: 0 86 | END_EPOCH: 200 87 | RESUME: true 88 | OPTIMIZER: sgd 89 | LR: 0.001 90 | WD: 0.0001 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: -1 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 16 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/pascal_ctx/seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: pascal_ctx 13 | ROOT: '../../../../dataset/pascal_context/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 59 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | NUM_OUTPUTS: 1 21 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 22 | EXTRA: 23 | FINAL_CONV_KERNEL: 1 24 | STAGE1: 25 | NUM_MODULES: 1 26 | NUM_RANCHES: 1 27 | BLOCK: BOTTLENECK 28 | NUM_BLOCKS: 29 | - 4 30 | NUM_CHANNELS: 31 | - 64 32 | FUSE_METHOD: SUM 33 | STAGE2: 34 | NUM_MODULES: 1 35 | NUM_BRANCHES: 2 36 | BLOCK: BASIC 37 | NUM_BLOCKS: 38 | - 4 39 | - 4 40 | NUM_CHANNELS: 41 | - 48 42 | - 96 43 | FUSE_METHOD: SUM 44 | STAGE3: 45 | NUM_MODULES: 4 46 | NUM_BRANCHES: 3 47 | BLOCK: BASIC 48 | NUM_BLOCKS: 49 | - 4 50 | - 4 51 | - 4 52 | NUM_CHANNELS: 53 | - 48 54 | - 96 55 | - 192 56 | FUSE_METHOD: SUM 57 | STAGE4: 58 | NUM_MODULES: 3 59 | NUM_BRANCHES: 4 60 | BLOCK: BASIC 61 | NUM_BLOCKS: 62 | - 4 63 | - 4 64 | - 4 65 | - 4 66 | NUM_CHANNELS: 67 | - 48 68 | - 96 69 | - 192 70 | - 384 71 | FUSE_METHOD: SUM 72 | LOSS: 73 | USE_OHEM: false 74 | OHEMTHRES: 0.9 75 | OHEMKEEP: 131072 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | NONBACKBONE_KEYWORDS: ['last_layer'] 83 | NONBACKBONE_MULT: 10 84 | SHUFFLE: true 85 | BEGIN_EPOCH: 0 86 | END_EPOCH: 200 87 | RESUME: true 88 | OPTIMIZER: sgd 89 | LR: 0.001 90 | WD: 0.0001 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: -1 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 
520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 16 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /figures/OCR.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/OCR.PNG -------------------------------------------------------------------------------- /figures/SegmentationTransformerOCR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/SegmentationTransformerOCR.png -------------------------------------------------------------------------------- /figures/SegmentationTransformerOCR1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/SegmentationTransformerOCR1.png -------------------------------------------------------------------------------- /figures/SegmentationTransformerOCR2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/SegmentationTransformerOCR2.png -------------------------------------------------------------------------------- /figures/seg-hrnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/seg-hrnet.png -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | """File for accessing HRNet via PyTorch Hub https://pytorch.org/hub/ 2 | 3 | Usage: 4 | import torch 5 | model = torch.hub.load('HRNet/HRNet-Semantic-Segmentation', 'hrnet_w48_cityscapes', pretrained=False) 6 | """ 7 | 8 | dependencies = ['torch', 'yacs'] 9 | import os 10 | import torch 11 | from lib.config import config 12 | from lib.models.seg_hrnet import get_seg_model 13 | 14 | 15 | # No public hub checkpoint URL is published for this model; point this at a 16 | # downloaded Cityscapes checkpoint before calling with pretrained=True. 17 | state_dict_url = '' 18 | 19 | 20 | def hrnet_w48_cityscapes(pretrained=False, **kwargs): 21 | """ # This docstring shows up in hub.help() 22 | HRNet-W48 semantic segmentation model configured for Cityscapes (19 classes) 23 | pretrained (bool): load pretrained weights into the model 24 | """ 25 | # build HRNet-W48 from the repo's Cityscapes experiment config 26 | config.defrost() 27 | config.merge_from_file(os.path.join(os.path.dirname(__file__), 'experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml')) 28 | config.MODEL.PRETRAINED = '' # do not require the ImageNet checkpoint at build time 29 | config.freeze() 30 | model = get_seg_model(config) 31 | if pretrained: 32 | state_dict = torch.hub.load_state_dict_from_url(state_dict_url, progress=True) 33 | model.load_state_dict(state_dict) 34 | return model -------------------------------------------------------------------------------- /lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from .default import _C as config 11 | from .default import update_config 12 | from .models import MODEL_EXTRAS 13 | -------------------------------------------------------------------------------- /lib/config/default.py: -------------------------------------------------------------------------------- 1 | 2 | # ------------------------------------------------------------------------------ 3 | # Copyright (c) Microsoft 4 | # Licensed under the MIT License. 5 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | 14 | from yacs.config import CfgNode as CN 15 | 16 | 17 | _C = CN() 18 | 19 | _C.OUTPUT_DIR = '' 20 | _C.LOG_DIR = '' 21 | _C.GPUS = (0,) 22 | _C.WORKERS = 4 23 | _C.PRINT_FREQ = 20 24 | _C.AUTO_RESUME = False 25 | _C.PIN_MEMORY = True 26 | _C.RANK = 0 27 | 28 | # Cudnn related params 29 | _C.CUDNN = CN() 30 | _C.CUDNN.BENCHMARK = True 31 | _C.CUDNN.DETERMINISTIC = False 32 | _C.CUDNN.ENABLED = True 33 | 34 | # common params for NETWORK 35 | _C.MODEL = CN() 36 | _C.MODEL.NAME = 'seg_hrnet' 37 | _C.MODEL.PRETRAINED = '' 38 | _C.MODEL.ALIGN_CORNERS = True 39 | _C.MODEL.NUM_OUTPUTS = 1 40 | _C.MODEL.EXTRA = CN(new_allowed=True) 41 | 42 | 43 | _C.MODEL.OCR = CN() 44 | _C.MODEL.OCR.MID_CHANNELS = 512 45 | _C.MODEL.OCR.KEY_CHANNELS = 256 46 | _C.MODEL.OCR.DROPOUT = 0.05 47 | _C.MODEL.OCR.SCALE = 1 48 | 49 | _C.LOSS = CN() 50 | _C.LOSS.USE_OHEM = False 51 | _C.LOSS.OHEMTHRES = 0.9 52 | _C.LOSS.OHEMKEEP = 100000 53 | _C.LOSS.CLASS_BALANCE = False 54 | _C.LOSS.BALANCE_WEIGHTS = [1] 55 | 56 | # DATASET related params 57 | _C.DATASET = CN() 58 | _C.DATASET.ROOT = '' 59 | _C.DATASET.DATASET = 'cityscapes' 60 | _C.DATASET.NUM_CLASSES = 19 61 | _C.DATASET.TRAIN_SET = 'list/cityscapes/train.lst' 62 | _C.DATASET.EXTRA_TRAIN_SET = '' 63 | _C.DATASET.TEST_SET = 'list/cityscapes/val.lst' 64 | 65 | # training 66 | _C.TRAIN = CN() 67 | 68 | _C.TRAIN.FREEZE_LAYERS = '' 69 | _C.TRAIN.FREEZE_EPOCHS = -1 70 | _C.TRAIN.NONBACKBONE_KEYWORDS = [] 71 | _C.TRAIN.NONBACKBONE_MULT = 10 72 | 73 | _C.TRAIN.IMAGE_SIZE = [1024, 512] # width * height 74 | _C.TRAIN.BASE_SIZE = 2048 75 | _C.TRAIN.DOWNSAMPLERATE = 1 76 | _C.TRAIN.FLIP = True 77 | _C.TRAIN.MULTI_SCALE = True 78 | _C.TRAIN.SCALE_FACTOR = 16 79 | 80 | _C.TRAIN.RANDOM_BRIGHTNESS = False 81 | _C.TRAIN.RANDOM_BRIGHTNESS_SHIFT_VALUE = 10 82 | 83 | _C.TRAIN.LR_FACTOR = 0.1 84 | _C.TRAIN.LR_STEP = [90, 110] 85 | _C.TRAIN.LR = 0.01 86 | _C.TRAIN.EXTRA_LR = 0.001 87 | 88 | _C.TRAIN.OPTIMIZER = 'sgd' 89 | _C.TRAIN.MOMENTUM = 0.9 90 | _C.TRAIN.WD = 0.0001 91 | _C.TRAIN.NESTEROV = False 92 | _C.TRAIN.IGNORE_LABEL = -1 93 | 94 | _C.TRAIN.BEGIN_EPOCH = 0 95 | _C.TRAIN.END_EPOCH = 484 96 | _C.TRAIN.EXTRA_EPOCH = 0 97 | 98 | _C.TRAIN.RESUME = False 99 | 100 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32 101 | _C.TRAIN.SHUFFLE = True 102 | # only using some training samples 103 | _C.TRAIN.NUM_SAMPLES = 0 104 | 105 | # testing 106 | _C.TEST = CN() 107 | 108 | _C.TEST.IMAGE_SIZE = [2048, 1024] # width * height 109 | _C.TEST.BASE_SIZE = 2048 110 | 111 | _C.TEST.BATCH_SIZE_PER_GPU = 32 112 | # only testing 
some samples 113 | _C.TEST.NUM_SAMPLES = 0 114 | 115 | _C.TEST.MODEL_FILE = '' 116 | _C.TEST.FLIP_TEST = False 117 | _C.TEST.MULTI_SCALE = False 118 | _C.TEST.SCALE_LIST = [1] 119 | 120 | _C.TEST.OUTPUT_INDEX = -1 121 | 122 | # debug 123 | _C.DEBUG = CN() 124 | _C.DEBUG.DEBUG = False 125 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 126 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 127 | _C.DEBUG.SAVE_HEATMAPS_GT = False 128 | _C.DEBUG.SAVE_HEATMAPS_PRED = False 129 | 130 | 131 | def update_config(cfg, args): 132 | cfg.defrost() 133 | 134 | cfg.merge_from_file(args.cfg) 135 | cfg.merge_from_list(args.opts) 136 | 137 | cfg.freeze() 138 | 139 | 140 | if __name__ == '__main__': 141 | import sys 142 | with open(sys.argv[1], 'w') as f: 143 | print(_C, file=f) 144 | 145 | -------------------------------------------------------------------------------- /lib/config/hrnet_config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Create by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Ke Sun (sunk@mail.ustc.edu.cn), Rainbowsecret (yuyua@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | from yacs.config import CfgNode as CN 13 | 14 | 15 | # configs for HRNet48 16 | HRNET_48 = CN() 17 | HRNET_48.FINAL_CONV_KERNEL = 1 18 | 19 | HRNET_48.STAGE1 = CN() 20 | HRNET_48.STAGE1.NUM_MODULES = 1 21 | HRNET_48.STAGE1.NUM_BRANCHES = 1 22 | HRNET_48.STAGE1.NUM_BLOCKS = [4] 23 | HRNET_48.STAGE1.NUM_CHANNELS = [64] 24 | HRNET_48.STAGE1.BLOCK = 'BOTTLENECK' 25 | HRNET_48.STAGE1.FUSE_METHOD = 'SUM' 26 | 27 | HRNET_48.STAGE2 = CN() 28 | HRNET_48.STAGE2.NUM_MODULES = 1 29 | HRNET_48.STAGE2.NUM_BRANCHES = 2 30 | HRNET_48.STAGE2.NUM_BLOCKS = [4, 4] 31 | HRNET_48.STAGE2.NUM_CHANNELS = [48, 96] 32 | HRNET_48.STAGE2.BLOCK = 'BASIC' 33 | HRNET_48.STAGE2.FUSE_METHOD = 'SUM' 34 | 35 | HRNET_48.STAGE3 = CN() 36 | HRNET_48.STAGE3.NUM_MODULES = 4 37 | HRNET_48.STAGE3.NUM_BRANCHES = 3 38 | HRNET_48.STAGE3.NUM_BLOCKS = [4, 4, 4] 39 | HRNET_48.STAGE3.NUM_CHANNELS = [48, 96, 192] 40 | HRNET_48.STAGE3.BLOCK = 'BASIC' 41 | HRNET_48.STAGE3.FUSE_METHOD = 'SUM' 42 | 43 | HRNET_48.STAGE4 = CN() 44 | HRNET_48.STAGE4.NUM_MODULES = 3 45 | HRNET_48.STAGE4.NUM_BRANCHES = 4 46 | HRNET_48.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 47 | HRNET_48.STAGE4.NUM_CHANNELS = [48, 96, 192, 384] 48 | HRNET_48.STAGE4.BLOCK = 'BASIC' 49 | HRNET_48.STAGE4.FUSE_METHOD = 'SUM' 50 | 51 | 52 | # configs for HRNet32 53 | HRNET_32 = CN() 54 | HRNET_32.FINAL_CONV_KERNEL = 1 55 | 56 | HRNET_32.STAGE1 = CN() 57 | HRNET_32.STAGE1.NUM_MODULES = 1 58 | HRNET_32.STAGE1.NUM_BRANCHES = 1 59 | HRNET_32.STAGE1.NUM_BLOCKS = [4] 60 | HRNET_32.STAGE1.NUM_CHANNELS = [64] 61 | HRNET_32.STAGE1.BLOCK = 'BOTTLENECK' 62 | HRNET_32.STAGE1.FUSE_METHOD = 'SUM' 63 | 64 | HRNET_32.STAGE2 = CN() 65 | HRNET_32.STAGE2.NUM_MODULES = 1 66 | HRNET_32.STAGE2.NUM_BRANCHES = 2 67 | HRNET_32.STAGE2.NUM_BLOCKS = [4, 4] 68 | HRNET_32.STAGE2.NUM_CHANNELS = [32, 64] 69 | HRNET_32.STAGE2.BLOCK = 'BASIC' 70 | HRNET_32.STAGE2.FUSE_METHOD = 'SUM' 71 | 72 | HRNET_32.STAGE3 = CN() 73 | HRNET_32.STAGE3.NUM_MODULES = 4 74 | HRNET_32.STAGE3.NUM_BRANCHES = 3 75 | HRNET_32.STAGE3.NUM_BLOCKS = [4, 4, 4] 76 | HRNET_32.STAGE3.NUM_CHANNELS = [32, 64, 128] 77 | 
HRNET_32.STAGE3.BLOCK = 'BASIC' 78 | HRNET_32.STAGE3.FUSE_METHOD = 'SUM' 79 | 80 | HRNET_32.STAGE4 = CN() 81 | HRNET_32.STAGE4.NUM_MODULES = 3 82 | HRNET_32.STAGE4.NUM_BRANCHES = 4 83 | HRNET_32.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 84 | HRNET_32.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 85 | HRNET_32.STAGE4.BLOCK = 'BASIC' 86 | HRNET_32.STAGE4.FUSE_METHOD = 'SUM' 87 | 88 | 89 | # configs for HRNet18 90 | HRNET_18 = CN() 91 | HRNET_18.FINAL_CONV_KERNEL = 1 92 | 93 | HRNET_18.STAGE1 = CN() 94 | HRNET_18.STAGE1.NUM_MODULES = 1 95 | HRNET_18.STAGE1.NUM_BRANCHES = 1 96 | HRNET_18.STAGE1.NUM_BLOCKS = [4] 97 | HRNET_18.STAGE1.NUM_CHANNELS = [64] 98 | HRNET_18.STAGE1.BLOCK = 'BOTTLENECK' 99 | HRNET_18.STAGE1.FUSE_METHOD = 'SUM' 100 | 101 | HRNET_18.STAGE2 = CN() 102 | HRNET_18.STAGE2.NUM_MODULES = 1 103 | HRNET_18.STAGE2.NUM_BRANCHES = 2 104 | HRNET_18.STAGE2.NUM_BLOCKS = [4, 4] 105 | HRNET_18.STAGE2.NUM_CHANNELS = [18, 36] 106 | HRNET_18.STAGE2.BLOCK = 'BASIC' 107 | HRNET_18.STAGE2.FUSE_METHOD = 'SUM' 108 | 109 | HRNET_18.STAGE3 = CN() 110 | HRNET_18.STAGE3.NUM_MODULES = 4 111 | HRNET_18.STAGE3.NUM_BRANCHES = 3 112 | HRNET_18.STAGE3.NUM_BLOCKS = [4, 4, 4] 113 | HRNET_18.STAGE3.NUM_CHANNELS = [18, 36, 72] 114 | HRNET_18.STAGE3.BLOCK = 'BASIC' 115 | HRNET_18.STAGE3.FUSE_METHOD = 'SUM' 116 | 117 | HRNET_18.STAGE4 = CN() 118 | HRNET_18.STAGE4.NUM_MODULES = 3 119 | HRNET_18.STAGE4.NUM_BRANCHES = 4 120 | HRNET_18.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 121 | HRNET_18.STAGE4.NUM_CHANNELS = [18, 36, 72, 144] 122 | HRNET_18.STAGE4.BLOCK = 'BASIC' 123 | HRNET_18.STAGE4.FUSE_METHOD = 'SUM' 124 | 125 | 126 | MODEL_CONFIGS = { 127 | 'hrnet18': HRNET_18, 128 | 'hrnet32': HRNET_32, 129 | 'hrnet48': HRNET_48, 130 | } -------------------------------------------------------------------------------- /lib/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | # high_resoluton_net related params for segmentation 14 | HIGH_RESOLUTION_NET = CN() 15 | HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 16 | HIGH_RESOLUTION_NET.STEM_INPLANES = 64 17 | HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 18 | HIGH_RESOLUTION_NET.WITH_HEAD = True 19 | 20 | HIGH_RESOLUTION_NET.STAGE2 = CN() 21 | HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 22 | HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 23 | HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 24 | HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64] 25 | HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC' 26 | HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 27 | 28 | HIGH_RESOLUTION_NET.STAGE3 = CN() 29 | HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 30 | HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 31 | HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 32 | HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128] 33 | HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC' 34 | HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 35 | 36 | HIGH_RESOLUTION_NET.STAGE4 = CN() 37 | HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 38 | HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 39 | HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 40 | HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 41 | HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC' 42 | HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 43 | 44 | MODEL_EXTRAS = { 45 | 'seg_hrnet': HIGH_RESOLUTION_NET, 46 | } 47 | -------------------------------------------------------------------------------- /lib/core/criterion.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import torch 8 | import torch.nn as nn 9 | from torch.nn import functional as F 10 | import logging 11 | from config import config 12 | 13 | 14 | class CrossEntropy(nn.Module): 15 | def __init__(self, ignore_label=-1, weight=None): 16 | super(CrossEntropy, self).__init__() 17 | self.ignore_label = ignore_label 18 | self.criterion = nn.CrossEntropyLoss( 19 | weight=weight, 20 | ignore_index=ignore_label 21 | ) 22 | 23 | def _forward(self, score, target): 24 | ph, pw = score.size(2), score.size(3) 25 | h, w = target.size(1), target.size(2) 26 | if ph != h or pw != w: 27 | score = F.interpolate(input=score, size=( 28 | h, w), mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS) 29 | 30 | loss = self.criterion(score, target) 31 | 32 | return loss 33 | 34 | def forward(self, score, target): 35 | 36 | if config.MODEL.NUM_OUTPUTS == 1: 37 | score = [score] 38 | 39 | weights = config.LOSS.BALANCE_WEIGHTS 40 | assert len(weights) == len(score) 41 | 42 | return sum([w * self._forward(x, target) for (w, x) in zip(weights, score)]) 43 | 44 | 45 | class OhemCrossEntropy(nn.Module): 46 | def __init__(self, ignore_label=-1, thres=0.7, 47 | min_kept=100000, weight=None): 48 | super(OhemCrossEntropy, self).__init__() 49 | self.thresh = thres 50 | self.min_kept = max(1, min_kept) 51 | self.ignore_label = ignore_label 52 | self.criterion = nn.CrossEntropyLoss( 53 | weight=weight, 54 | ignore_index=ignore_label, 55 | reduction='none' 56 | ) 57 | 58 | def _ce_forward(self, score, target): 59 | ph, pw = score.size(2), score.size(3) 60 | h, w = target.size(1), target.size(2) 61 | if ph != h or pw != w: 62 | score = F.interpolate(input=score, size=( 63 | h, w), mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS) 64 | 65 | loss = self.criterion(score, target) 66 | 67 | return loss 68 | 69 | def _ohem_forward(self, score, target, **kwargs): 70 | ph, pw = score.size(2), score.size(3) 71 | h, w = target.size(1), target.size(2) 72 | if ph != h or pw != w: 73 | score = F.interpolate(input=score, size=( 74 | h, w), mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS) 75 | pred = F.softmax(score, dim=1) 76 | pixel_losses = self.criterion(score, target).contiguous().view(-1) 77 | mask = target.contiguous().view(-1) != self.ignore_label 78 | 79 | tmp_target = target.clone() 80 | tmp_target[tmp_target == self.ignore_label] = 0 81 | pred = pred.gather(1, tmp_target.unsqueeze(1)) 82 | pred, ind = pred.contiguous().view(-1,)[mask].contiguous().sort() 83 | min_value = pred[min(self.min_kept, pred.numel() - 1)] 84 | threshold = max(min_value, self.thresh) 85 | 86 | pixel_losses = pixel_losses[mask][ind] 87 | pixel_losses = pixel_losses[pred < threshold] 88 | return pixel_losses.mean() 89 | 90 | def forward(self, score, target): 91 | 92 | if config.MODEL.NUM_OUTPUTS == 1: 93 | score = [score] 94 | 95 | weights = config.LOSS.BALANCE_WEIGHTS 96 | assert len(weights) == len(score) 97 | 98 | functions = [self._ce_forward] * \ 99 | (len(weights) - 1) + [self._ohem_forward] 100 | return sum([ 101 | w * func(x, target) 102 | for (w, x, func) in zip(weights, score, functions) 103 | ]) 104 | -------------------------------------------------------------------------------- /lib/core/function.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) 
Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import logging 8 | import os 9 | import time 10 | 11 | import numpy as np 12 | import numpy.ma as ma 13 | from tqdm import tqdm 14 | 15 | import torch 16 | import torch.nn as nn 17 | from torch.nn import functional as F 18 | 19 | from utils.utils import AverageMeter 20 | from utils.utils import get_confusion_matrix 21 | from utils.utils import adjust_learning_rate 22 | 23 | import utils.distributed as dist 24 | 25 | 26 | def reduce_tensor(inp): 27 | """ 28 | Reduce the loss from all processes so that 29 | process with rank 0 has the averaged results. 30 | """ 31 | world_size = dist.get_world_size() 32 | if world_size < 2: 33 | return inp 34 | with torch.no_grad(): 35 | reduced_inp = inp 36 | torch.distributed.reduce(reduced_inp, dst=0) 37 | return reduced_inp / world_size 38 | 39 | 40 | def train(config, epoch, num_epoch, epoch_iters, base_lr, 41 | num_iters, trainloader, optimizer, model, writer_dict): 42 | # Training 43 | model.train() 44 | 45 | batch_time = AverageMeter() 46 | ave_loss = AverageMeter() 47 | tic = time.time() 48 | cur_iters = epoch*epoch_iters 49 | writer = writer_dict['writer'] 50 | global_steps = writer_dict['train_global_steps'] 51 | 52 | for i_iter, batch in enumerate(trainloader, 0): 53 | images, labels, _, _ = batch 54 | images = images.cuda() 55 | labels = labels.long().cuda() 56 | 57 | losses, _ = model(images, labels) 58 | loss = losses.mean() 59 | 60 | if dist.is_distributed(): 61 | reduced_loss = reduce_tensor(loss) 62 | else: 63 | reduced_loss = loss 64 | 65 | model.zero_grad() 66 | loss.backward() 67 | optimizer.step() 68 | 69 | # measure elapsed time 70 | batch_time.update(time.time() - tic) 71 | tic = time.time() 72 | 73 | # update average loss 74 | ave_loss.update(reduced_loss.item()) 75 | 76 | lr = adjust_learning_rate(optimizer, 77 | base_lr, 78 | num_iters, 79 | i_iter+cur_iters) 80 | 81 | if i_iter % config.PRINT_FREQ == 0 and dist.get_rank() == 0: 82 | msg = 'Epoch: [{}/{}] Iter:[{}/{}], Time: {:.2f}, ' \ 83 | 'lr: {}, Loss: {:.6f}' .format( 84 | epoch, num_epoch, i_iter, epoch_iters, 85 | batch_time.average(), [x['lr'] for x in optimizer.param_groups], ave_loss.average()) 86 | logging.info(msg) 87 | 88 | writer.add_scalar('train_loss', ave_loss.average(), global_steps) 89 | writer_dict['train_global_steps'] = global_steps + 1 90 | 91 | def validate(config, testloader, model, writer_dict): 92 | model.eval() 93 | ave_loss = AverageMeter() 94 | nums = config.MODEL.NUM_OUTPUTS 95 | confusion_matrix = np.zeros( 96 | (config.DATASET.NUM_CLASSES, config.DATASET.NUM_CLASSES, nums)) 97 | with torch.no_grad(): 98 | for idx, batch in enumerate(testloader): 99 | image, label, _, _ = batch 100 | size = label.size() 101 | image = image.cuda() 102 | label = label.long().cuda() 103 | 104 | losses, pred = model(image, label) 105 | if not isinstance(pred, (list, tuple)): 106 | pred = [pred] 107 | for i, x in enumerate(pred): 108 | x = F.interpolate( 109 | input=x, size=size[-2:], 110 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 111 | ) 112 | 113 | confusion_matrix[..., i] += get_confusion_matrix( 114 | label, 115 | x, 116 | size, 117 | config.DATASET.NUM_CLASSES, 118 | config.TRAIN.IGNORE_LABEL 119 | ) 120 | 121 | if idx % 10 == 0: 122 | print(idx) 123 | 124 | loss = losses.mean() 125 | if dist.is_distributed(): 126 | reduced_loss = reduce_tensor(loss) 127 | else: 
128 | reduced_loss = loss 129 | ave_loss.update(reduced_loss.item()) 130 | 131 | if dist.is_distributed(): 132 | confusion_matrix = torch.from_numpy(confusion_matrix).cuda() 133 | reduced_confusion_matrix = reduce_tensor(confusion_matrix) 134 | confusion_matrix = reduced_confusion_matrix.cpu().numpy() 135 | 136 | for i in range(nums): 137 | pos = confusion_matrix[..., i].sum(1) 138 | res = confusion_matrix[..., i].sum(0) 139 | tp = np.diag(confusion_matrix[..., i]) 140 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 141 | mean_IoU = IoU_array.mean() 142 | if dist.get_rank() <= 0: 143 | logging.info('{} {} {}'.format(i, IoU_array, mean_IoU)) 144 | 145 | writer = writer_dict['writer'] 146 | global_steps = writer_dict['valid_global_steps'] 147 | writer.add_scalar('valid_loss', ave_loss.average(), global_steps) 148 | writer.add_scalar('valid_mIoU', mean_IoU, global_steps) 149 | writer_dict['valid_global_steps'] = global_steps + 1 150 | return ave_loss.average(), mean_IoU, IoU_array 151 | 152 | 153 | def testval(config, test_dataset, testloader, model, 154 | sv_dir='', sv_pred=False): 155 | model.eval() 156 | confusion_matrix = np.zeros( 157 | (config.DATASET.NUM_CLASSES, config.DATASET.NUM_CLASSES)) 158 | with torch.no_grad(): 159 | for index, batch in enumerate(tqdm(testloader)): 160 | image, label, _, name, *border_padding = batch 161 | size = label.size() 162 | pred = test_dataset.multi_scale_inference( 163 | config, 164 | model, 165 | image, 166 | scales=config.TEST.SCALE_LIST, 167 | flip=config.TEST.FLIP_TEST) 168 | 169 | if len(border_padding) > 0: 170 | border_padding = border_padding[0] 171 | pred = pred[:, :, 0:pred.size(2) - border_padding[0], 0:pred.size(3) - border_padding[1]] 172 | 173 | if pred.size()[-2] != size[-2] or pred.size()[-1] != size[-1]: 174 | pred = F.interpolate( 175 | pred, size[-2:], 176 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 177 | ) 178 | 179 | confusion_matrix += get_confusion_matrix( 180 | label, 181 | pred, 182 | size, 183 | config.DATASET.NUM_CLASSES, 184 | config.TRAIN.IGNORE_LABEL) 185 | 186 | if sv_pred: 187 | sv_path = os.path.join(sv_dir, 'test_results') 188 | if not os.path.exists(sv_path): 189 | os.mkdir(sv_path) 190 | test_dataset.save_pred(pred, sv_path, name) 191 | 192 | if index % 100 == 0: 193 | logging.info('processing: %d images' % index) 194 | pos = confusion_matrix.sum(1) 195 | res = confusion_matrix.sum(0) 196 | tp = np.diag(confusion_matrix) 197 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 198 | mean_IoU = IoU_array.mean() 199 | logging.info('mIoU: %.4f' % (mean_IoU)) 200 | 201 | pos = confusion_matrix.sum(1) 202 | res = confusion_matrix.sum(0) 203 | tp = np.diag(confusion_matrix) 204 | pixel_acc = tp.sum()/pos.sum() 205 | mean_acc = (tp/np.maximum(1.0, pos)).mean() 206 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 207 | mean_IoU = IoU_array.mean() 208 | 209 | return mean_IoU, IoU_array, pixel_acc, mean_acc 210 | 211 | 212 | def test(config, test_dataset, testloader, model, 213 | sv_dir='', sv_pred=True): 214 | model.eval() 215 | with torch.no_grad(): 216 | for _, batch in enumerate(tqdm(testloader)): 217 | image, size, name = batch 218 | size = size[0] 219 | pred = test_dataset.multi_scale_inference( 220 | config, 221 | model, 222 | image, 223 | scales=config.TEST.SCALE_LIST, 224 | flip=config.TEST.FLIP_TEST) 225 | 226 | if pred.size()[-2] != size[0] or pred.size()[-1] != size[1]: 227 | pred = F.interpolate( 228 | pred, size[-2:], 229 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 230 | ) 
231 | 232 | if sv_pred: 233 | sv_path = os.path.join(sv_dir, 'test_results') 234 | if not os.path.exists(sv_path): 235 | os.mkdir(sv_path) 236 | test_dataset.save_pred(pred, sv_path, name) 237 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from .cityscapes import Cityscapes as cityscapes 12 | from .lip import LIP as lip 13 | from .pascal_ctx import PASCALContext as pascal_ctx 14 | from .ade20k import ADE20K as ade20k 15 | from .cocostuff import COCOStuff as cocostuff -------------------------------------------------------------------------------- /lib/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | 12 | import torch 13 | from torch.nn import functional as F 14 | from PIL import Image 15 | 16 | from .base_dataset import BaseDataset 17 | 18 | 19 | class ADE20K(BaseDataset): 20 | def __init__(self, 21 | root, 22 | list_path, 23 | num_samples=None, 24 | num_classes=150, 25 | multi_scale=True, 26 | flip=True, 27 | ignore_label=-1, 28 | base_size=520, 29 | crop_size=(520, 520), 30 | downsample_rate=1, 31 | scale_factor=11, 32 | mean=[0.485, 0.456, 0.406], 33 | std=[0.229, 0.224, 0.225]): 34 | 35 | super(ADE20K, self).__init__(ignore_label, base_size, 36 | crop_size, downsample_rate, scale_factor, mean, std) 37 | 38 | self.root = root 39 | self.num_classes = num_classes 40 | self.list_path = list_path 41 | self.class_weights = None 42 | 43 | self.multi_scale = multi_scale 44 | self.flip = flip 45 | self.img_list = [line.strip().split() for line in open(root+list_path)] 46 | 47 | self.files = self.read_files() 48 | if num_samples: 49 | self.files = self.files[:num_samples] 50 | 51 | def read_files(self): 52 | files = [] 53 | for item in self.img_list: 54 | image_path, label_path = item 55 | name = os.path.splitext(os.path.basename(label_path))[0] 56 | sample = { 57 | 'img': image_path, 58 | 'label': label_path, 59 | 'name': name 60 | } 61 | files.append(sample) 62 | return files 63 | 64 | def resize_image(self, image, label, size): 65 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) 66 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST) 67 | return image, label 68 | 69 | def __getitem__(self, index): 70 | item = self.files[index] 71 | name = item["name"] 72 | # image_path = os.path.join(self.root, 'ade20k', item['img']) 73 | # label_path = os.path.join(self.root, 'ade20k', item['label']) 74 | image_path = os.path.join(self.root, item['img']) 75 | label_path = os.path.join(self.root, item['label']) 76 | image = cv2.imread( 77 | image_path, 78 | cv2.IMREAD_COLOR 79 | ) 80 | label = np.array( 81 | 
Image.open(label_path).convert('P') 82 | ) 83 | label = self.reduce_zero_label(label) 84 | size = label.shape 85 | 86 | if 'testval' in self.list_path: 87 | image = self.resize_short_length( 88 | image, 89 | short_length=self.base_size, 90 | fit_stride=8 91 | ) 92 | image = self.input_transform(image) 93 | image = image.transpose((2, 0, 1)) 94 | 95 | return image.copy(), label.copy(), np.array(size), name 96 | 97 | if 'val' in self.list_path: 98 | image, label = self.resize_short_length( 99 | image, 100 | label=label, 101 | short_length=self.base_size, 102 | fit_stride=8 103 | ) 104 | image, label = self.rand_crop(image, label) 105 | image = self.input_transform(image) 106 | image = image.transpose((2, 0, 1)) 107 | 108 | return image.copy(), label.copy(), np.array(size), name 109 | 110 | image, label = self.resize_short_length(image, label, short_length=self.base_size) 111 | image, label = self.gen_sample(image, label, self.multi_scale, self.flip) 112 | 113 | return image.copy(), label.copy(), np.array(size), name -------------------------------------------------------------------------------- /lib/datasets/base_dataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | import random 12 | 13 | import torch 14 | from torch.nn import functional as F 15 | from torch.utils import data 16 | 17 | from config import config 18 | 19 | 20 | class BaseDataset(data.Dataset): 21 | def __init__(self, 22 | ignore_label=-1, 23 | base_size=2048, 24 | crop_size=(512, 1024), 25 | downsample_rate=1, 26 | scale_factor=16, 27 | mean=[0.485, 0.456, 0.406], 28 | std=[0.229, 0.224, 0.225]): 29 | 30 | self.base_size = base_size 31 | self.crop_size = crop_size 32 | self.ignore_label = ignore_label 33 | 34 | self.mean = mean 35 | self.std = std 36 | self.scale_factor = scale_factor 37 | self.downsample_rate = 1./downsample_rate 38 | 39 | self.files = [] 40 | 41 | def __len__(self): 42 | return len(self.files) 43 | 44 | def input_transform(self, image): 45 | image = image.astype(np.float32)[:, :, ::-1] 46 | image = image / 255.0 47 | image -= self.mean 48 | image /= self.std 49 | return image 50 | 51 | def label_transform(self, label): 52 | return np.array(label).astype('int32') 53 | 54 | def pad_image(self, image, h, w, size, padvalue): 55 | pad_image = image.copy() 56 | pad_h = max(size[0] - h, 0) 57 | pad_w = max(size[1] - w, 0) 58 | if pad_h > 0 or pad_w > 0: 59 | pad_image = cv2.copyMakeBorder(image, 0, pad_h, 0, 60 | pad_w, cv2.BORDER_CONSTANT, 61 | value=padvalue) 62 | 63 | return pad_image 64 | 65 | def rand_crop(self, image, label): 66 | h, w = image.shape[:-1] 67 | image = self.pad_image(image, h, w, self.crop_size, 68 | (0.0, 0.0, 0.0)) 69 | label = self.pad_image(label, h, w, self.crop_size, 70 | (self.ignore_label,)) 71 | 72 | new_h, new_w = label.shape 73 | x = random.randint(0, new_w - self.crop_size[1]) 74 | y = random.randint(0, new_h - self.crop_size[0]) 75 | image = image[y:y+self.crop_size[0], x:x+self.crop_size[1]] 76 | label = label[y:y+self.crop_size[0], x:x+self.crop_size[1]] 77 | 78 | return image, label 79 | 80 | def multi_scale_aug(self, image, label=None, 81 | rand_scale=1, rand_crop=True): 82 | long_size = 
np.int(self.base_size * rand_scale + 0.5) 83 | h, w = image.shape[:2] 84 | if h > w: 85 | new_h = long_size 86 | new_w = np.int(w * long_size / h + 0.5) 87 | else: 88 | new_w = long_size 89 | new_h = np.int(h * long_size / w + 0.5) 90 | 91 | image = cv2.resize(image, (new_w, new_h), 92 | interpolation=cv2.INTER_LINEAR) 93 | if label is not None: 94 | label = cv2.resize(label, (new_w, new_h), 95 | interpolation=cv2.INTER_NEAREST) 96 | else: 97 | return image 98 | 99 | if rand_crop: 100 | image, label = self.rand_crop(image, label) 101 | 102 | return image, label 103 | 104 | def resize_short_length(self, image, label=None, short_length=None, fit_stride=None, return_padding=False): 105 | h, w = image.shape[:2] 106 | if h < w: 107 | new_h = short_length 108 | new_w = np.int(w * short_length / h + 0.5) 109 | else: 110 | new_w = short_length 111 | new_h = np.int(h * short_length / w + 0.5) 112 | image = cv2.resize(image, (new_w, new_h), 113 | interpolation=cv2.INTER_LINEAR) 114 | pad_w, pad_h = 0, 0 115 | if fit_stride is not None: 116 | pad_w = 0 if (new_w % fit_stride == 0) else fit_stride - (new_w % fit_stride) 117 | pad_h = 0 if (new_h % fit_stride == 0) else fit_stride - (new_h % fit_stride) 118 | image = cv2.copyMakeBorder( 119 | image, 0, pad_h, 0, pad_w, 120 | cv2.BORDER_CONSTANT, value=tuple(x * 255 for x in self.mean[::-1]) 121 | ) 122 | 123 | if label is not None: 124 | label = cv2.resize( 125 | label, (new_w, new_h), 126 | interpolation=cv2.INTER_NEAREST) 127 | if pad_h > 0 or pad_w > 0: 128 | label = cv2.copyMakeBorder( 129 | label, 0, pad_h, 0, pad_w, 130 | cv2.BORDER_CONSTANT, value=self.ignore_label 131 | ) 132 | if return_padding: 133 | return image, label, (pad_h, pad_w) 134 | else: 135 | return image, label 136 | else: 137 | if return_padding: 138 | return image, (pad_h, pad_w) 139 | else: 140 | return image 141 | 142 | def random_brightness(self, img): 143 | if not config.TRAIN.RANDOM_BRIGHTNESS: 144 | return img 145 | if random.random() < 0.5: 146 | return img 147 | self.shift_value = config.TRAIN.RANDOM_BRIGHTNESS_SHIFT_VALUE 148 | img = img.astype(np.float32) 149 | shift = random.randint(-self.shift_value, self.shift_value) 150 | img[:, :, :] += shift 151 | img = np.around(img) 152 | img = np.clip(img, 0, 255).astype(np.uint8) 153 | return img 154 | 155 | def gen_sample(self, image, label, 156 | multi_scale=True, is_flip=True): 157 | if multi_scale: 158 | rand_scale = 0.5 + random.randint(0, self.scale_factor) / 10.0 159 | image, label = self.multi_scale_aug(image, label, 160 | rand_scale=rand_scale) 161 | 162 | image = self.random_brightness(image) 163 | image = self.input_transform(image) 164 | label = self.label_transform(label) 165 | 166 | image = image.transpose((2, 0, 1)) 167 | 168 | if is_flip: 169 | flip = np.random.choice(2) * 2 - 1 170 | image = image[:, :, ::flip] 171 | label = label[:, ::flip] 172 | 173 | if self.downsample_rate != 1: 174 | label = cv2.resize( 175 | label, 176 | None, 177 | fx=self.downsample_rate, 178 | fy=self.downsample_rate, 179 | interpolation=cv2.INTER_NEAREST 180 | ) 181 | 182 | return image, label 183 | 184 | def reduce_zero_label(self, labelmap): 185 | labelmap = np.array(labelmap) 186 | encoded_labelmap = labelmap - 1 187 | 188 | return encoded_labelmap 189 | 190 | def inference(self, config, model, image, flip=False): 191 | size = image.size() 192 | pred = model(image) 193 | 194 | if config.MODEL.NUM_OUTPUTS > 1: 195 | pred = pred[config.TEST.OUTPUT_INDEX] 196 | 197 | pred = F.interpolate( 198 | input=pred, size=size[-2:], 199 | 
mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 200 | ) 201 | 202 | if flip: 203 | flip_img = image.numpy()[:, :, :, ::-1] 204 | flip_output = model(torch.from_numpy(flip_img.copy())) 205 | 206 | if config.MODEL.NUM_OUTPUTS > 1: 207 | flip_output = flip_output[config.TEST.OUTPUT_INDEX] 208 | 209 | flip_output = F.interpolate( 210 | input=flip_output, size=size[-2:], 211 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 212 | ) 213 | 214 | flip_pred = flip_output.cpu().numpy().copy() 215 | flip_pred = torch.from_numpy( 216 | flip_pred[:, :, :, ::-1].copy()).cuda() 217 | pred += flip_pred 218 | pred = pred * 0.5 219 | return pred.exp() 220 | 221 | def multi_scale_inference(self, config, model, image, scales=[1], flip=False): 222 | batch, _, ori_height, ori_width = image.size() 223 | assert batch == 1, "only supporting batchsize 1." 224 | image = image.numpy()[0].transpose((1, 2, 0)).copy() 225 | stride_h = np.int(self.crop_size[0] * 2.0 / 3.0) 226 | stride_w = np.int(self.crop_size[1] * 2.0 / 3.0) 227 | final_pred = torch.zeros([1, self.num_classes, 228 | ori_height, ori_width]).cuda() 229 | padvalue = -1.0 * np.array(self.mean) / np.array(self.std) 230 | for scale in scales: 231 | new_img = self.multi_scale_aug(image=image, 232 | rand_scale=scale, 233 | rand_crop=False) 234 | height, width = new_img.shape[:-1] 235 | 236 | if max(height, width) <= np.min(self.crop_size): 237 | new_img = self.pad_image(new_img, height, width, 238 | self.crop_size, padvalue) 239 | new_img = new_img.transpose((2, 0, 1)) 240 | new_img = np.expand_dims(new_img, axis=0) 241 | new_img = torch.from_numpy(new_img) 242 | preds = self.inference(config, model, new_img, flip) 243 | preds = preds[:, :, 0:height, 0:width] 244 | else: 245 | if height < self.crop_size[0] or width < self.crop_size[1]: 246 | new_img = self.pad_image(new_img, height, width, 247 | self.crop_size, padvalue) 248 | new_h, new_w = new_img.shape[:-1] 249 | rows = np.int(np.ceil(1.0 * (new_h - 250 | self.crop_size[0]) / stride_h)) + 1 251 | cols = np.int(np.ceil(1.0 * (new_w - 252 | self.crop_size[1]) / stride_w)) + 1 253 | preds = torch.zeros([1, self.num_classes, 254 | new_h, new_w]).cuda() 255 | count = torch.zeros([1, 1, new_h, new_w]).cuda() 256 | 257 | for r in range(rows): 258 | for c in range(cols): 259 | h0 = r * stride_h 260 | w0 = c * stride_w 261 | h1 = min(h0 + self.crop_size[0], new_h) 262 | w1 = min(w0 + self.crop_size[1], new_w) 263 | crop_img = new_img[h0:h1, w0:w1, :] 264 | if h1 == new_h or w1 == new_w: 265 | crop_img = self.pad_image(crop_img, 266 | h1-h0, 267 | w1-w0, 268 | self.crop_size, 269 | padvalue) 270 | crop_img = crop_img.transpose((2, 0, 1)) 271 | crop_img = np.expand_dims(crop_img, axis=0) 272 | crop_img = torch.from_numpy(crop_img) 273 | pred = self.inference(config, model, crop_img, flip) 274 | preds[:, :, h0:h1, w0:w1] += pred[:, :, 0:h1-h0, 0:w1-w0] 275 | count[:, :, h0:h1, w0:w1] += 1 276 | preds = preds / count 277 | preds = preds[:, :, :height, :width] 278 | 279 | preds = F.interpolate( 280 | preds, (ori_height, ori_width), 281 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 282 | ) 283 | final_pred += preds 284 | return final_pred 285 | -------------------------------------------------------------------------------- /lib/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
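A minimal sketch of how the sliding-window `multi_scale_inference` above is typically driven at test time; the `test_dataset`, `testloader` and `model` names are hypothetical stand-ins for the objects built in tools/test.py, and the scale list is just an example:

    import torch

    model.eval()
    with torch.no_grad():
        for image, label, size, name in testloader:  # multi_scale_inference asserts batch size 1
            pred = test_dataset.multi_scale_inference(
                config, model, image,
                scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], flip=True)
            # pred is the sum of the per-scale predictions at the original
            # resolution; an argmax over the class dimension gives the label map.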
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | from PIL import Image 12 | 13 | import torch 14 | from torch.nn import functional as F 15 | 16 | from .base_dataset import BaseDataset 17 | 18 | class Cityscapes(BaseDataset): 19 | def __init__(self, 20 | root, 21 | list_path, 22 | num_samples=None, 23 | num_classes=19, 24 | multi_scale=True, 25 | flip=True, 26 | ignore_label=-1, 27 | base_size=2048, 28 | crop_size=(512, 1024), 29 | downsample_rate=1, 30 | scale_factor=16, 31 | mean=[0.485, 0.456, 0.406], 32 | std=[0.229, 0.224, 0.225]): 33 | 34 | super(Cityscapes, self).__init__(ignore_label, base_size, 35 | crop_size, downsample_rate, scale_factor, mean, std,) 36 | 37 | self.root = root 38 | self.list_path = list_path 39 | self.num_classes = num_classes 40 | 41 | self.multi_scale = multi_scale 42 | self.flip = flip 43 | 44 | self.img_list = [line.strip().split() for line in open(root+list_path)] 45 | 46 | self.files = self.read_files() 47 | if num_samples: 48 | self.files = self.files[:num_samples] 49 | 50 | self.label_mapping = {-1: ignore_label, 0: ignore_label, 51 | 1: ignore_label, 2: ignore_label, 52 | 3: ignore_label, 4: ignore_label, 53 | 5: ignore_label, 6: ignore_label, 54 | 7: 0, 8: 1, 9: ignore_label, 55 | 10: ignore_label, 11: 2, 12: 3, 56 | 13: 4, 14: ignore_label, 15: ignore_label, 57 | 16: ignore_label, 17: 5, 18: ignore_label, 58 | 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 59 | 25: 12, 26: 13, 27: 14, 28: 15, 60 | 29: ignore_label, 30: ignore_label, 61 | 31: 16, 32: 17, 33: 18} 62 | self.class_weights = torch.FloatTensor([0.8373, 0.918, 0.866, 1.0345, 63 | 1.0166, 0.9969, 0.9754, 1.0489, 64 | 0.8786, 1.0023, 0.9539, 0.9843, 65 | 1.1116, 0.9037, 1.0865, 1.0955, 66 | 1.0865, 1.1529, 1.0507]).cuda() 67 | 68 | def read_files(self): 69 | files = [] 70 | if 'test' in self.list_path: 71 | for item in self.img_list: 72 | image_path = item 73 | name = os.path.splitext(os.path.basename(image_path[0]))[0] 74 | files.append({ 75 | "img": image_path[0], 76 | "name": name, 77 | }) 78 | else: 79 | for item in self.img_list: 80 | image_path, label_path = item 81 | name = os.path.splitext(os.path.basename(label_path))[0] 82 | files.append({ 83 | "img": image_path, 84 | "label": label_path, 85 | "name": name, 86 | "weight": 1 87 | }) 88 | return files 89 | 90 | def convert_label(self, label, inverse=False): 91 | temp = label.copy() 92 | if inverse: 93 | for v, k in self.label_mapping.items(): 94 | label[temp == k] = v 95 | else: 96 | for k, v in self.label_mapping.items(): 97 | label[temp == k] = v 98 | return label 99 | 100 | def __getitem__(self, index): 101 | item = self.files[index] 102 | name = item["name"] 103 | # image = cv2.imread(os.path.join(self.root,'cityscapes',item["img"]), 104 | # cv2.IMREAD_COLOR) 105 | image = cv2.imread(os.path.join(self.root, item["img"]), 106 | cv2.IMREAD_COLOR) 107 | size = image.shape 108 | 109 | if 'test' in self.list_path: 110 | image = self.input_transform(image) 111 | image = image.transpose((2, 0, 1)) 112 | 113 | return image.copy(), np.array(size), name 114 | 115 | # label = cv2.imread(os.path.join(self.root,'cityscapes',item["label"]), 116 | # cv2.IMREAD_GRAYSCALE) 117 | label = cv2.imread(os.path.join(self.root, item["label"]), 118 | cv2.IMREAD_GRAYSCALE) 119 | label = self.convert_label(label) 120 | 121 | image, label = self.gen_sample(image, label, 122 | self.multi_scale, self.flip) 123 | 124 | 
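# At this point `image` is a normalized float32 array in CHW layout and `label` an int32 train-id map; gen_sample has applied the configured random scaling, cropping, brightness shift and horizontal flip.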
return image.copy(), label.copy(), np.array(size), name 125 | 126 | def multi_scale_inference(self, config, model, image, scales=[1], flip=False): 127 | batch, _, ori_height, ori_width = image.size() 128 | assert batch == 1, "only supporting batchsize 1." 129 | image = image.numpy()[0].transpose((1,2,0)).copy() 130 | stride_h = np.int(self.crop_size[0] * 1.0) 131 | stride_w = np.int(self.crop_size[1] * 1.0) 132 | final_pred = torch.zeros([1, self.num_classes, 133 | ori_height,ori_width]).cuda() 134 | for scale in scales: 135 | new_img = self.multi_scale_aug(image=image, 136 | rand_scale=scale, 137 | rand_crop=False) 138 | height, width = new_img.shape[:-1] 139 | 140 | if scale <= 1.0: 141 | new_img = new_img.transpose((2, 0, 1)) 142 | new_img = np.expand_dims(new_img, axis=0) 143 | new_img = torch.from_numpy(new_img) 144 | preds = self.inference(config, model, new_img, flip) 145 | preds = preds[:, :, 0:height, 0:width] 146 | else: 147 | new_h, new_w = new_img.shape[:-1] 148 | rows = np.int(np.ceil(1.0 * (new_h - 149 | self.crop_size[0]) / stride_h)) + 1 150 | cols = np.int(np.ceil(1.0 * (new_w - 151 | self.crop_size[1]) / stride_w)) + 1 152 | preds = torch.zeros([1, self.num_classes, 153 | new_h,new_w]).cuda() 154 | count = torch.zeros([1,1, new_h, new_w]).cuda() 155 | 156 | for r in range(rows): 157 | for c in range(cols): 158 | h0 = r * stride_h 159 | w0 = c * stride_w 160 | h1 = min(h0 + self.crop_size[0], new_h) 161 | w1 = min(w0 + self.crop_size[1], new_w) 162 | h0 = max(int(h1 - self.crop_size[0]), 0) 163 | w0 = max(int(w1 - self.crop_size[1]), 0) 164 | crop_img = new_img[h0:h1, w0:w1, :] 165 | crop_img = crop_img.transpose((2, 0, 1)) 166 | crop_img = np.expand_dims(crop_img, axis=0) 167 | crop_img = torch.from_numpy(crop_img) 168 | pred = self.inference(config, model, crop_img, flip) 169 | preds[:,:,h0:h1,w0:w1] += pred[:,:, 0:h1-h0, 0:w1-w0] 170 | count[:,:,h0:h1,w0:w1] += 1 171 | preds = preds / count 172 | preds = preds[:,:,:height,:width] 173 | 174 | preds = F.interpolate( 175 | preds, (ori_height, ori_width), 176 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 177 | ) 178 | final_pred += preds 179 | return final_pred 180 | 181 | def get_palette(self, n): 182 | palette = [0] * (n * 3) 183 | for j in range(0, n): 184 | lab = j 185 | palette[j * 3 + 0] = 0 186 | palette[j * 3 + 1] = 0 187 | palette[j * 3 + 2] = 0 188 | i = 0 189 | while lab: 190 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 191 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 192 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 193 | i += 1 194 | lab >>= 3 195 | return palette 196 | 197 | def save_pred(self, preds, sv_path, name): 198 | palette = self.get_palette(256) 199 | preds = np.asarray(np.argmax(preds.cpu(), axis=1), dtype=np.uint8) 200 | for i in range(preds.shape[0]): 201 | pred = self.convert_label(preds[i], inverse=True) 202 | save_img = Image.fromarray(pred) 203 | save_img.putpalette(palette) 204 | save_img.save(os.path.join(sv_path, name[i]+'.png')) 205 | 206 | 207 | 208 | -------------------------------------------------------------------------------- /lib/datasets/cocostuff.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
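A short sketch of the id conversion done by `convert_label` above; the file name is hypothetical:

    import cv2
    raw = cv2.imread('munster_000000_000019_gtFine_labelIds.png', cv2.IMREAD_GRAYSCALE)
    train_ids = dataset.convert_label(raw)  # collapses the 34 raw ids onto 19 train ids, e.g. raw id 7 (road) -> 0
    raw_ids = dataset.convert_label(train_ids.copy(), inverse=True)  # restores raw ids for the 19 valid classes, as in save_pred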
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | 12 | import torch 13 | from torch.nn import functional as F 14 | from PIL import Image 15 | 16 | from .base_dataset import BaseDataset 17 | 18 | 19 | class COCOStuff(BaseDataset): 20 | def __init__(self, 21 | root, 22 | list_path, 23 | num_samples=None, 24 | num_classes=171, 25 | multi_scale=True, 26 | flip=True, 27 | ignore_label=-1, 28 | base_size=520, 29 | crop_size=(520, 520), 30 | downsample_rate=1, 31 | scale_factor=11, 32 | mean=[0.485, 0.456, 0.406], 33 | std=[0.229, 0.224, 0.225]): 34 | 35 | super(COCOStuff, self).__init__(ignore_label, base_size, 36 | crop_size, downsample_rate, scale_factor, mean, std) 37 | 38 | self.root = root 39 | self.num_classes = num_classes 40 | self.list_path = list_path 41 | self.class_weights = None 42 | 43 | self.multi_scale = multi_scale 44 | self.flip = flip 45 | self.crop_size = crop_size 46 | self.img_list = [line.strip().split() for line in open(root+list_path)] 47 | 48 | self.files = self.read_files() 49 | if num_samples: 50 | self.files = self.files[:num_samples] 51 | self.mapping = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 52 | 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 53 | 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 54 | 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 55 | 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 56 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 57 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 58 | 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 59 | 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 60 | 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 61 | 177, 178, 179, 180, 181, 182] 62 | 63 | def read_files(self): 64 | files = [] 65 | for item in self.img_list: 66 | image_path, label_path = item 67 | name = os.path.splitext(os.path.basename(label_path))[0] 68 | sample = { 69 | 'img': image_path, 70 | 'label': label_path, 71 | 'name': name 72 | } 73 | files.append(sample) 74 | return files 75 | 76 | def encode_label(self, labelmap): 77 | ret = np.ones_like(labelmap) * 255 78 | for idx, label in enumerate(self.mapping): 79 | ret[labelmap == label] = idx 80 | 81 | return ret 82 | 83 | def resize_image(self, image, label, size): 84 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) 85 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST) 86 | return image, label 87 | 88 | def __getitem__(self, index): 89 | item = self.files[index] 90 | name = item["name"] 91 | image_path = os.path.join(self.root, item['img']) 92 | label_path = os.path.join(self.root, item['label']) 93 | image = cv2.imread( 94 | image_path, 95 | cv2.IMREAD_COLOR 96 | ) 97 | label = np.array( 98 | Image.open(label_path).convert('P') 99 | ) 100 | label = self.encode_label(label) 101 | label = self.reduce_zero_label(label) 102 | size = label.shape 103 | 104 | if 'testval' in self.list_path: 105 | image, border_padding = self.resize_short_length( 106 | image, 107 | short_length=self.base_size, 108 | fit_stride=8, 109 | return_padding=True 110 | ) 111 | image = self.input_transform(image) 112 | image = image.transpose((2, 0, 1)) 113 | 114 | return image.copy(), 
label.copy(), np.array(size), name, border_padding 115 | 116 | if 'val' in self.list_path: 117 | image, label = self.resize_short_length( 118 | image, 119 | label=label, 120 | short_length=self.base_size, 121 | fit_stride=8 122 | ) 123 | image, label = self.rand_crop(image, label) 124 | image = self.input_transform(image) 125 | image = image.transpose((2, 0, 1)) 126 | 127 | return image.copy(), label.copy(), np.array(size), name 128 | 129 | image, label = self.resize_short_length(image, label, short_length=self.base_size) 130 | image, label = self.gen_sample(image, label, self.multi_scale, self.flip) 131 | 132 | return image.copy(), label.copy(), np.array(size), name -------------------------------------------------------------------------------- /lib/datasets/lip.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | 12 | import torch 13 | from torch.nn import functional as F 14 | from PIL import Image 15 | 16 | from .base_dataset import BaseDataset 17 | 18 | 19 | class LIP(BaseDataset): 20 | def __init__(self, 21 | root, 22 | list_path, 23 | num_samples=None, 24 | num_classes=20, 25 | multi_scale=True, 26 | flip=True, 27 | ignore_label=-1, 28 | base_size=473, 29 | crop_size=(473, 473), 30 | downsample_rate=1, 31 | scale_factor=11, 32 | mean=[0.485, 0.456, 0.406], 33 | std=[0.229, 0.224, 0.225]): 34 | 35 | super(LIP, self).__init__(ignore_label, base_size, 36 | crop_size, downsample_rate, scale_factor, mean, std) 37 | 38 | self.root = root 39 | self.num_classes = num_classes 40 | self.list_path = list_path 41 | self.class_weights = None 42 | 43 | self.multi_scale = multi_scale 44 | self.flip = flip 45 | self.img_list = [line.strip().split() for line in open(root+list_path)] 46 | 47 | self.files = self.read_files() 48 | if num_samples: 49 | self.files = self.files[:num_samples] 50 | 51 | def read_files(self): 52 | files = [] 53 | for item in self.img_list: 54 | if 'train' in self.list_path: 55 | image_path, label_path, _ = item 56 | name = os.path.splitext(os.path.basename(label_path))[0] 57 | sample = {"img": image_path, 58 | "label": label_path, 59 | "name": name, } 60 | elif 'val' in self.list_path: 61 | image_path, label_path = item 62 | name = os.path.splitext(os.path.basename(label_path))[0] 63 | sample = {"img": image_path, 64 | "label": label_path, 65 | "name": name, } 66 | else: 67 | raise NotImplementedError('Unknown subset.') 68 | files.append(sample) 69 | return files 70 | 71 | def resize_image(self, image, label, size): 72 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) 73 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST) 74 | return image, label 75 | 76 | def __getitem__(self, index): 77 | item = self.files[index] 78 | name = item["name"] 79 | image_path = os.path.join(self.root, item['img']) 80 | label_path = os.path.join(self.root, item['label']) 81 | image = cv2.imread( 82 | image_path, 83 | cv2.IMREAD_COLOR 84 | ) 85 | label = np.array( 86 | Image.open(label_path).convert('P') 87 | ) 88 | 89 | size = label.shape 90 | if 'testval' in self.list_path: 91 | image = cv2.resize(image, self.crop_size, 92 | interpolation=cv2.INTER_LINEAR) 93 | image = 
self.input_transform(image) 94 | image = image.transpose((2, 0, 1)) 95 | 96 | return image.copy(), label.copy(), np.array(size), name 97 | 98 | if self.flip: 99 | flip = np.random.choice(2) * 2 - 1 100 | image = image[:, ::flip, :] 101 | label = label[:, ::flip] 102 | 103 | if flip == -1: 104 | right_idx = [15, 17, 19] 105 | left_idx = [14, 16, 18] 106 | for i in range(0, 3): 107 | right_pos = np.where(label == right_idx[i]) 108 | left_pos = np.where(label == left_idx[i]) 109 | label[right_pos[0], right_pos[1]] = left_idx[i] 110 | label[left_pos[0], left_pos[1]] = right_idx[i] 111 | 112 | image, label = self.resize_image(image, label, self.crop_size) 113 | image, label = self.gen_sample(image, label, 114 | self.multi_scale, False) 115 | 116 | return image.copy(), label.copy(), np.array(size), name 117 | 118 | def inference(self, config, model, image, flip): 119 | size = image.size() 120 | pred = model(image) 121 | if config.MODEL.NUM_OUTPUTS > 1: 122 | pred = pred[config.TEST.OUTPUT_INDEX] 123 | 124 | pred = F.interpolate( 125 | input=pred, size=size[-2:], 126 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 127 | ) 128 | 129 | if flip: 130 | flip_img = image.numpy()[:, :, :, ::-1] 131 | flip_output = model(torch.from_numpy(flip_img.copy())) 132 | 133 | if config.MODEL.NUM_OUTPUTS > 1: 134 | flip_output = flip_output[config.TEST.OUTPUT_INDEX] 135 | 136 | flip_output = F.interpolate( 137 | input=flip_output, size=size[-2:], 138 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 139 | ) 140 | 141 | flip_output = flip_output.cpu() 142 | flip_pred = flip_output.cpu().numpy().copy() 143 | flip_pred[:, 14, :, :] = flip_output[:, 15, :, :] 144 | flip_pred[:, 15, :, :] = flip_output[:, 14, :, :] 145 | flip_pred[:, 16, :, :] = flip_output[:, 17, :, :] 146 | flip_pred[:, 17, :, :] = flip_output[:, 16, :, :] 147 | flip_pred[:, 18, :, :] = flip_output[:, 19, :, :] 148 | flip_pred[:, 19, :, :] = flip_output[:, 18, :, :] 149 | flip_pred = torch.from_numpy( 150 | flip_pred[:, :, :, ::-1].copy()).cuda() 151 | pred += flip_pred 152 | pred = pred * 0.5 153 | return pred.exp() 154 | -------------------------------------------------------------------------------- /lib/datasets/pascal_ctx.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
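Because LIP is a human-parsing task, the flip branch of `LIP.inference` above must exchange left/right part channels before averaging the flipped logits; the six explicit assignments are equivalent to this compact sketch (assuming numpy arrays throughout):

    for a, b in [(14, 15), (16, 17), (18, 19)]:  # left/right arm, leg and shoe channel pairs
        flip_pred[:, [a, b]] = flip_output[:, [b, a]]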
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # Referring to the implementation in 6 | # https://github.com/zhanghang1989/PyTorch-Encoding 7 | # ------------------------------------------------------------------------------ 8 | 9 | import os 10 | 11 | import cv2 12 | import numpy as np 13 | 14 | import torch 15 | from torch.nn import functional as F 16 | from PIL import Image 17 | 18 | from .base_dataset import BaseDataset 19 | 20 | class PASCALContext(BaseDataset): 21 | def __init__(self, 22 | root, 23 | list_path, 24 | num_samples=None, 25 | num_classes=59, 26 | multi_scale=True, 27 | flip=True, 28 | ignore_label=-1, 29 | base_size=520, 30 | crop_size=(480, 480), 31 | downsample_rate=1, 32 | scale_factor=16, 33 | mean=[0.485, 0.456, 0.406], 34 | std=[0.229, 0.224, 0.225]): 35 | 36 | super(PASCALContext, self).__init__(ignore_label, base_size, 37 | crop_size, downsample_rate, scale_factor, mean, std) 38 | 39 | self.root = root 40 | self.num_classes = num_classes 41 | self.list_path = list_path 42 | self.class_weights = None 43 | 44 | self.multi_scale = multi_scale 45 | self.flip = flip 46 | self.crop_size = crop_size 47 | self.img_list = [line.strip().split() for line in open(root+list_path)] 48 | 49 | self.files = self.read_files() 50 | if num_samples: 51 | self.files = self.files[:num_samples] 52 | 53 | def read_files(self): 54 | files = [] 55 | for item in self.img_list: 56 | image_path, label_path = item 57 | name = os.path.splitext(os.path.basename(label_path))[0] 58 | sample = { 59 | 'img': image_path, 60 | 'label': label_path, 61 | 'name': name 62 | } 63 | files.append(sample) 64 | return files 65 | 66 | def resize_image(self, image, label, size): 67 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) 68 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST) 69 | return image, label 70 | 71 | def __getitem__(self, index): 72 | item = self.files[index] 73 | name = item["name"] 74 | image_path = os.path.join(self.root, item['img']) 75 | label_path = os.path.join(self.root, item['label']) 76 | image = cv2.imread( 77 | image_path, 78 | cv2.IMREAD_COLOR 79 | ) 80 | label = np.array( 81 | Image.open(label_path).convert('P') 82 | ) 83 | if self.num_classes == 59: 84 | label = self.reduce_zero_label(label) 85 | size = label.shape 86 | 87 | if 'testval' in self.list_path: 88 | image, border_padding = self.resize_short_length( 89 | image, 90 | short_length=self.base_size, 91 | fit_stride=8, 92 | return_padding=True 93 | ) 94 | image = self.input_transform(image) 95 | image = image.transpose((2, 0, 1)) 96 | 97 | return image.copy(), label.copy(), np.array(size), name, border_padding 98 | 99 | if 'val' in self.list_path: 100 | image, label = self.resize_short_length( 101 | image, 102 | label=label, 103 | short_length=self.base_size, 104 | fit_stride=8 105 | ) 106 | image, label = self.rand_crop(image, label) 107 | image = self.input_transform(image) 108 | image = image.transpose((2, 0, 1)) 109 | 110 | return image.copy(), label.copy(), np.array(size), name 111 | 112 | image, label = self.resize_short_length(image, label, short_length=self.base_size) 113 | image, label = self.gen_sample(image, label, self.multi_scale, self.flip) 114 | 115 | return image.copy(), label.copy(), np.array(size), name -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 
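In the 59-class PASCAL-Context setting, `reduce_zero_label` shifts every label down by one so that background (0) becomes -1 and coincides with the default `ignore_label`; the cls60 configs keep the raw labels instead. A tiny numeric sketch:

    import numpy as np
    label = np.array([[0, 1, 59]])
    dataset.reduce_zero_label(label)  # -> [[-1, 0, 58]]; background is now ignored by the loss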
2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import models.seg_hrnet 12 | import models.seg_hrnet_ocr -------------------------------------------------------------------------------- /lib/models/bn_helper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import functools 3 | 4 | if torch.__version__.startswith('0'): 5 | from .sync_bn.inplace_abn.bn import InPlaceABNSync 6 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 7 | BatchNorm2d_class = InPlaceABNSync 8 | relu_inplace = False 9 | else: 10 | BatchNorm2d_class = BatchNorm2d = torch.nn.SyncBatchNorm 11 | relu_inplace = True -------------------------------------------------------------------------------- /lib/models/sync_bn/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | BSD 3-Clause License 3 | 4 | Copyright (c) 2017, mapillary 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
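The version switch in bn_helper.py above lets the same model definition run on old and new PyTorch alike: model code imports its normalization layer from one place instead of hard-coding it. A minimal usage sketch (the channel count 48 is arbitrary):

    import torch.nn as nn
    from models.bn_helper import BatchNorm2d, relu_inplace

    norm = BatchNorm2d(48)  # InPlaceABNSync(activation='none') on torch 0.x, nn.SyncBatchNorm on 1.x
    act = nn.ReLU(inplace=relu_inplace)  # in-place ReLU would overwrite the in-place ABN output, hence the flag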
31 | -------------------------------------------------------------------------------- /lib/models/sync_bn/__init__.py: -------------------------------------------------------------------------------- 1 | from .inplace_abn import bn -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/__init__.py: -------------------------------------------------------------------------------- 1 | from .bn import ABN, InPlaceABN, InPlaceABNSync 2 | from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE 3 | -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/bn.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as functional 5 | 6 | try: 7 | from queue import Queue 8 | except ImportError: 9 | from Queue import Queue 10 | 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | sys.path.append(BASE_DIR) 13 | sys.path.append(os.path.join(BASE_DIR, '../src')) 14 | from functions import * 15 | 16 | 17 | class ABN(nn.Module): 18 | """Activated Batch Normalization 19 | 20 | This gathers a `BatchNorm2d` and an activation function in a single module. 21 | """ 22 | 23 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 24 | """Creates an Activated Batch Normalization module 25 | 26 | Parameters 27 | ---------- 28 | num_features : int 29 | Number of feature channels in the input and output. 30 | eps : float 31 | Small constant to prevent numerical issues. 32 | momentum : float 33 | Momentum factor applied to compute running statistics. 34 | affine : bool 35 | If `True` apply learned scale and shift transformation after normalization. 36 | activation : str 37 | Name of the activation function, one of: `leaky_relu`, `elu` or `none`. 38 | slope : float 39 | Negative slope for the `leaky_relu` activation.
40 | """ 41 | super(ABN, self).__init__() 42 | self.num_features = num_features 43 | self.affine = affine 44 | self.eps = eps 45 | self.momentum = momentum 46 | self.activation = activation 47 | self.slope = slope 48 | if self.affine: 49 | self.weight = nn.Parameter(torch.ones(num_features)) 50 | self.bias = nn.Parameter(torch.zeros(num_features)) 51 | else: 52 | self.register_parameter('weight', None) 53 | self.register_parameter('bias', None) 54 | self.register_buffer('running_mean', torch.zeros(num_features)) 55 | self.register_buffer('running_var', torch.ones(num_features)) 56 | self.reset_parameters() 57 | 58 | def reset_parameters(self): 59 | nn.init.constant_(self.running_mean, 0) 60 | nn.init.constant_(self.running_var, 1) 61 | if self.affine: 62 | nn.init.constant_(self.weight, 1) 63 | nn.init.constant_(self.bias, 0) 64 | 65 | def forward(self, x): 66 | x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, 67 | self.training, self.momentum, self.eps) 68 | 69 | if self.activation == ACT_RELU: 70 | return functional.relu(x, inplace=True) 71 | elif self.activation == ACT_LEAKY_RELU: 72 | return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) 73 | elif self.activation == ACT_ELU: 74 | return functional.elu(x, inplace=True) 75 | else: 76 | return x 77 | 78 | def __repr__(self): 79 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 80 | ' affine={affine}, activation={activation}' 81 | if self.activation == "leaky_relu": 82 | rep += ', slope={slope})' 83 | else: 84 | rep += ')' 85 | return rep.format(name=self.__class__.__name__, **self.__dict__) 86 | 87 | 88 | class InPlaceABN(ABN): 89 | """InPlace Activated Batch Normalization""" 90 | 91 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 92 | """Creates an InPlace Activated Batch Normalization module 93 | 94 | Parameters 95 | ---------- 96 | num_features : int 97 | Number of feature channels in the input and output. 98 | eps : float 99 | Small constant to prevent numerical issues. 100 | momentum : float 101 | Momentum factor applied to compute running statistics as. 102 | affine : bool 103 | If `True` apply learned scale and shift transformation after normalization. 104 | activation : str 105 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 106 | slope : float 107 | Negative slope for the `leaky_relu` activation. 108 | """ 109 | super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) 110 | 111 | def forward(self, x): 112 | return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, 113 | self.training, self.momentum, self.eps, self.activation, self.slope) 114 | 115 | 116 | class InPlaceABNSync(ABN): 117 | """InPlace Activated Batch Normalization with cross-GPU synchronization 118 | 119 | This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DataParallel`. 120 | """ 121 | 122 | def __init__(self, num_features, devices=None, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", 123 | slope=0.01): 124 | """Creates a synchronized, InPlace Activated Batch Normalization module 125 | 126 | Parameters 127 | ---------- 128 | num_features : int 129 | Number of feature channels in the input and output. 130 | devices : list of int or None 131 | IDs of the GPUs that will run the replicas of this module. 132 | eps : float 133 | Small constant to prevent numerical issues. 
134 | momentum : float 135 | Momentum factor applied to compute running statistics. 136 | affine : bool 137 | If `True` apply learned scale and shift transformation after normalization. 138 | activation : str 139 | Name of the activation function, one of: `leaky_relu`, `elu` or `none`. 140 | slope : float 141 | Negative slope for the `leaky_relu` activation. 142 | """ 143 | super(InPlaceABNSync, self).__init__(num_features, eps, momentum, affine, activation, slope) 144 | self.devices = devices if devices else list(range(torch.cuda.device_count())) 145 | 146 | # Initialize queues 147 | self.worker_ids = self.devices[1:] 148 | self.master_queue = Queue(len(self.worker_ids)) 149 | self.worker_queues = [Queue(1) for _ in self.worker_ids] 150 | 151 | def forward(self, x): 152 | if x.get_device() == self.devices[0]: 153 | # Master mode 154 | extra = { 155 | "is_master": True, 156 | "master_queue": self.master_queue, 157 | "worker_queues": self.worker_queues, 158 | "worker_ids": self.worker_ids 159 | } 160 | else: 161 | # Worker mode 162 | extra = { 163 | "is_master": False, 164 | "master_queue": self.master_queue, 165 | "worker_queue": self.worker_queues[self.worker_ids.index(x.get_device())] 166 | } 167 | 168 | return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, 169 | extra, self.training, self.momentum, self.eps, self.activation, self.slope) 170 | 171 | def __repr__(self): 172 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 173 | ' affine={affine}, devices={devices}, activation={activation}' 174 | if self.activation == "leaky_relu": 175 | rep += ', slope={slope})' 176 | else: 177 | rep += ')' 178 | return rep.format(name=self.__class__.__name__, **self.__dict__) 179 | -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/functions.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | import torch.autograd as autograd 4 | import torch.cuda.comm as comm 5 | from torch.autograd.function import once_differentiable 6 | from torch.utils.cpp_extension import load 7 | 8 | _src_path = path.join(path.dirname(path.abspath(__file__)), "src") 9 | _backend = load(name="inplace_abn", 10 | extra_cflags=["-O3"], 11 | sources=[path.join(_src_path, f) for f in [ 12 | "inplace_abn.cpp", 13 | "inplace_abn_cpu.cpp", 14 | "inplace_abn_cuda.cu" 15 | ]], 16 | extra_cuda_cflags=["--expt-extended-lambda"]) 17 | 18 | # Activation names 19 | ACT_RELU = "relu" 20 | ACT_LEAKY_RELU = "leaky_relu" 21 | ACT_ELU = "elu" 22 | ACT_NONE = "none" 23 | 24 | 25 | def _check(fn, *args, **kwargs): 26 | success = fn(*args, **kwargs) 27 | if not success: 28 | raise RuntimeError("CUDA Error encountered in {}".format(fn)) 29 | 30 | 31 | def _broadcast_shape(x): 32 | out_size = [] 33 | for i, s in enumerate(x.size()): 34 | if i != 1: 35 | out_size.append(1) 36 | else: 37 | out_size.append(s) 38 | return out_size 39 | 40 | 41 | def _reduce(x): 42 | if len(x.size()) == 2: 43 | return x.sum(dim=0) 44 | else: 45 | n, c = x.size()[0:2] 46 | return x.contiguous().view((n, c, -1)).sum(2).sum(0) 47 | 48 | 49 | def _count_samples(x): 50 | count = 1 51 | for i, s in enumerate(x.size()): 52 | if i != 1: 53 | count *= s 54 | return count 55 | 56 | 57 | def _act_forward(ctx, x): 58 | if ctx.activation == ACT_LEAKY_RELU: 59 | _backend.leaky_relu_forward(x, ctx.slope) 60 | elif ctx.activation == ACT_ELU: 61 | _backend.elu_forward(x) 62 | elif ctx.activation == ACT_NONE: 63 |
pass 64 | 65 | 66 | def _act_backward(ctx, x, dx): 67 | if ctx.activation == ACT_LEAKY_RELU: 68 | _backend.leaky_relu_backward(x, dx, ctx.slope) 69 | elif ctx.activation == ACT_ELU: 70 | _backend.elu_backward(x, dx) 71 | elif ctx.activation == ACT_NONE: 72 | pass 73 | 74 | 75 | class InPlaceABN(autograd.Function): 76 | @staticmethod 77 | def forward(ctx, x, weight, bias, running_mean, running_var, 78 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 79 | # Save context 80 | ctx.training = training 81 | ctx.momentum = momentum 82 | ctx.eps = eps 83 | ctx.activation = activation 84 | ctx.slope = slope 85 | ctx.affine = weight is not None and bias is not None 86 | 87 | # Prepare inputs 88 | count = _count_samples(x) 89 | x = x.contiguous() 90 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 91 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 92 | 93 | if ctx.training: 94 | mean, var = _backend.mean_var(x) 95 | 96 | # Update running stats 97 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 98 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) 99 | 100 | # Mark in-place modified tensors 101 | ctx.mark_dirty(x, running_mean, running_var) 102 | else: 103 | mean, var = running_mean.contiguous(), running_var.contiguous() 104 | ctx.mark_dirty(x) 105 | 106 | # BN forward + activation 107 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 108 | _act_forward(ctx, x) 109 | 110 | # Output 111 | ctx.var = var 112 | ctx.save_for_backward(x, var, weight, bias) 113 | return x 114 | 115 | @staticmethod 116 | @once_differentiable 117 | def backward(ctx, dz): 118 | z, var, weight, bias = ctx.saved_tensors 119 | dz = dz.contiguous() 120 | 121 | # Undo activation 122 | _act_backward(ctx, z, dz) 123 | 124 | if ctx.training: 125 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 126 | else: 127 | # TODO: implement simplified CUDA backward for inference mode 128 | edz = dz.new_zeros(dz.size(1)) 129 | eydz = dz.new_zeros(dz.size(1)) 130 | 131 | dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 132 | dweight = dweight if ctx.affine else None 133 | dbias = dbias if ctx.affine else None 134 | 135 | return dx, dweight, dbias, None, None, None, None, None, None, None 136 | 137 | 138 | class InPlaceABNSync(autograd.Function): 139 | @classmethod 140 | def forward(cls, ctx, x, weight, bias, running_mean, running_var, 141 | extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 142 | # Save context 143 | cls._parse_extra(ctx, extra) 144 | ctx.training = training 145 | ctx.momentum = momentum 146 | ctx.eps = eps 147 | ctx.activation = activation 148 | ctx.slope = slope 149 | ctx.affine = weight is not None and bias is not None 150 | 151 | # Prepare inputs 152 | count = _count_samples(x) * (ctx.master_queue.maxsize + 1) 153 | x = x.contiguous() 154 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 155 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 156 | 157 | if ctx.training: 158 | mean, var = _backend.mean_var(x) 159 | 160 | if ctx.is_master: 161 | means, vars = [mean.unsqueeze(0)], [var.unsqueeze(0)] 162 | for _ in range(ctx.master_queue.maxsize): 163 | mean_w, var_w = ctx.master_queue.get() 164 | ctx.master_queue.task_done() 165 | means.append(mean_w.unsqueeze(0)) 166 | vars.append(var_w.unsqueeze(0)) 167 | 168 | means = comm.gather(means) 169 | vars = comm.gather(vars) 
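# The master has now gathered every replica's batch statistics; the global statistics follow from the law of total variance, var = E[var_i] + E[(mean_i - mean)^2], which is exactly what the next two lines compute before broadcasting the result back to the workers.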
170 | 171 | mean = means.mean(0) 172 | var = (vars + (mean - means) ** 2).mean(0) 173 | 174 | tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids) 175 | for ts, queue in zip(tensors[1:], ctx.worker_queues): 176 | queue.put(ts) 177 | else: 178 | ctx.master_queue.put((mean, var)) 179 | mean, var = ctx.worker_queue.get() 180 | ctx.worker_queue.task_done() 181 | 182 | # Update running stats 183 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 184 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) 185 | 186 | # Mark in-place modified tensors 187 | ctx.mark_dirty(x, running_mean, running_var) 188 | else: 189 | mean, var = running_mean.contiguous(), running_var.contiguous() 190 | ctx.mark_dirty(x) 191 | 192 | # BN forward + activation 193 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 194 | _act_forward(ctx, x) 195 | 196 | # Output 197 | ctx.var = var 198 | ctx.save_for_backward(x, var, weight, bias) 199 | return x 200 | 201 | @staticmethod 202 | @once_differentiable 203 | def backward(ctx, dz): 204 | z, var, weight, bias = ctx.saved_tensors 205 | dz = dz.contiguous() 206 | 207 | # Undo activation 208 | _act_backward(ctx, z, dz) 209 | 210 | if ctx.training: 211 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 212 | 213 | if ctx.is_master: 214 | edzs, eydzs = [edz], [eydz] 215 | for _ in range(len(ctx.worker_queues)): 216 | edz_w, eydz_w = ctx.master_queue.get() 217 | ctx.master_queue.task_done() 218 | edzs.append(edz_w) 219 | eydzs.append(eydz_w) 220 | 221 | edz = comm.reduce_add(edzs) / (ctx.master_queue.maxsize + 1) 222 | eydz = comm.reduce_add(eydzs) / (ctx.master_queue.maxsize + 1) 223 | 224 | tensors = comm.broadcast_coalesced((edz, eydz), [edz.get_device()] + ctx.worker_ids) 225 | for ts, queue in zip(tensors[1:], ctx.worker_queues): 226 | queue.put(ts) 227 | else: 228 | ctx.master_queue.put((edz, eydz)) 229 | edz, eydz = ctx.worker_queue.get() 230 | ctx.worker_queue.task_done() 231 | else: 232 | edz = dz.new_zeros(dz.size(1)) 233 | eydz = dz.new_zeros(dz.size(1)) 234 | 235 | dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 236 | dweight = dweight if ctx.affine else None 237 | dbias = dbias if ctx.affine else None 238 | 239 | return dx, dweight, dbias, None, None, None, None, None, None, None, None 240 | 241 | @staticmethod 242 | def _parse_extra(ctx, extra): 243 | ctx.is_master = extra["is_master"] 244 | if ctx.is_master: 245 | ctx.master_queue = extra["master_queue"] 246 | ctx.worker_queues = extra["worker_queues"] 247 | ctx.worker_ids = extra["worker_ids"] 248 | else: 249 | ctx.master_queue = extra["master_queue"] 250 | ctx.worker_queue = extra["worker_queue"] 251 | 252 | 253 | inplace_abn = InPlaceABN.apply 254 | inplace_abn_sync = InPlaceABNSync.apply 255 | 256 | __all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"] 257 | -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/src/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <ATen/ATen.h> 4 | 5 | /* 6 | * General settings 7 | */ 8 | const int WARP_SIZE = 32; 9 | const int MAX_BLOCK_SIZE = 512; 10 | 11 | template<typename T> 12 | struct Pair { 13 | T v1, v2; 14 | __device__ Pair() {} 15 | __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {} 16 | __device__ Pair(T v) : v1(v), v2(v) {} 17 | __device__ Pair(int
v) : v1(v), v2(v) {} 18 | __device__ Pair<T> &operator+=(const Pair<T> &a) { 19 | v1 += a.v1; 20 | v2 += a.v2; 21 | return *this; 22 | } 23 | }; 24 | 25 | /* 26 | * Utility functions 27 | */ 28 | template<typename T> 29 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, 30 | unsigned int mask = 0xffffffff) { 31 | #if CUDART_VERSION >= 9000 32 | return __shfl_xor_sync(mask, value, laneMask, width); 33 | #else 34 | return __shfl_xor(value, laneMask, width); 35 | #endif 36 | } 37 | 38 | __device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } 39 | 40 | static int getNumThreads(int nElem) { 41 | int threadSizes[5] = {32, 64, 128, 256, MAX_BLOCK_SIZE}; 42 | for (int i = 0; i != 5; ++i) { 43 | if (nElem <= threadSizes[i]) { 44 | return threadSizes[i]; 45 | } 46 | } 47 | return MAX_BLOCK_SIZE; 48 | } 49 | 50 | template<typename T> 51 | static __device__ __forceinline__ T warpSum(T val) { 52 | #if __CUDA_ARCH__ >= 300 53 | for (int i = 0; i < getMSB(WARP_SIZE); ++i) { 54 | val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); 55 | } 56 | #else 57 | __shared__ T values[MAX_BLOCK_SIZE]; 58 | values[threadIdx.x] = val; 59 | __threadfence_block(); 60 | const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE; 61 | for (int i = 1; i < WARP_SIZE; i++) { 62 | val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; 63 | } 64 | #endif 65 | return val; 66 | } 67 | 68 | template<typename T> 69 | static __device__ __forceinline__ Pair<T> warpSum(Pair<T> value) { 70 | value.v1 = warpSum(value.v1); 71 | value.v2 = warpSum(value.v2); 72 | return value; 73 | } 74 | 75 | template<typename T, typename Op> 76 | __device__ T reduce(Op op, int plane, int N, int C, int S) { 77 | T sum = (T)0; 78 | for (int batch = 0; batch < N; ++batch) { 79 | for (int x = threadIdx.x; x < S; x += blockDim.x) { 80 | sum += op(batch, plane, x); 81 | } 82 | } 83 | 84 | // sum over NumThreads within a warp 85 | sum = warpSum(sum); 86 | 87 | // 'transpose', and reduce within warp again 88 | __shared__ T shared[32]; 89 | __syncthreads(); 90 | if (threadIdx.x % WARP_SIZE == 0) { 91 | shared[threadIdx.x / WARP_SIZE] = sum; 92 | } 93 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { 94 | // zero out the other entries in shared 95 | shared[threadIdx.x] = (T)0; 96 | } 97 | __syncthreads(); 98 | if (threadIdx.x / WARP_SIZE == 0) { 99 | sum = warpSum(shared[threadIdx.x]); 100 | if (threadIdx.x == 0) { 101 | shared[0] = sum; 102 | } 103 | } 104 | __syncthreads(); 105 | 106 | // Everyone picks it up, should be broadcast into the whole gradInput 107 | return shared[0]; 108 | } -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/src/inplace_abn.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | #include "inplace_abn.h" 6 | 7 | std::vector<at::Tensor> mean_var(at::Tensor x) { 8 | if (x.is_cuda()) { 9 | return mean_var_cuda(x); 10 | } else { 11 | return mean_var_cpu(x); 12 | } 13 | } 14 | 15 | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 16 | bool affine, float eps) { 17 | if (x.is_cuda()) { 18 | return forward_cuda(x, mean, var, weight, bias, affine, eps); 19 | } else { 20 | return forward_cpu(x, mean, var, weight, bias, affine, eps); 21 | } 22 | } 23 | 24 | std::vector<at::Tensor> edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 25 | bool affine, float eps) { 26 | if (z.is_cuda()) { 27 | return edz_eydz_cuda(z, dz, weight, bias, affine, eps); 28 | } else { 29 |
return edz_eydz_cpu(z, dz, weight, bias, affine, eps); 30 | } 31 | } 32 | 33 | std::vector<at::Tensor> backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 34 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 35 | if (z.is_cuda()) { 36 | return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); 37 | } else { 38 | return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); 39 | } 40 | } 41 | 42 | void leaky_relu_forward(at::Tensor z, float slope) { 43 | at::leaky_relu_(z, slope); 44 | } 45 | 46 | void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) { 47 | if (z.is_cuda()) { 48 | return leaky_relu_backward_cuda(z, dz, slope); 49 | } else { 50 | return leaky_relu_backward_cpu(z, dz, slope); 51 | } 52 | } 53 | 54 | void elu_forward(at::Tensor z) { 55 | at::elu_(z); 56 | } 57 | 58 | void elu_backward(at::Tensor z, at::Tensor dz) { 59 | if (z.is_cuda()) { 60 | return elu_backward_cuda(z, dz); 61 | } else { 62 | return elu_backward_cpu(z, dz); 63 | } 64 | } 65 | 66 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 67 | m.def("mean_var", &mean_var, "Mean and variance computation"); 68 | m.def("forward", &forward, "In-place forward computation"); 69 | m.def("edz_eydz", &edz_eydz, "First part of backward computation"); 70 | m.def("backward", &backward, "Second part of backward computation"); 71 | m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); 72 | m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); 73 | m.def("elu_forward", &elu_forward, "Elu forward computation"); 74 | m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); 75 | } -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/src/inplace_abn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <ATen/ATen.h> 4 | 5 | #include <vector> 6 | 7 | std::vector<at::Tensor> mean_var_cpu(at::Tensor x); 8 | std::vector<at::Tensor> mean_var_cuda(at::Tensor x); 9 | 10 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 11 | bool affine, float eps); 12 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 13 | bool affine, float eps); 14 | 15 | std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 16 | bool affine, float eps); 17 | std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 18 | bool affine, float eps); 19 | 20 | std::vector<at::Tensor> backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 21 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 22 | std::vector<at::Tensor> backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 23 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 24 | 25 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); 26 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); 27 | 28 | void elu_backward_cpu(at::Tensor z, at::Tensor dz); 29 | void elu_backward_cuda(at::Tensor z, at::Tensor dz); -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/src/inplace_abn_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | 3 | #include <vector> 4 | 5 | #include "inplace_abn.h" 6 | 7 |
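// The CPU fallback below expresses the same batch-norm algebra as the CUDA kernels using whole-tensor ATen ops: reduce_sum collapses an (N, C, *) tensor to per-channel sums, broadcast_to re-expands per-channel statistics for elementwise use, and invert_affine recovers the normalized activations from the in-place output.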
at::Tensor reduce_sum(at::Tensor x) { 8 | if (x.ndimension() == 2) { 9 | return x.sum(0); 10 | } else { 11 | auto x_view = x.view({x.size(0), x.size(1), -1}); 12 | return x_view.sum(-1).sum(0); 13 | } 14 | } 15 | 16 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { 17 | if (x.ndimension() == 2) { 18 | return v; 19 | } else { 20 | std::vector<int64_t> broadcast_size = {1, -1}; 21 | for (int64_t i = 2; i < x.ndimension(); ++i) 22 | broadcast_size.push_back(1); 23 | 24 | return v.view(broadcast_size); 25 | } 26 | } 27 | 28 | int64_t count(at::Tensor x) { 29 | int64_t count = x.size(0); 30 | for (int64_t i = 2; i < x.ndimension(); ++i) 31 | count *= x.size(i); 32 | 33 | return count; 34 | } 35 | 36 | at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { 37 | if (affine) { 38 | return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); 39 | } else { 40 | return z; 41 | } 42 | } 43 | 44 | std::vector<at::Tensor> mean_var_cpu(at::Tensor x) { 45 | auto num = count(x); 46 | auto mean = reduce_sum(x) / num; 47 | auto diff = x - broadcast_to(mean, x); 48 | auto var = reduce_sum(diff.pow(2)) / num; 49 | 50 | return {mean, var}; 51 | } 52 | 53 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 54 | bool affine, float eps) { 55 | auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var); 56 | auto mul = at::rsqrt(var + eps) * gamma; 57 | 58 | x.sub_(broadcast_to(mean, x)); 59 | x.mul_(broadcast_to(mul, x)); 60 | if (affine) x.add_(broadcast_to(bias, x)); 61 | 62 | return x; 63 | } 64 | 65 | std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 66 | bool affine, float eps) { 67 | auto edz = reduce_sum(dz); 68 | auto y = invert_affine(z, weight, bias, affine, eps); 69 | auto eydz = reduce_sum(y * dz); 70 | 71 | return {edz, eydz}; 72 | } 73 | 74 | std::vector<at::Tensor> backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 75 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 76 | auto y = invert_affine(z, weight, bias, affine, eps); 77 | auto mul = affine ?
at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps); 78 | 79 | auto num = count(z); 80 | auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz); 81 | 82 | auto dweight = at::empty(z.type(), {0}); 83 | auto dbias = at::empty(z.type(), {0}); 84 | if (affine) { 85 | dweight = eydz * at::sign(weight); 86 | dbias = edz; 87 | } 88 | 89 | return {dx, dweight, dbias}; 90 | } 91 | 92 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) { 93 | AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] { 94 | int64_t count = z.numel(); 95 | auto *_z = z.data<scalar_t>(); 96 | auto *_dz = dz.data<scalar_t>(); 97 | 98 | for (int64_t i = 0; i < count; ++i) { 99 | if (_z[i] < 0) { 100 | _z[i] *= 1 / slope; 101 | _dz[i] *= slope; 102 | } 103 | } 104 | })); 105 | } 106 | 107 | void elu_backward_cpu(at::Tensor z, at::Tensor dz) { 108 | AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] { 109 | int64_t count = z.numel(); 110 | auto *_z = z.data<scalar_t>(); 111 | auto *_dz = dz.data<scalar_t>(); 112 | 113 | for (int64_t i = 0; i < count; ++i) { 114 | if (_z[i] < 0) { 115 | _dz[i] *= (_z[i] + 1.f); // scale the gradient while _z still holds the ELU output y (dy/dx = y + 1) 116 | _z[i] = log1p(_z[i]); // then invert the activation, matching the order of the CUDA path 117 | } 118 | } 119 | })); 120 | } -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/src/inplace_abn_cuda.cu: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | 3 | #include <thrust/device_ptr.h> 4 | #include <thrust/transform.h> 5 | 6 | #include <vector> 7 | 8 | #include "common.h" 9 | #include "inplace_abn.h" 10 | 11 | // Checks 12 | #ifndef AT_CHECK 13 | #define AT_CHECK AT_ASSERT 14 | #endif 15 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 16 | #define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x " must be contiguous") 17 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 18 | 19 | // Utilities 20 | void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) { 21 | num = x.size(0); 22 | chn = x.size(1); 23 | sp = 1; 24 | for (int64_t i = 2; i < x.ndimension(); ++i) 25 | sp *= x.size(i); 26 | } 27 | 28 | // Operations for reduce 29 | template<typename T> 30 | struct SumOp { 31 | __device__ SumOp(const T *t, int c, int s) 32 | : tensor(t), chn(c), sp(s) {} 33 | __device__ __forceinline__ T operator()(int batch, int plane, int n) { 34 | return tensor[(batch * chn + plane) * sp + n]; 35 | } 36 | const T *tensor; 37 | const int chn; 38 | const int sp; 39 | }; 40 | 41 | template<typename T> 42 | struct VarOp { 43 | __device__ VarOp(T m, const T *t, int c, int s) 44 | : mean(m), tensor(t), chn(c), sp(s) {} 45 | __device__ __forceinline__ T operator()(int batch, int plane, int n) { 46 | T val = tensor[(batch * chn + plane) * sp + n]; 47 | return (val - mean) * (val - mean); 48 | } 49 | const T mean; 50 | const T *tensor; 51 | const int chn; 52 | const int sp; 53 | }; 54 | 55 | template<typename T> 56 | struct GradOp { 57 | __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s) 58 | : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} 59 | __device__ __forceinline__ Pair<T> operator()(int batch, int plane, int n) { 60 | T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight; 61 | T _dz = dz[(batch * chn + plane) * sp + n]; 62 | return Pair<T>(_dz, _y * _dz); 63 | } 64 | const T weight; 65 | const T bias; 66 | const T *z; 67 | const T *dz; 68 | const int chn; 69 | const int sp; 70 | }; 71 | 72 | /*********** 73 | * mean_var 74 | ***********/ 75 | 76 | template<typename T> 77 | __global__ void mean_var_kernel(const T *x, T
*mean, T *var, int num, int chn, int sp) { 78 | int plane = blockIdx.x; 79 | T norm = T(1) / T(num * sp); 80 | 81 | T _mean = reduce<T, SumOp<T>>(SumOp<T>(x, chn, sp), plane, num, chn, sp) * norm; 82 | __syncthreads(); 83 | T _var = reduce<T, VarOp<T>>(VarOp<T>(_mean, x, chn, sp), plane, num, chn, sp) * norm; 84 | 85 | if (threadIdx.x == 0) { 86 | mean[plane] = _mean; 87 | var[plane] = _var; 88 | } 89 | } 90 | 91 | std::vector<at::Tensor> mean_var_cuda(at::Tensor x) { 92 | CHECK_INPUT(x); 93 | 94 | // Extract dimensions 95 | int64_t num, chn, sp; 96 | get_dims(x, num, chn, sp); 97 | 98 | // Prepare output tensors 99 | auto mean = at::empty(x.type(), {chn}); 100 | auto var = at::empty(x.type(), {chn}); 101 | 102 | // Run kernel 103 | dim3 blocks(chn); 104 | dim3 threads(getNumThreads(sp)); 105 | AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] { 106 | mean_var_kernel<scalar_t><<<blocks, threads>>>( 107 | x.data<scalar_t>(), 108 | mean.data<scalar_t>(), 109 | var.data<scalar_t>(), 110 | num, chn, sp); 111 | })); 112 | 113 | return {mean, var}; 114 | } 115 | 116 | /********** 117 | * forward 118 | **********/ 119 | 120 | template<typename T> 121 | __global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias, 122 | bool affine, float eps, int num, int chn, int sp) { 123 | int plane = blockIdx.x; 124 | 125 | T _mean = mean[plane]; 126 | T _var = var[plane]; 127 | T _weight = affine ? abs(weight[plane]) + eps : T(1); 128 | T _bias = affine ? bias[plane] : T(0); 129 | 130 | T mul = rsqrt(_var + eps) * _weight; 131 | 132 | for (int batch = 0; batch < num; ++batch) { 133 | for (int n = threadIdx.x; n < sp; n += blockDim.x) { 134 | T _x = x[(batch * chn + plane) * sp + n]; 135 | T _y = (_x - _mean) * mul + _bias; 136 | 137 | x[(batch * chn + plane) * sp + n] = _y; 138 | } 139 | } 140 | } 141 | 142 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 143 | bool affine, float eps) { 144 | CHECK_INPUT(x); 145 | CHECK_INPUT(mean); 146 | CHECK_INPUT(var); 147 | CHECK_INPUT(weight); 148 | CHECK_INPUT(bias); 149 | 150 | // Extract dimensions 151 | int64_t num, chn, sp; 152 | get_dims(x, num, chn, sp); 153 | 154 | // Run kernel 155 | dim3 blocks(chn); 156 | dim3 threads(getNumThreads(sp)); 157 | AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] { 158 | forward_kernel<scalar_t><<<blocks, threads>>>( 159 | x.data<scalar_t>(), 160 | mean.data<scalar_t>(), 161 | var.data<scalar_t>(), 162 | weight.data<scalar_t>(), 163 | bias.data<scalar_t>(), 164 | affine, eps, num, chn, sp); 165 | })); 166 | 167 | return x; 168 | } 169 | 170 | /*********** 171 | * edz_eydz 172 | ***********/ 173 | 174 | template<typename T> 175 | __global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias, 176 | T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) { 177 | int plane = blockIdx.x; 178 | 179 | T _weight = affine ? abs(weight[plane]) + eps : 1.f; 180 | T _bias = affine ?
170 | /***********
171 |  * edz_eydz
172 |  ***********/
173 | 
174 | template <typename T>
175 | __global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias,
176 |                                 T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) {
177 |   int plane = blockIdx.x;
178 | 
179 |   T _weight = affine ? abs(weight[plane]) + eps : 1.f;
180 |   T _bias = affine ? bias[plane] : 0.f;
181 | 
182 |   Pair<T> res = reduce<Pair<T>, GradOp<T>>(GradOp<T>(_weight, _bias, z, dz, chn, sp), plane, num, chn, sp);
183 |   __syncthreads();
184 | 
185 |   if (threadIdx.x == 0) {
186 |     edz[plane] = res.v1;
187 |     eydz[plane] = res.v2;
188 |   }
189 | }
190 | 
191 | std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
192 |                                       bool affine, float eps) {
193 |   CHECK_INPUT(z);
194 |   CHECK_INPUT(dz);
195 |   CHECK_INPUT(weight);
196 |   CHECK_INPUT(bias);
197 | 
198 |   // Extract dimensions
199 |   int64_t num, chn, sp;
200 |   get_dims(z, num, chn, sp);
201 | 
202 |   auto edz = at::empty(z.type(), {chn});
203 |   auto eydz = at::empty(z.type(), {chn});
204 | 
205 |   // Run kernel
206 |   dim3 blocks(chn);
207 |   dim3 threads(getNumThreads(sp));
208 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] {
209 |     edz_eydz_kernel<scalar_t><<<blocks, threads>>>(
210 |         z.data<scalar_t>(),
211 |         dz.data<scalar_t>(),
212 |         weight.data<scalar_t>(),
213 |         bias.data<scalar_t>(),
214 |         edz.data<scalar_t>(),
215 |         eydz.data<scalar_t>(),
216 |         affine, eps, num, chn, sp);
217 |   }));
218 | 
219 |   return {edz, eydz};
220 | }
221 | 
222 | /***********
223 |  * backward
224 |  ***********/
225 | 
226 | template <typename T>
227 | __global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz,
228 |                                 const T *eydz, T *dx, T *dweight, T *dbias,
229 |                                 bool affine, float eps, int num, int chn, int sp) {
230 |   int plane = blockIdx.x;
231 | 
232 |   T _weight = affine ? abs(weight[plane]) + eps : 1.f;
233 |   T _bias = affine ? bias[plane] : 0.f;
234 |   T _var = var[plane];
235 |   T _edz = edz[plane];
236 |   T _eydz = eydz[plane];
237 | 
238 |   T _mul = _weight * rsqrt(_var + eps);
239 |   T count = T(num * sp);
240 | 
241 |   for (int batch = 0; batch < num; ++batch) {
242 |     for (int n = threadIdx.x; n < sp; n += blockDim.x) {
243 |       T _dz = dz[(batch * chn + plane) * sp + n];
244 |       T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight;
245 | 
246 |       dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul;
247 |     }
248 |   }
249 | 
250 |   if (threadIdx.x == 0) {
251 |     if (affine) {
252 |       dweight[plane] = weight[plane] > 0 ? _eydz : -_eydz;
253 |       dbias[plane] = _edz;
254 |     }
255 |   }
256 | }
257 | 
258 | std::vector<at::Tensor> backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
259 |                                       at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
260 |   CHECK_INPUT(z);
261 |   CHECK_INPUT(dz);
262 |   CHECK_INPUT(var);
263 |   CHECK_INPUT(weight);
264 |   CHECK_INPUT(bias);
265 |   CHECK_INPUT(edz);
266 |   CHECK_INPUT(eydz);
267 | 
268 |   // Extract dimensions
269 |   int64_t num, chn, sp;
270 |   get_dims(z, num, chn, sp);
271 | 
272 |   auto dx = at::zeros_like(z);
273 |   auto dweight = at::zeros_like(weight);
274 |   auto dbias = at::zeros_like(bias);
275 | 
276 |   // Run kernel
277 |   dim3 blocks(chn);
278 |   dim3 threads(getNumThreads(sp));
279 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] {
280 |     backward_kernel<scalar_t><<<blocks, threads>>>(
281 |         z.data<scalar_t>(),
282 |         dz.data<scalar_t>(),
283 |         var.data<scalar_t>(),
284 |         weight.data<scalar_t>(),
285 |         bias.data<scalar_t>(),
286 |         edz.data<scalar_t>(),
287 |         eydz.data<scalar_t>(),
288 |         dx.data<scalar_t>(),
289 |         dweight.data<scalar_t>(),
290 |         dbias.data<scalar_t>(),
291 |         affine, eps, num, chn, sp);
292 |   }));
293 | 
294 |   return {dx, dweight, dbias};
295 | }
296 | 
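// The activation helpers below undo the in-place nonlinearity: forward stored
// z = act(y), so on the negative branch they rescale the gradient by act'(y)
// (dz *= slope for leaky ReLU, dz *= z + 1 for ELU) and overwrite z with the
// recovered pre-activation y (z / slope and log1p(z), respectively); the
// positive branch is the identity in both respects and is left untouched.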
297 | /**************
298 |  * activations
299 |  **************/
300 | 
301 | template <typename T>
302 | inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
303 |   // Create thrust pointers
304 |   thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
305 |   thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
306 | 
307 |   thrust::transform_if(th_dz, th_dz + count, th_z, th_dz,
308 |                        [slope] __device__ (const T& dz) { return dz * slope; },
309 |                        [] __device__ (const T& z) { return z < 0; });
310 |   thrust::transform_if(th_z, th_z + count, th_z,
311 |                        [slope] __device__ (const T& z) { return z / slope; },
312 |                        [] __device__ (const T& z) { return z < 0; });
313 | }
314 | 
315 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {
316 |   CHECK_INPUT(z);
317 |   CHECK_INPUT(dz);
318 | 
319 |   int64_t count = z.numel();
320 | 
321 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
322 |     leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
323 |   }));
324 | }
325 | 
326 | template <typename T>
327 | inline void elu_backward_impl(T *z, T *dz, int64_t count) {
328 |   // Create thrust pointers
329 |   thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
330 |   thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
331 | 
332 |   thrust::transform_if(th_dz, th_dz + count, th_z, th_z, th_dz,
333 |                        [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
334 |                        [] __device__ (const T& z) { return z < 0; });
335 |   thrust::transform_if(th_z, th_z + count, th_z,
336 |                        [] __device__ (const T& z) { return log1p(z); },
337 |                        [] __device__ (const T& z) { return z < 0; });
338 | }
339 | 
340 | void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
341 |   CHECK_INPUT(z);
342 |   CHECK_INPUT(dz);
343 | 
344 |   int64_t count = z.numel();
345 | 
346 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cuda", ([&] {
347 |     elu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), count);
348 |   }));
349 | }
350 | 
--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/lib/utils/__init__.py
--------------------------------------------------------------------------------
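The sources above target an older ATen extension surface (`x.type()`-based dispatch, `tensor.data<scalar_t>()` accessors), matching the PyTorch versions this branch builds against. For orientation, a JIT build of the three translation units might look like the sketch below; the repo's actual loading logic lives in lib/models/sync_bn/inplace_abn (functions.py, not excerpted in this listing), so the flags and entry point here are assumptions:

```python
# Hypothetical JIT build of the sync-BN extension sources; the repo's own
# loader in lib/models/sync_bn/inplace_abn/functions.py is authoritative.
import os
from torch.utils.cpp_extension import load

_src = os.path.join('lib', 'models', 'sync_bn', 'inplace_abn', 'src')
_backend = load(
    name='inplace_abn',
    sources=[os.path.join(_src, f) for f in
             ('inplace_abn.cpp', 'inplace_abn_cpu.cpp', 'inplace_abn_cuda.cu')],
    extra_cflags=['-O3'],
    # the __device__ lambdas in the thrust::transform_if calls require this flag
    extra_cuda_cflags=['--expt-extended-lambda'])
```

/lib/utils/distributed.py: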
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Jingyi Xie (hsfzxjy@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import torch 8 | import torch.distributed as torch_dist 9 | 10 | def is_distributed(): 11 | return torch_dist.is_initialized() 12 | 13 | def get_world_size(): 14 | if not torch_dist.is_initialized(): 15 | return 1 16 | return torch_dist.get_world_size() 17 | 18 | def get_rank(): 19 | if not torch_dist.is_initialized(): 20 | return 0 21 | return torch_dist.get_rank() -------------------------------------------------------------------------------- /lib/utils/modelsummary.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Ke Sun (sunk@mail.ustc.edu.cn) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | import logging 14 | from collections import namedtuple 15 | 16 | import torch 17 | import torch.nn as nn 18 | 19 | def get_model_summary(model, *input_tensors, item_length=26, verbose=False): 20 | """ 21 | :param model: 22 | :param input_tensors: 23 | :param item_length: 24 | :return: 25 | """ 26 | 27 | summary = [] 28 | 29 | ModuleDetails = namedtuple( 30 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"]) 31 | hooks = [] 32 | layer_instances = {} 33 | 34 | def add_hooks(module): 35 | 36 | def hook(module, input, output): 37 | class_name = str(module.__class__.__name__) 38 | 39 | instance_index = 1 40 | if class_name not in layer_instances: 41 | layer_instances[class_name] = instance_index 42 | else: 43 | instance_index = layer_instances[class_name] + 1 44 | layer_instances[class_name] = instance_index 45 | 46 | layer_name = class_name + "_" + str(instance_index) 47 | 48 | params = 0 49 | 50 | if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \ 51 | class_name.find("Linear") != -1: 52 | for param_ in module.parameters(): 53 | params += param_.view(-1).size(0) 54 | 55 | flops = "Not Available" 56 | if class_name.find("Conv") != -1 and hasattr(module, "weight"): 57 | flops = ( 58 | torch.prod( 59 | torch.LongTensor(list(module.weight.data.size()))) * 60 | torch.prod( 61 | torch.LongTensor(list(output.size())[2:]))).item() 62 | elif isinstance(module, nn.Linear): 63 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \ 64 | * input[0].size(1)).item() 65 | 66 | if isinstance(input[0], list): 67 | input = input[0] 68 | if isinstance(output, list): 69 | output = output[0] 70 | 71 | summary.append( 72 | ModuleDetails( 73 | name=layer_name, 74 | input_size=list(input[0].size()), 75 | output_size=list(output.size()), 76 | num_parameters=params, 77 | multiply_adds=flops) 78 | ) 79 | 80 | if not isinstance(module, nn.ModuleList) \ 81 | and not isinstance(module, nn.Sequential) \ 82 | and module != model: 83 | hooks.append(module.register_forward_hook(hook)) 84 | 85 | model.eval() 86 | model.apply(add_hooks) 87 | 88 | space_len = 
item_length 89 | 90 | model(*input_tensors) 91 | for hook in hooks: 92 | hook.remove() 93 | 94 | details = '' 95 | if verbose: 96 | details = "Model Summary" + \ 97 | os.linesep + \ 98 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format( 99 | ' ' * (space_len - len("Name")), 100 | ' ' * (space_len - len("Input Size")), 101 | ' ' * (space_len - len("Output Size")), 102 | ' ' * (space_len - len("Parameters")), 103 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \ 104 | + os.linesep + '-' * space_len * 5 + os.linesep 105 | 106 | params_sum = 0 107 | flops_sum = 0 108 | for layer in summary: 109 | params_sum += layer.num_parameters 110 | if layer.multiply_adds != "Not Available": 111 | flops_sum += layer.multiply_adds 112 | if verbose: 113 | details += "{}{}{}{}{}{}{}{}{}{}".format( 114 | layer.name, 115 | ' ' * (space_len - len(layer.name)), 116 | layer.input_size, 117 | ' ' * (space_len - len(str(layer.input_size))), 118 | layer.output_size, 119 | ' ' * (space_len - len(str(layer.output_size))), 120 | layer.num_parameters, 121 | ' ' * (space_len - len(str(layer.num_parameters))), 122 | layer.multiply_adds, 123 | ' ' * (space_len - len(str(layer.multiply_adds)))) \ 124 | + os.linesep + '-' * space_len * 5 + os.linesep 125 | 126 | details += os.linesep \ 127 | + "Total Parameters: {:,}".format(params_sum) \ 128 | + os.linesep + '-' * space_len * 5 + os.linesep 129 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \ 130 | + os.linesep + '-' * space_len * 5 + os.linesep 131 | details += "Number of Layers" + os.linesep 132 | for layer in layer_instances: 133 | details += "{} : {} layers ".format(layer, layer_instances[layer]) 134 | 135 | return details -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from pathlib import Path 15 | 16 | import numpy as np 17 | 18 | import torch 19 | import torch.nn as nn 20 | 21 | class FullModel(nn.Module): 22 | """ 23 | Distribute the loss on multi-gpu to reduce 24 | the memory cost in the main gpu. 25 | You can check the following discussion. 
26 |     https://discuss.pytorch.org/t/dataparallel-imbalanced-memory-usage/22551/21
27 |     """
28 |     def __init__(self, model, loss):
29 |         super(FullModel, self).__init__()
30 |         self.model = model
31 |         self.loss = loss
32 | 
33 |     def forward(self, inputs, labels, *args, **kwargs):
34 |         outputs = self.model(inputs, *args, **kwargs)
35 |         loss = self.loss(outputs, labels)
36 |         return torch.unsqueeze(loss, 0), outputs
37 | 
38 | class AverageMeter(object):
39 |     """Computes and stores the average and current value"""
40 | 
41 |     def __init__(self):
42 |         self.initialized = False
43 |         self.val = None
44 |         self.avg = None
45 |         self.sum = None
46 |         self.count = None
47 | 
48 |     def initialize(self, val, weight):
49 |         self.val = val
50 |         self.avg = val
51 |         self.sum = val * weight
52 |         self.count = weight
53 |         self.initialized = True
54 | 
55 |     def update(self, val, weight=1):
56 |         if not self.initialized:
57 |             self.initialize(val, weight)
58 |         else:
59 |             self.add(val, weight)
60 | 
61 |     def add(self, val, weight):
62 |         self.val = val
63 |         self.sum += val * weight
64 |         self.count += weight
65 |         self.avg = self.sum / self.count
66 | 
67 |     def value(self):
68 |         return self.val
69 | 
70 |     def average(self):
71 |         return self.avg
72 | 
73 | def create_logger(cfg, cfg_name, phase='train'):
74 |     root_output_dir = Path(cfg.OUTPUT_DIR)
75 |     # set up logger
76 |     if not root_output_dir.exists():
77 |         print('=> creating {}'.format(root_output_dir))
78 |         root_output_dir.mkdir()
79 | 
80 |     dataset = cfg.DATASET.DATASET
81 |     model = cfg.MODEL.NAME
82 |     cfg_name = os.path.basename(cfg_name).split('.')[0]
83 | 
84 |     final_output_dir = root_output_dir / dataset / cfg_name
85 | 
86 |     print('=> creating {}'.format(final_output_dir))
87 |     final_output_dir.mkdir(parents=True, exist_ok=True)
88 | 
89 |     time_str = time.strftime('%Y-%m-%d-%H-%M')
90 |     log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase)
91 |     final_log_file = final_output_dir / log_file
92 |     head = '%(asctime)-15s %(message)s'
93 |     logging.basicConfig(filename=str(final_log_file),
94 |                         format=head)
95 |     logger = logging.getLogger()
96 |     logger.setLevel(logging.INFO)
97 |     console = logging.StreamHandler()
98 |     logging.getLogger('').addHandler(console)
99 | 
100 |     tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \
101 |         (cfg_name + '_' + time_str)
102 |     print('=> creating {}'.format(tensorboard_log_dir))
103 |     tensorboard_log_dir.mkdir(parents=True, exist_ok=True)
104 | 
105 |     return logger, str(final_output_dir), str(tensorboard_log_dir)
106 | 
107 | def get_confusion_matrix(label, pred, size, num_class, ignore=-1):
108 |     """
109 |     Calculate the confusion matrix from the given label and prediction.
110 |     """
111 |     output = pred.cpu().numpy().transpose(0, 2, 3, 1)
112 |     seg_pred = np.asarray(np.argmax(output, axis=3), dtype=np.uint8)
113 |     seg_gt = np.asarray(
114 |         label.cpu().numpy()[:, :size[-2], :size[-1]], dtype=np.int64)  # np.int was removed in numpy >= 1.24
115 | 
116 |     ignore_index = seg_gt != ignore
117 |     seg_gt = seg_gt[ignore_index]
118 |     seg_pred = seg_pred[ignore_index]
119 | 
120 |     index = (seg_gt * num_class + seg_pred).astype('int32')
121 |     label_count = np.bincount(index)
122 |     confusion_matrix = np.zeros((num_class, num_class))
123 | 
124 |     for i_label in range(num_class):
125 |         for i_pred in range(num_class):
126 |             cur_index = i_label * num_class + i_pred
127 |             if cur_index < len(label_count):
128 |                 confusion_matrix[i_label,
129 |                                  i_pred] = label_count[cur_index]
130 |     return confusion_matrix
131 | 
132 | def adjust_learning_rate(optimizer, base_lr, max_iters,
133 |                          cur_iters, power=0.9, nbb_mult=10):
134
| lr = base_lr*((1-float(cur_iters)/max_iters)**(power)) 135 | optimizer.param_groups[0]['lr'] = lr 136 | if len(optimizer.param_groups) == 2: 137 | optimizer.param_groups[1]['lr'] = lr * nbb_mult 138 | return lr -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | EasyDict==1.7 2 | shapely 3 | Cython 4 | scipy 5 | pandas 6 | pyyaml 7 | json_tricks 8 | scikit-image 9 | yacs>=0.1.5 10 | tensorboardX>=1.6 11 | tqdm 12 | ninja 13 | 14 | -------------------------------------------------------------------------------- /run_dist.sh: -------------------------------------------------------------------------------- 1 | PYTHON="/opt/conda/bin/python" 2 | GPU_NUM=$1 3 | CONFIG=$2 4 | 5 | $PYTHON -m pip install -r requirements.txt 6 | 7 | $PYTHON -m torch.distributed.launch \ 8 | --nproc_per_node=$GPU_NUM \ 9 | tools/train.py \ 10 | --cfg experiments/$CONFIG.yaml \ 11 | 2>&1 | tee local_log.txt 12 | -------------------------------------------------------------------------------- /run_local.sh: -------------------------------------------------------------------------------- 1 | PYTHON="/data/anaconda/envs/pytorch1.7.1/bin/python" 2 | GPU_NUM=4 3 | CONFIG="seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200_paddle" 4 | 5 | $PYTHON -m pip install -r requirements.txt 6 | 7 | $PYTHON -m torch.distributed.launch \ 8 | --nproc_per_node=$GPU_NUM \ 9 | tools/train.py \ 10 | --cfg experiments/pascal_ctx/$CONFIG.yaml \ 11 | 2>&1 | tee local_log.txt 12 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os.path as osp 12 | import sys 13 | 14 | 15 | def add_path(path): 16 | if path not in sys.path: 17 | sys.path.insert(0, path) 18 | 19 | this_dir = osp.dirname(__file__) 20 | 21 | lib_path = osp.join(this_dir, '..', 'lib') 22 | add_path(lib_path) 23 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import argparse 8 | import os 9 | import pprint 10 | import shutil 11 | import sys 12 | 13 | import logging 14 | import time 15 | import timeit 16 | from pathlib import Path 17 | 18 | import numpy as np 19 | 20 | import torch 21 | import torch.nn as nn 22 | import torch.backends.cudnn as cudnn 23 | 24 | import _init_paths 25 | import models 26 | import datasets 27 | from config import config 28 | from config import update_config 29 | from core.function import testval, test 30 | from utils.modelsummary import get_model_summary 31 | from utils.utils import create_logger, FullModel 32 | 33 | def parse_args(): 34 | parser = argparse.ArgumentParser(description='Train segmentation network') 35 | 36 | parser.add_argument('--cfg', 37 | help='experiment configure file name', 38 | required=True, 39 | type=str) 40 | parser.add_argument('opts', 41 | help="Modify config options using the command-line", 42 | default=None, 43 | nargs=argparse.REMAINDER) 44 | 45 | args = parser.parse_args() 46 | update_config(config, args) 47 | 48 | return args 49 | 50 | def main(): 51 | args = parse_args() 52 | 53 | logger, final_output_dir, _ = create_logger( 54 | config, args.cfg, 'test') 55 | 56 | logger.info(pprint.pformat(args)) 57 | logger.info(pprint.pformat(config)) 58 | 59 | # cudnn related setting 60 | cudnn.benchmark = config.CUDNN.BENCHMARK 61 | cudnn.deterministic = config.CUDNN.DETERMINISTIC 62 | cudnn.enabled = config.CUDNN.ENABLED 63 | 64 | # build model 65 | if torch.__version__.startswith('1'): 66 | module = eval('models.'+config.MODEL.NAME) 67 | module.BatchNorm2d_class = module.BatchNorm2d = torch.nn.BatchNorm2d 68 | model = eval('models.'+config.MODEL.NAME + 69 | '.get_seg_model')(config) 70 | 71 | dump_input = torch.rand( 72 | (1, 3, config.TRAIN.IMAGE_SIZE[1], config.TRAIN.IMAGE_SIZE[0]) 73 | ) 74 | logger.info(get_model_summary(model.cuda(), dump_input.cuda())) 75 | 76 | if config.TEST.MODEL_FILE: 77 | model_state_file = config.TEST.MODEL_FILE 78 | else: 79 | model_state_file = os.path.join(final_output_dir, 'final_state.pth') 80 | logger.info('=> loading model from {}'.format(model_state_file)) 81 | 82 | pretrained_dict = torch.load(model_state_file) 83 | if 'state_dict' in pretrained_dict: 84 | pretrained_dict = pretrained_dict['state_dict'] 85 | model_dict = model.state_dict() 86 | pretrained_dict = {k[6:]: v for k, v in pretrained_dict.items() 87 | if k[6:] in model_dict.keys()} 88 | for k, _ in pretrained_dict.items(): 89 | logger.info( 90 | '=> loading {} from pretrained model'.format(k)) 91 | model_dict.update(pretrained_dict) 92 | model.load_state_dict(model_dict) 93 | 94 | gpus = list(config.GPUS) 95 | model = nn.DataParallel(model, device_ids=gpus).cuda() 96 | 97 | # prepare data 98 | test_size = (config.TEST.IMAGE_SIZE[1], config.TEST.IMAGE_SIZE[0]) 99 | test_dataset = eval('datasets.'+config.DATASET.DATASET)( 100 | root=config.DATASET.ROOT, 101 | list_path=config.DATASET.TEST_SET, 102 | num_samples=None, 103 | num_classes=config.DATASET.NUM_CLASSES, 104 | multi_scale=False, 105 | flip=False, 106 | ignore_label=config.TRAIN.IGNORE_LABEL, 107 | base_size=config.TEST.BASE_SIZE, 108 | crop_size=test_size, 109 | downsample_rate=1) 110 | 111 | testloader = torch.utils.data.DataLoader( 112 | test_dataset, 113 | batch_size=1, 114 | shuffle=False, 115 | num_workers=config.WORKERS, 116 | pin_memory=True) 117 | 118 | start = timeit.default_timer() 119 | if 
'val' in config.DATASET.TEST_SET:
120 |         mean_IoU, IoU_array, pixel_acc, mean_acc = testval(config,
121 |                                                            test_dataset,
122 |                                                            testloader,
123 |                                                            model)
124 | 
125 |         msg = 'MeanIU: {: 4.4f}, Pixel_Acc: {: 4.4f}, \
126 |             Mean_Acc: {: 4.4f}, Class IoU: '.format(mean_IoU,
127 |                                                     pixel_acc, mean_acc)
128 |         logging.info(msg)
129 |         logging.info(IoU_array)
130 |     elif 'test' in config.DATASET.TEST_SET:
131 |         test(config,
132 |              test_dataset,
133 |              testloader,
134 |              model,
135 |              sv_dir=final_output_dir)
136 | 
137 |     end = timeit.default_timer()
138 |     logger.info('Mins: %d' % int((end - start) / 60))  # np.int was removed in numpy >= 1.24
139 |     logger.info('Done')
140 | 
141 | 
142 | if __name__ == '__main__':
143 |     main()
144 | 
--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 | 
7 | import argparse
8 | import os
9 | import pprint
10 | import shutil
11 | import sys
12 | 
13 | import logging
14 | import time
15 | import timeit
16 | from pathlib import Path
17 | 
18 | import numpy as np
19 | 
20 | import torch
21 | import torch.nn as nn
22 | import torch.backends.cudnn as cudnn
23 | import torch.optim
24 | from tensorboardX import SummaryWriter
25 | 
26 | import _init_paths
27 | import models
28 | import datasets
29 | from config import config
30 | from config import update_config
31 | from core.criterion import CrossEntropy, OhemCrossEntropy
32 | from core.function import train, validate
33 | from utils.modelsummary import get_model_summary
34 | from utils.utils import create_logger, FullModel
35 | 
36 | def parse_args():
37 |     parser = argparse.ArgumentParser(description='Train segmentation network')
38 | 
39 |     parser.add_argument('--cfg',
40 |                         help='experiment configure file name',
41 |                         required=True,
42 |                         type=str)
43 |     parser.add_argument('--seed', type=int, default=304)
44 |     parser.add_argument("--local_rank", type=int, default=-1)
45 |     parser.add_argument('opts',
46 |                         help="Modify config options using the command-line",
47 |                         default=None,
48 |                         nargs=argparse.REMAINDER)
49 | 
50 |     args = parser.parse_args()
51 |     update_config(config, args)
52 | 
53 |     return args
54 | 
55 | def get_sampler(dataset):
56 |     from utils.distributed import is_distributed
57 |     if is_distributed():
58 |         from torch.utils.data.distributed import DistributedSampler
59 |         return DistributedSampler(dataset)
60 |     else:
61 |         return None
62 | 
63 | def main():
64 |     args = parse_args()
65 | 
66 |     if args.seed > 0:
67 |         import random
68 |         print('Seeding with', args.seed)
69 |         random.seed(args.seed)
70 |         torch.manual_seed(args.seed)
71 | 
72 |     logger, final_output_dir, tb_log_dir = create_logger(
73 |         config, args.cfg, 'train')
74 | 
75 |     logger.info(pprint.pformat(args))
76 |     logger.info(config)
77 | 
78 |     writer_dict = {
79 |         'writer': SummaryWriter(tb_log_dir),
80 |         'train_global_steps': 0,
81 |         'valid_global_steps': 0,
82 |     }
83 | 
84 |     # cudnn related setting
85 |     cudnn.benchmark = config.CUDNN.BENCHMARK
86 |     cudnn.deterministic = config.CUDNN.DETERMINISTIC
87 |     cudnn.enabled = config.CUDNN.ENABLED
88 |     gpus = list(config.GPUS)
89 |     distributed = args.local_rank >= 0
90 |     if distributed:
91 |         device = torch.device('cuda:{}'.format(args.local_rank))
92 |         torch.cuda.set_device(device)
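        # env:// rendezvous: torch.distributed.launch (see run_dist.sh and
        # run_local.sh) exports MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE
        # and passes --local_rank, so each of the GPU_NUM processes pins itself
        # to one device above before joining the NCCL group below.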
93 |         torch.distributed.init_process_group(
94 |             backend="nccl", init_method="env://",
95 |         )
96 | 
97 |     # build model
98 |     model = eval('models.'+config.MODEL.NAME +
99 |                  '.get_seg_model')(config)
100 | 
101 |     # dump_input = torch.rand(
102 |     #     (1, 3, config.TRAIN.IMAGE_SIZE[1], config.TRAIN.IMAGE_SIZE[0])
103 |     # )
104 |     # logger.info(get_model_summary(model.cuda(), dump_input.cuda()))
105 | 
106 |     # copy model file
107 |     if distributed and args.local_rank == 0:
108 |         this_dir = os.path.dirname(__file__)
109 |         models_dst_dir = os.path.join(final_output_dir, 'models')
110 |         # if os.path.exists(models_dst_dir):
111 |         #     shutil.rmtree(models_dst_dir)
112 |         # shutil.copytree(os.path.join(this_dir, '../lib/models'), models_dst_dir)
113 | 
114 |     if distributed:
115 |         batch_size = config.TRAIN.BATCH_SIZE_PER_GPU
116 |     else:
117 |         batch_size = config.TRAIN.BATCH_SIZE_PER_GPU * len(gpus)
118 | 
119 |     # prepare data
120 |     crop_size = (config.TRAIN.IMAGE_SIZE[1], config.TRAIN.IMAGE_SIZE[0])
121 |     train_dataset = eval('datasets.'+config.DATASET.DATASET)(
122 |                         root=config.DATASET.ROOT,
123 |                         list_path=config.DATASET.TRAIN_SET,
124 |                         num_samples=None,
125 |                         num_classes=config.DATASET.NUM_CLASSES,
126 |                         multi_scale=config.TRAIN.MULTI_SCALE,
127 |                         flip=config.TRAIN.FLIP,
128 |                         ignore_label=config.TRAIN.IGNORE_LABEL,
129 |                         base_size=config.TRAIN.BASE_SIZE,
130 |                         crop_size=crop_size,
131 |                         downsample_rate=config.TRAIN.DOWNSAMPLERATE,
132 |                         scale_factor=config.TRAIN.SCALE_FACTOR)
133 | 
134 |     train_sampler = get_sampler(train_dataset)
135 |     trainloader = torch.utils.data.DataLoader(
136 |         train_dataset,
137 |         batch_size=batch_size,
138 |         shuffle=config.TRAIN.SHUFFLE and train_sampler is None,
139 |         num_workers=config.WORKERS,
140 |         pin_memory=True,
141 |         drop_last=True,
142 |         sampler=train_sampler)
143 | 
144 |     extra_epoch_iters = 0
145 |     if config.DATASET.EXTRA_TRAIN_SET:
146 |         extra_train_dataset = eval('datasets.'+config.DATASET.DATASET)(
147 |                     root=config.DATASET.ROOT,
148 |                     list_path=config.DATASET.EXTRA_TRAIN_SET,
149 |                     num_samples=None,
150 |                     num_classes=config.DATASET.NUM_CLASSES,
151 |                     multi_scale=config.TRAIN.MULTI_SCALE,
152 |                     flip=config.TRAIN.FLIP,
153 |                     ignore_label=config.TRAIN.IGNORE_LABEL,
154 |                     base_size=config.TRAIN.BASE_SIZE,
155 |                     crop_size=crop_size,
156 |                     downsample_rate=config.TRAIN.DOWNSAMPLERATE,
157 |                     scale_factor=config.TRAIN.SCALE_FACTOR)
158 |         extra_train_sampler = get_sampler(extra_train_dataset)
159 |         extra_trainloader = torch.utils.data.DataLoader(
160 |             extra_train_dataset,
161 |             batch_size=batch_size,
162 |             shuffle=config.TRAIN.SHUFFLE and extra_train_sampler is None,
163 |             num_workers=config.WORKERS,
164 |             pin_memory=True,
165 |             drop_last=True,
166 |             sampler=extra_train_sampler)
167 |         extra_epoch_iters = int(extra_train_dataset.__len__() /
168 |                                 config.TRAIN.BATCH_SIZE_PER_GPU / len(gpus))  # np.int removed in numpy >= 1.24
169 | 
170 | 
171 |     test_size = (config.TEST.IMAGE_SIZE[1], config.TEST.IMAGE_SIZE[0])
172 |     test_dataset = eval('datasets.'+config.DATASET.DATASET)(
173 |                         root=config.DATASET.ROOT,
174 |                         list_path=config.DATASET.TEST_SET,
175 |                         num_samples=config.TEST.NUM_SAMPLES,
176 |                         num_classes=config.DATASET.NUM_CLASSES,
177 |                         multi_scale=False,
178 |                         flip=False,
179 |                         ignore_label=config.TRAIN.IGNORE_LABEL,
180 |                         base_size=config.TEST.BASE_SIZE,
181 |                         crop_size=test_size,
182 |                         downsample_rate=1)
183 | 
184 |     test_sampler = get_sampler(test_dataset)
185 |     testloader = torch.utils.data.DataLoader(
186 |         test_dataset,
187 |         batch_size=batch_size,
188 |         shuffle=False,
189 |         num_workers=config.WORKERS,
190 |         pin_memory=True,
191 |         sampler=test_sampler)
192 | 
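    # OhemCrossEntropy (lib/core/criterion.py) mines hard examples online:
    # it keeps the pixels whose predicted probability falls below OHEMTHRES,
    # but never fewer than OHEMKEEP per batch, and averages the loss over
    # those survivors only; CrossEntropy uses every labeled pixel.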
193 |     # criterion
194 |     if config.LOSS.USE_OHEM:
195 |         criterion = OhemCrossEntropy(ignore_label=config.TRAIN.IGNORE_LABEL,
196 |                                      thres=config.LOSS.OHEMTHRES,
197 |                                      min_kept=config.LOSS.OHEMKEEP,
198 |                                      weight=train_dataset.class_weights)
199 |     else:
200 |         criterion = CrossEntropy(ignore_label=config.TRAIN.IGNORE_LABEL,
201 |                                  weight=train_dataset.class_weights)
202 | 
203 |     model = FullModel(model, criterion)
204 |     if distributed:
205 |         model = model.to(device)
206 |         model = torch.nn.parallel.DistributedDataParallel(
207 |             model,
208 |             find_unused_parameters=True,
209 |             device_ids=[args.local_rank],
210 |             output_device=args.local_rank
211 |         )
212 |     else:
213 |         model = nn.DataParallel(model, device_ids=gpus).cuda()
214 | 
215 | 
216 |     # optimizer
217 |     if config.TRAIN.OPTIMIZER == 'sgd':
218 | 
219 |         params_dict = dict(model.named_parameters())
220 |         if config.TRAIN.NONBACKBONE_KEYWORDS:
221 |             bb_lr = []
222 |             nbb_lr = []
223 |             nbb_keys = set()
224 |             for k, param in params_dict.items():
225 |                 if any(part in k for part in config.TRAIN.NONBACKBONE_KEYWORDS):
226 |                     nbb_lr.append(param)
227 |                     nbb_keys.add(k)
228 |                 else:
229 |                     bb_lr.append(param)
230 |             print(nbb_keys)
231 |             params = [{'params': bb_lr, 'lr': config.TRAIN.LR}, {'params': nbb_lr, 'lr': config.TRAIN.LR * config.TRAIN.NONBACKBONE_MULT}]
232 |         else:
233 |             params = [{'params': list(params_dict.values()), 'lr': config.TRAIN.LR}]
234 | 
235 |         optimizer = torch.optim.SGD(params,
236 |                                     lr=config.TRAIN.LR,
237 |                                     momentum=config.TRAIN.MOMENTUM,
238 |                                     weight_decay=config.TRAIN.WD,
239 |                                     nesterov=config.TRAIN.NESTEROV,
240 |                                     )
241 |     else:
242 |         raise ValueError('Only Support SGD optimizer')
243 | 
244 |     epoch_iters = int(train_dataset.__len__() /
245 |                       config.TRAIN.BATCH_SIZE_PER_GPU / len(gpus))  # np.int removed in numpy >= 1.24
246 | 
247 |     best_mIoU = 0
248 |     last_epoch = 0
249 |     if config.TRAIN.RESUME:
250 |         model_state_file = os.path.join(final_output_dir,
251 |                                         'checkpoint.pth.tar')
252 |         if os.path.isfile(model_state_file):
253 |             checkpoint = torch.load(model_state_file, map_location={'cuda:0': 'cpu'})
254 |             best_mIoU = checkpoint['best_mIoU']
255 |             last_epoch = checkpoint['epoch']
256 |             dct = checkpoint['state_dict']
257 | 
258 |             model.module.model.load_state_dict({k.replace('model.', ''): v for k, v in dct.items() if k.startswith('model.')})
259 |             optimizer.load_state_dict(checkpoint['optimizer'])
260 |             logger.info("=> loaded checkpoint (epoch {})"
261 |                         .format(checkpoint['epoch']))
262 |         if distributed:
263 |             torch.distributed.barrier()
264 | 
265 |     start = timeit.default_timer()
266 |     end_epoch = config.TRAIN.END_EPOCH + config.TRAIN.EXTRA_EPOCH
267 |     num_iters = config.TRAIN.END_EPOCH * epoch_iters
268 |     extra_iters = config.TRAIN.EXTRA_EPOCH * extra_epoch_iters
269 | 
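    # Two-phase schedule: epochs [0, END_EPOCH) draw from trainloader under the
    # main LR schedule; if DATASET.EXTRA_TRAIN_SET is configured, epochs
    # [END_EPOCH, END_EPOCH + EXTRA_EPOCH) switch to extra_trainloader and the
    # separate EXTRA_LR schedule.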
270 |     for epoch in range(last_epoch, end_epoch):
271 | 
272 |         current_trainloader = extra_trainloader if epoch >= config.TRAIN.END_EPOCH else trainloader
273 |         if current_trainloader.sampler is not None and hasattr(current_trainloader.sampler, 'set_epoch'):
274 |             current_trainloader.sampler.set_epoch(epoch)
275 | 
276 |         # valid_loss, mean_IoU, IoU_array = validate(config,
277 |         #     testloader, model, writer_dict)
278 | 
279 |         if epoch >= config.TRAIN.END_EPOCH:
280 |             train(config, epoch-config.TRAIN.END_EPOCH,
281 |                   config.TRAIN.EXTRA_EPOCH, extra_epoch_iters,
282 |                   config.TRAIN.EXTRA_LR, extra_iters,
283 |                   extra_trainloader, optimizer, model, writer_dict)
284 |         else:
285 |             train(config, epoch, config.TRAIN.END_EPOCH,
286 |                   epoch_iters, config.TRAIN.LR, num_iters,
287 |                   trainloader, optimizer, model, writer_dict)
288 | 
289 |         valid_loss, mean_IoU, IoU_array = validate(config,
290 |                     testloader, model, writer_dict)
291 | 
292 |         if args.local_rank <= 0:
293 |             logger.info('=> saving checkpoint to {}'.format(
294 |                 os.path.join(final_output_dir, 'checkpoint.pth.tar')))
295 |             torch.save({
296 |                 'epoch': epoch+1,
297 |                 'best_mIoU': best_mIoU,
298 |                 'state_dict': model.module.state_dict(),
299 |                 'optimizer': optimizer.state_dict(),
300 |             }, os.path.join(final_output_dir, 'checkpoint.pth.tar'))
301 |             if mean_IoU > best_mIoU:
302 |                 best_mIoU = mean_IoU
303 |                 torch.save(model.module.state_dict(),
304 |                            os.path.join(final_output_dir, 'best.pth'))
305 |             msg = 'Loss: {:.3f}, MeanIU: {: 4.4f}, Best_mIoU: {: 4.4f}'.format(
306 |                 valid_loss, mean_IoU, best_mIoU)
307 |             logging.info(msg)
308 |             logging.info(IoU_array)
309 | 
310 |     if args.local_rank <= 0:
311 | 
312 |         torch.save(model.module.state_dict(),
313 |                    os.path.join(final_output_dir, 'final_state.pth'))
314 | 
315 |         writer_dict['writer'].close()
316 |         end = timeit.default_timer()
317 |         logger.info('Hours: %d' % int((end - start) / 3600))  # np.int removed in numpy >= 1.24
318 |         logger.info('Done')
319 | 
320 | 
321 | if __name__ == '__main__':
322 |     main()
323 | 
--------------------------------------------------------------------------------
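One last pointer for readers following the loop above: the epoch_iters/num_iters values threaded into train() exist so that core/function.py (not excerpted here) can step the poly learning-rate policy from lib/utils/utils.py as iterations advance. A minimal sketch of that decay curve, with illustrative constants rather than values from any shipped config:

```python
# Poly schedule implemented by lib/utils/utils.py:adjust_learning_rate;
# the constants below are illustrative, not taken from any experiment yaml.
base_lr, max_iters, power = 0.01, 120000, 0.9

def poly_lr(cur_iters):
    # Decays smoothly from base_lr at iteration 0 toward 0 at max_iters.
    return base_lr * ((1 - cur_iters / max_iters) ** power)

for it in (0, 30000, 60000, 90000):
    print(it, round(poly_lr(it), 6))   # ~0.01, 0.0077, 0.0054, 0.0029
```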