├── .gitignore
├── LICENSE
├── README.md
├── data
│   └── list
│       ├── ade20k
│       │   ├── testval.lst
│       │   ├── train.lst
│       │   ├── trainval.lst
│       │   └── val.lst
│       ├── cityscapes
│       │   ├── test.lst
│       │   ├── train.lst
│       │   ├── trainval.lst
│       │   └── val.lst
│       └── cocostuff
│           ├── testval.lst
│           ├── train.lst
│           ├── trainval.lst
│           └── val.lst
├── experiments
│   ├── ade20k
│   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml
│   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml
│   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml
│   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml
│   │   └── seg_hrnet_w48_520x520_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml
│   ├── cityscapes
│   │   ├── seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   ├── seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml
│   │   ├── seg_hrnet_ocr_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   ├── seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   ├── seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484_paddle.yaml
│   │   ├── seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml
│   │   ├── seg_hrnet_w48_train_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   ├── seg_hrnet_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml
│   │   └── seg_hrnet_w48_trainval_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml
│   ├── cocostuff
│   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml
│   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml
│   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml
│   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml
│   │   └── seg_hrnet_w48_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml
│   ├── lip
│   │   ├── seg_hrnet_ocr_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml
│   │   ├── seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml
│   │   └── seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150_paddle.yaml
│   └── pascal_ctx
│       ├── seg_hrnet_ocr_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml
│       ├── seg_hrnet_ocr_w48_cls60_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml
│       ├── seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml
│       └── seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200_paddle.yaml
├── figures
│   ├── OCR.PNG
│   ├── SegmentationTransformerOCR.png
│   ├── SegmentationTransformerOCR1.png
│   ├── SegmentationTransformerOCR2.png
│   └── seg-hrnet.png
├── hubconf.py
├── lib
│   ├── config
│   │   ├── __init__.py
│   │   ├── default.py
│   │   ├── hrnet_config.py
│   │   └── models.py
│   ├── core
│   │   ├── criterion.py
│   │   └── function.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── ade20k.py
│   │   ├── base_dataset.py
│   │   ├── cityscapes.py
│   │   ├── cocostuff.py
│   │   ├── lip.py
│   │   └── pascal_ctx.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── bn_helper.py
│   │   ├── hrnet.py
│   │   ├── seg_hrnet.py
│   │   ├── seg_hrnet_ocr.py
│   │   └── sync_bn
│   │       ├── LICENSE
│   │       ├── __init__.py
│   │       └── inplace_abn
│   │           ├── __init__.py
│   │           ├── bn.py
│   │           ├── functions.py
│   │           └── src
│   │               ├── common.h
│   │               ├── inplace_abn.cpp
│   │               ├── inplace_abn.h
│   │               ├── inplace_abn_cpu.cpp
│   │               └── inplace_abn_cuda.cu
│   └── utils
│       ├── __init__.py
│       ├── distributed.py
│       ├── modelsummary.py
│       └── utils.py
├── local_log.txt
├── requirements.txt
├── run_dist.sh
├── run_local.sh
└── tools
    ├── _init_paths.py
    ├── test.py
    └── train.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | __pycache__/
3 | *.py[co]
4 | data/
5 | log/
6 | output/
7 | pretrained_models
8 | scripts/
9 | detail-api/
10 | data/list
11 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) [2019] [Microsoft]
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | =======================================================================================
24 | 3-clause BSD licenses
25 | =======================================================================================
26 | 1. syncbn - For details, see lib/models/sync_bn/LICENSE
27 | Copyright (c) 2017 mapillary
28 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # High-resolution networks and Segmentation Transformer for Semantic Segmentation
2 | ## Branches
3 | - This is the implementation for HRNet + OCR.
4 | - The PyTorch 1.1 version is available [here](https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/pytorch-v1.1).
5 | - The PyTorch 0.4.1 version is available [here](https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/master).
6 | 
7 | ## News
8 | - [2021/05/04] We rephrase the OCR approach as **Segmentation Transformer** [pdf](https://arxiv.org/pdf/1909.11065.pdf). We will provide the updated implementation soon.
9 | - [2021/02/16] Based on the [PaddleClas](https://github.com/PaddlePaddle/PaddleClas) ImageNet pretrained weights, we achieve **83.22%** on Cityscapes val, **59.62%** on PASCAL-Context val (**new SOTA**), **45.20%** on COCO-Stuff val (**new SOTA**), **58.21%** on LIP val and **47.98%** on ADE20K val. Please check out [openseg.pytorch](https://github.com/openseg-group/openseg.pytorch/tree/pytorch-1.7) for more details.
10 | - [2020/08/16] [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) now supports our HRNet + OCR.
11 | - [2020/07/20] The researchers from AInnovation have achieved **Rank#1** on the [ADE20K Leaderboard](http://sceneparsing.csail.mit.edu/) by training our HRNet + OCR with a semi-supervised learning scheme. More details are in their [Technical Report](https://arxiv.org/pdf/2007.10591.pdf).
12 | - [2020/07/09] Our paper is accepted by ECCV 2020: [Object-Contextual Representations for Semantic Segmentation](https://arxiv.org/pdf/1909.11065.pdf).
Notably, the researchers from NVIDIA set a new state-of-the-art performance on the Cityscapes leaderboard, [85.4%](https://www.cityscapes-dataset.com/method-details/?submissionID=7836), by combining our HRNet + OCR with a new [hierarchical multi-scale attention scheme](https://arxiv.org/abs/2005.10821).
13 | - [2020/03/13] Our paper is accepted by TPAMI: [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/pdf/1908.07919.pdf).
14 | - HRNet + OCR + SegFix: Rank \#1 (84.5) in the [Cityscapes leaderboard](https://www.cityscapes-dataset.com/benchmarks/). OCR: object-contextual representations [pdf](https://arxiv.org/pdf/1909.11065.pdf). ***HRNet + OCR is reproduced [here](https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/HRNet-OCR)***.
15 | - Thanks to Google and UIUC researchers: a modified HRNet combined with semantic and instance multi-scale context achieves a SOTA panoptic segmentation result on the Mapillary Vistas challenge. See [the paper](https://arxiv.org/pdf/1910.04751.pdf).
16 | - Small HRNet models for Cityscapes segmentation. Superior to MobileNetV2Plus ....
17 | - Rank \#1 (83.7) in the [Cityscapes leaderboard](https://www.cityscapes-dataset.com/benchmarks/): HRNet combined with an extension of [object context](https://arxiv.org/pdf/1809.00916.pdf).
18 | 
19 | - PyTorch 1.1 and the official Sync-BN are supported. We have reproduced the Cityscapes results on the new codebase. Please check the [pytorch-v1.1 branch](https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/pytorch-v1.1).
20 | 
21 | ## Introduction
22 | This is the official code of [high-resolution representations for Semantic Segmentation](https://arxiv.org/abs/1904.04514).
23 | We augment the HRNet with a very simple segmentation head, shown in the figure below. We aggregate the output representations at four different resolutions and then use a 1x1 convolution to fuse these representations. The output representation is fed into the classifier. We evaluate our methods on three datasets, Cityscapes, PASCAL-Context and LIP.
24 | 
25 | 
![hrnet](figures/seg-hrnet.png)
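
For orientation, this head amounts to only a few lines of PyTorch. Below is a minimal, illustrative sketch, not the repo's code (the real implementation is in `lib/models/seg_hrnet.py`): the class name `SegHead` is ours, the channel widths follow the W48 configs under `experiments/`, and plain `BatchNorm2d` stands in for the sync-bn variants used in practice.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class SegHead(nn.Module):
    """Illustrative HRNetV2 segmentation head: upsample the four branch
    outputs to the highest resolution, concatenate them, fuse with a 1x1
    convolution, and classify (FINAL_CONV_KERNEL: 1 in the configs)."""

    def __init__(self, branch_channels=(48, 96, 192, 384), num_classes=19):
        super().__init__()
        total = sum(branch_channels)  # 720 channels for HRNetV2-W48
        self.fuse = nn.Sequential(
            nn.Conv2d(total, total, kernel_size=1),
            nn.BatchNorm2d(total),
            nn.ReLU(inplace=True),
        )
        self.classifier = nn.Conv2d(total, num_classes, kernel_size=1)

    def forward(self, feats):  # feats: list of 4 tensors, high to low resolution
        h, w = feats[0].shape[2:]
        # align_corners=True, matching the note under "Segmentation models" below
        ups = [feats[0]] + [
            F.interpolate(f, size=(h, w), mode='bilinear', align_corners=True)
            for f in feats[1:]
        ]
        return self.classifier(self.fuse(torch.cat(ups, dim=1)))
```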
31 | 
32 | Besides, we further combine HRNet with [Object-Contextual Representations](https://arxiv.org/pdf/1909.11065.pdf) and achieve higher performance on these datasets. The code of HRNet+OCR is contained in this branch. We illustrate the overall framework of OCR in the figure below, together with the equivalent Transformer pipelines:
33 | 
34 | 
![OCR](figures/OCR.PNG)
![Segmentation Transformer](figures/SegmentationTransformerOCR.png)
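
In code terms, OCR (i) predicts K soft object regions with an auxiliary head, (ii) pools one representation per region, and (iii) lets every pixel attend to those region representations — which is exactly the cross-attention reading in the Transformer-style figures above, with region vectors as keys/values and pixels as queries. The following is a deliberately condensed sketch under those assumptions, not the repo's module (that lives in `lib/models/seg_hrnet_ocr.py` and adds 1x1 projections, scaling, BN and dropout; the function name `ocr_context` is ours):

```python
import torch
import torch.nn.functional as F

def ocr_context(pixels, soft_regions):
    """pixels:       (B, C, H, W) backbone features
    soft_regions: (B, K, H, W) coarse logits from the auxiliary head
    Returns features augmented with object-contextual info, (B, 2C, H, W)."""
    B, C, H, W = pixels.shape
    x = pixels.flatten(2)                             # (B, C, HW)
    # (i)-(ii) object region representations: attention-weighted pixel averages
    m = F.softmax(soft_regions.flatten(2), dim=2)     # (B, K, HW)
    regions = torch.bmm(m, x.transpose(1, 2))         # (B, K, C)
    # (iii) pixel-region relation: each pixel attends to the K region vectors
    rel = F.softmax(torch.bmm(x.transpose(1, 2),      # (B, HW, K)
                              regions.transpose(1, 2)), dim=2)
    context = torch.bmm(rel, regions)                 # (B, HW, C)
    context = context.transpose(1, 2).reshape(B, C, H, W)
    return torch.cat([context, pixels], dim=1)
```

This two-headed structure is also why the OCR configs below set `NUM_OUTPUTS: 2` and `BALANCE_WEIGHTS: [0.4, 1]`: the auxiliary region prediction is supervised jointly with the final segmentation, at 0.4x the weight.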
43 | 
44 | ## Segmentation models
45 | The models are initialized with weights pretrained on ImageNet. "Paddle" means the results are based on PaddleClas-pretrained HRNet models.
46 | You can download the pretrained models from https://github.com/HRNet/HRNet-Image-Classification. *Slightly different from the original implementation, we use align_corners=True for upsampling in HRNet.*
47 | 
48 | 1. Performance on the Cityscapes dataset. The models are trained and tested with the input size of 512x1024 and 1024x2048 respectively.
49 | If multi-scale testing is used, we adopt scales: 0.5,0.75,1.0,1.25,1.5,1.75.
50 | 
51 | | model | Train Set | Test Set | OHEM | Multi-scale | Flip | mIoU | Link |
52 | | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: |
53 | | HRNetV2-W48 | Train | Val | No | No | No | 80.9 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_cs_8090_torch11.pth)/[BaiduYun(Access Code:pmix)](https://pan.baidu.com/s/1KyiOUOR0SYxKtJfIlD5o-w)|
54 | | HRNetV2-W48 + OCR | Train | Val | No | No | No | 81.6 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_cs_8162_torch11.pth)/[BaiduYun(Access Code:fa6i)](https://pan.baidu.com/s/1BGNt4Xmx3yfXUS8yjde0hQ)|
55 | | HRNetV2-W48 + OCR | Train + Val | Test | No | Yes | Yes | 82.3 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_cs_trainval_8227_torch11.pth)/[BaiduYun(Access Code:ycrk)](https://pan.baidu.com/s/16mD81UnGzjUBD-haDQfzIQ)|
56 | | HRNetV2-W48 (Paddle) | Train | Val | No | No | No | 81.6 | ---|
57 | | HRNetV2-W48 + OCR (Paddle) | Train | Val | No | No | No | --- | ---|
58 | | HRNetV2-W48 + OCR (Paddle) | Train + Val | Test | No | Yes | Yes | --- | ---|
59 | 
60 | 2. Performance on the LIP dataset. The models are trained and tested with the input size of 473x473.
61 | 
62 | | model | OHEM | Multi-scale | Flip | mIoU | Link |
63 | | :--: | :--: | :--: | :--: | :--: | :--: |
64 | | HRNetV2-W48 | No | No | Yes | 55.83 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_lip_5583_torch04.pth)/[BaiduYun(Access Code:fahi)](https://pan.baidu.com/s/15DamFiGEoxwDDF1TwuZdnA)|
65 | | HRNetV2-W48 + OCR | No | No | Yes | 56.48 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_lip_5648_torch04.pth)/[BaiduYun(Access Code:xex2)](https://pan.baidu.com/s/1dFYSR2bahRnvpIOdh88kOQ)|
66 | | HRNetV2-W48 (Paddle) | No | No | Yes | --- | --- |
67 | | HRNetV2-W48 + OCR (Paddle) | No | No | Yes | --- | ---|
68 | 
69 | 
70 | **Note:** Currently we can only reproduce HRNet+OCR results on the LIP dataset with PyTorch 0.4.1.
71 | 
72 | 3. Performance on the PASCAL-Context dataset. The models are trained and tested with the input size of 520x520.
73 | If multi-scale testing is used, we adopt scales: 0.5,0.75,1.0,1.25,1.5,1.75,2.0 (the same as EncNet, DANet etc.).
74 | 
75 | | model | num classes | OHEM | Multi-scale | Flip | mIoU | Link |
76 | | :--: | :--: | :--: | :--: | :--: | :--: | :--: |
77 | | HRNetV2-W48 | 59 classes | No | Yes | Yes | 54.1 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_pascal_ctx_5410_torch11.pth)/[BaiduYun(Access Code:wz6v)](https://pan.baidu.com/s/1m0MqpHSk0SX380EYEMawSA)|
78 | | HRNetV2-W48 + OCR | 59 classes | No | Yes | Yes | 56.2 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_pascal_ctx_5618_torch11.pth)/[BaiduYun(Access Code:yyxh)](https://pan.baidu.com/s/1XYP54gr3XB76tHmCcKdU9g)|
79 | | HRNetV2-W48 | 60 classes | No | Yes | Yes | 48.3 | [OneDrive](https://1drv.ms/u/s!Aus8VCZ_C_33gQEHDQrZCiv4R5mf)/[BaiduYun(Access Code:9uf8)](https://pan.baidu.com/s/1pgYt8P8ht2HOOzcA0F7Kag)|
80 | | HRNetV2-W48 + OCR | 60 classes | No | Yes | Yes | 50.1 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_pascal_ctx_5410_torch11.pth)/[BaiduYun(Access Code:gtkb)](https://pan.baidu.com/s/13AYjwzh1LJSlipJwNpJ3Uw)|
81 | | HRNetV2-W48 (Paddle) | 59 classes | No | Yes | Yes | --- | ---|
82 | | HRNetV2-W48 (Paddle) | 60 classes | No | Yes | Yes | --- | ---|
83 | | HRNetV2-W48 + OCR (Paddle) | 59 classes | No | Yes | Yes | --- | ---|
84 | | HRNetV2-W48 + OCR (Paddle) | 60 classes | No | Yes | Yes | --- | ---|
85 | 
86 | 4. Performance on the COCO-Stuff dataset. The models are trained and tested with the input size of 520x520.
87 | If multi-scale testing is used, we adopt scales: 0.5,0.75,1.0,1.25,1.5,1.75,2.0 (the same as EncNet, DANet etc.).
88 | 
89 | | model | OHEM | Multi-scale | Flip | mIoU | Link |
90 | | :--: | :--: | :--: | :--: | :--: | :--: |
91 | | HRNetV2-W48 | Yes | No | No | 36.2 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_cocostuff_3617_torch04.pth)/[BaiduYun(Access Code:92gw)](https://pan.baidu.com/s/1VAV6KThH1Irzv9HZgLWE2Q)|
92 | | HRNetV2-W48 + OCR | Yes | No | No | 39.7 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_cocostuff_3965_torch04.pth)/[BaiduYun(Access Code:sjc4)](https://pan.baidu.com/s/1HFSYyVwKBG3E6y76gcPjDA)|
93 | | HRNetV2-W48 | Yes | Yes | Yes | 37.9 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_cocostuff_3617_torch04.pth)/[BaiduYun(Access Code:92gw)](https://pan.baidu.com/s/1VAV6KThH1Irzv9HZgLWE2Q) |
94 | | HRNetV2-W48 + OCR | Yes | Yes | Yes | 40.6 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_cocostuff_3965_torch04.pth)/[BaiduYun(Access Code:sjc4)](https://pan.baidu.com/s/1HFSYyVwKBG3E6y76gcPjDA) |
95 | | HRNetV2-W48 (Paddle) | Yes | No | No | --- | ---|
96 | | HRNetV2-W48 + OCR (Paddle) | Yes | No | No | --- | ---|
97 | | HRNetV2-W48 (Paddle) | Yes | Yes | Yes | --- | ---|
98 | | HRNetV2-W48 + OCR (Paddle) | Yes | Yes | Yes | --- | ---|
99 | 
100 | 
101 | 
102 | 5. Performance on the ADE20K dataset. The models are trained and tested with the input size of 520x520.
103 | If multi-scale testing is used, we adopt scales: 0.5,0.75,1.0,1.25,1.5,1.75,2.0 (the same as EncNet, DANet etc.).
104 | 
105 | | model | OHEM | Multi-scale | Flip | mIoU | Link |
106 | | :--: | :--: | :--: | :--: | :--: | :--: |
107 | | HRNetV2-W48 | Yes | No | No | 43.1 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ade20k_4312_torch04.pth)/[BaiduYun(Access Code:f6xf)](https://pan.baidu.com/s/11neVkzxx27qS2-mPFW9dfg)|
108 | | HRNetV2-W48 + OCR | Yes | No | No | 44.5 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_ade20k_4451_torch04.pth)/[BaiduYun(Access Code:peg4)](https://pan.baidu.com/s/1HLhjiLIdgaOHs0SzEtkgkQ)|
109 | | HRNetV2-W48 | Yes | Yes | Yes | 44.2 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ade20k_4312_torch04.pth)/[BaiduYun(Access Code:f6xf)](https://pan.baidu.com/s/11neVkzxx27qS2-mPFW9dfg) |
110 | | HRNetV2-W48 + OCR | Yes | Yes | Yes | 45.5 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_ade20k_4451_torch04.pth)/[BaiduYun(Access Code:peg4)](https://pan.baidu.com/s/1HLhjiLIdgaOHs0SzEtkgkQ) |
111 | | HRNetV2-W48 (Paddle) | Yes | No | No | --- | ---|
112 | | HRNetV2-W48 + OCR (Paddle) | Yes | No | No | --- | ---|
113 | | HRNetV2-W48 (Paddle) | Yes | Yes | Yes | --- | ---|
114 | | HRNetV2-W48 + OCR (Paddle) | Yes | Yes | Yes | --- | ---|
115 | 
116 | 
117 | 
118 | ## Quick start
119 | ### Install
120 | 1. For the LIP dataset, install PyTorch 0.4.1 following the [official instructions](https://pytorch.org/). For Cityscapes and PASCAL-Context, we use PyTorch 1.1.0.
121 | 2. `git clone https://github.com/HRNet/HRNet-Semantic-Segmentation $SEG_ROOT`
122 | 3. Install dependencies: `pip install -r requirements.txt`
123 | 
124 | If you want to train and evaluate our models on PASCAL-Context, you need to install the [detail API](https://github.com/zhanghang1989/detail-api).
125 | ````bash
126 | pip install git+https://github.com/zhanghang1989/detail-api.git#subdirectory=PythonAPI
127 | ````
128 | 
129 | ### Data preparation
130 | You need to download the [Cityscapes](https://www.cityscapes-dataset.com/), [LIP](http://sysu-hcp.net/lip/) and [PASCAL-Context](https://cs.stanford.edu/~roozbeh/pascal-context/) datasets.
131 | 
132 | Your directory tree should look like this:
133 | ````bash
134 | $SEG_ROOT/data
135 | ├── cityscapes
136 | │   ├── gtFine
137 | │   │   ├── test
138 | │   │   ├── train
139 | │   │   └── val
140 | │   └── leftImg8bit
141 | │       ├── test
142 | │       ├── train
143 | │       └── val
144 | ├── lip
145 | │   ├── TrainVal_images
146 | │   │   ├── train_images
147 | │   │   └── val_images
148 | │   └── TrainVal_parsing_annotations
149 | │       ├── train_segmentations
150 | │       ├── train_segmentations_reversed
151 | │       └── val_segmentations
152 | ├── pascal_ctx
153 | │   ├── common
154 | │   ├── PythonAPI
155 | │   ├── res
156 | │   └── VOCdevkit
157 | │       └── VOC2010
158 | ├── cocostuff
159 | │   ├── train
160 | │   │   ├── image
161 | │   │   └── label
162 | │   └── val
163 | │       ├── image
164 | │       └── label
165 | ├── ade20k
166 | │   ├── train
167 | │   │   ├── image
168 | │   │   └── label
169 | │   └── val
170 | │       ├── image
171 | │       └── label
172 | ├── list
173 | │   ├── cityscapes
174 | │   │   ├── test.lst
175 | │   │   ├── trainval.lst
176 | │   │   └── val.lst
177 | │   ├── lip
178 | │   │   ├── testvalList.txt
179 | │   │   ├── trainList.txt
180 | │   │   └── valList.txt
181 | ````
182 | 
183 | ### Train and Test
184 | 
185 | #### PyTorch Version Differences
186 | 
187 | Note that the codebase supports both PyTorch 0.4.1 and 1.1.0, and they use different commands for training. In what follows, we use `$PY_CMD` to denote the appropriate startup command.
188 | 
189 | ```bash
190 | # For PyTorch 0.4.1
191 | PY_CMD="python"
192 | # For PyTorch 1.1.0
193 | PY_CMD="python -m torch.distributed.launch --nproc_per_node=4"
194 | ```
195 | 
196 | For example, when training on Cityscapes we use PyTorch 1.1.0, so the command
197 | ````bash
198 | $PY_CMD tools/train.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
199 | ````
200 | stands for
201 | ````bash
202 | python -m torch.distributed.launch --nproc_per_node=4 tools/train.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
203 | ````
204 | #### Training
205 | 
206 | Just specify the configuration file for `tools/train.py`.
207 | 
208 | For example, train HRNet-W48 on Cityscapes with a batch size of 12 on 4 GPUs:
209 | ````bash
210 | $PY_CMD tools/train.py --cfg experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
211 | ````
212 | Similarly, train HRNet-W48 + OCR on Cityscapes with a batch size of 12 on 4 GPUs:
213 | ````bash
214 | $PY_CMD tools/train.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
215 | ````
216 | 
217 | Note that we can only reproduce HRNet+OCR results on the LIP dataset with PyTorch 0.4.1, so we recommend using PyTorch 0.4.1 if you want to train on LIP.
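
Configs with `ohem` in their names additionally enable online hard example mining (`LOSS.USE_OHEM: true`): the loss is averaged only over pixels whose predicted probability for the ground-truth class falls below `OHEMTHRES` (0.9), while at least `OHEMKEEP` (131072) pixels are always kept. A simplified, self-contained sketch of that selection logic follows; the repo's actual criterion lives in `lib/core/criterion.py` and handles further details such as class weighting, and the function name here is hypothetical.

```python
import torch
import torch.nn.functional as F

def ohem_cross_entropy(logits, target, thres=0.9, min_kept=131072, ignore_label=255):
    """Pixel-wise OHEM cross-entropy (sketch).
    logits: (B, C, H, W); target: (B, H, W), ignore_label marks void pixels."""
    pixel_losses = F.cross_entropy(
        logits, target, ignore_index=ignore_label, reduction='none').flatten()
    valid = (target != ignore_label).flatten()

    # Confidence each pixel assigns to its ground-truth class.
    prob = F.softmax(logits, dim=1)
    safe_target = target.clone()
    safe_target[safe_target == ignore_label] = 0      # avoid out-of-range gather
    pt = prob.gather(1, safe_target.unsqueeze(1)).flatten()

    losses, pt = pixel_losses[valid], pt[valid]
    if losses.numel() == 0:                           # batch of pure void pixels
        return logits.sum() * 0.0
    # The threshold never drops below `thres` and is raised until at least
    # min(min_kept, #valid) low-confidence pixels survive.
    n_kept = min(min_kept, losses.numel())
    pt_sorted, order = torch.sort(pt)
    threshold = max(pt_sorted[n_kept - 1].item(), thres)
    return losses[order[pt_sorted <= threshold]].mean()
```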
218 | 
219 | #### Testing
220 | 
221 | For example, evaluating HRNet+OCR on the Cityscapes validation set with multi-scale and flip testing:
222 | ````bash
223 | python tools/test.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml \
224 |                      TEST.MODEL_FILE hrnet_ocr_cs_8162_torch11.pth \
225 |                      TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75 \
226 |                      TEST.FLIP_TEST True
227 | ````
228 | Evaluating HRNet+OCR on the Cityscapes test set with multi-scale and flip testing:
229 | ````bash
230 | python tools/test.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml \
231 |                      DATASET.TEST_SET list/cityscapes/test.lst \
232 |                      TEST.MODEL_FILE hrnet_ocr_trainval_cs_8227_torch11.pth \
233 |                      TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75 \
234 |                      TEST.FLIP_TEST True
235 | ````
236 | Evaluating HRNet+OCR on the PASCAL-Context validation set with multi-scale and flip testing:
237 | ````bash
238 | python tools/test.py --cfg experiments/pascal_ctx/seg_hrnet_ocr_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml \
239 |                      DATASET.TEST_SET testval \
240 |                      TEST.MODEL_FILE hrnet_ocr_pascal_ctx_5618_torch11.pth \
241 |                      TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75,2.0 \
242 |                      TEST.FLIP_TEST True
243 | ````
244 | Evaluating HRNet+OCR on the LIP validation set with flip testing:
245 | ````bash
246 | python tools/test.py --cfg experiments/lip/seg_hrnet_ocr_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml \
247 |                      DATASET.TEST_SET list/lip/testvalList.txt \
248 |                      TEST.MODEL_FILE hrnet_ocr_lip_5648_torch04.pth \
249 |                      TEST.FLIP_TEST True \
250 |                      TEST.NUM_SAMPLES 0
251 | ````
252 | Evaluating HRNet+OCR on the COCO-Stuff validation set with multi-scale and flip testing:
253 | ````bash
254 | python tools/test.py --cfg experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml \
255 |                      DATASET.TEST_SET list/cocostuff/testval.lst \
256 |                      TEST.MODEL_FILE hrnet_ocr_cocostuff_3965_torch04.pth \
257 |                      TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75,2.0 \
258 |                      TEST.MULTI_SCALE True TEST.FLIP_TEST True
259 | ````
260 | Evaluating HRNet+OCR on the ADE20K validation set with multi-scale and flip testing:
261 | ````bash
262 | python tools/test.py --cfg experiments/ade20k/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml \
263 |                      DATASET.TEST_SET list/ade20k/testval.lst \
264 |                      TEST.MODEL_FILE hrnet_ocr_ade20k_4451_torch04.pth \
265 |                      TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75,2.0 \
266 |                      TEST.MULTI_SCALE True TEST.FLIP_TEST True
267 | ````
268 | 
269 | ## Other applications of HRNet
270 | * [Human pose estimation](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch)
271 | * [Image Classification](https://github.com/HRNet/HRNet-Image-Classification)
272 | * [Object detection](https://github.com/HRNet/HRNet-Object-Detection)
273 | * [Facial landmark detection](https://github.com/HRNet/HRNet-Facial-Landmark-Detection)
274 | 
275 | ## Citation
276 | If you find this work or code helpful in your research, please cite:
277 | ````
278 | @inproceedings{SunXLW19,
279 |   title={Deep High-Resolution Representation Learning for Human Pose Estimation},
280 |   author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang},
281 |   booktitle={CVPR},
282 |   year={2019}
283 | }
284 | 
285 | @article{WangSCJDZLMTWLX19,
286 |   title={Deep High-Resolution Representation Learning for Visual Recognition},
287 |   author={Jingdong Wang and Ke Sun and Tianheng Cheng and
288 |           Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
289 |           Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
290 |   journal={TPAMI},
291 |   year={2019}
292 | }
293 | 
294 | @inproceedings{YuanCW19,
295 |   title={Object-Contextual Representations for Semantic Segmentation},
296 |   author={Yuhui Yuan and Xilin Chen and Jingdong Wang},
297 |   booktitle={ECCV},
298 |   year={2020}
299 | }
300 | ````
301 | 
302 | ## Reference
303 | [1] Deep High-Resolution Representation Learning for Visual Recognition. Jingdong Wang, Ke Sun, Tianheng Cheng,
304 | Borui Jiang, Chaorui Deng, Yang Zhao, Dong Liu, Yadong Mu, Mingkui Tan, Xinggang Wang, Wenyu Liu, Bin Xiao. Accepted by TPAMI. [download](https://arxiv.org/pdf/1908.07919.pdf)
305 | 
306 | [2] Object-Contextual Representations for Semantic Segmentation. Yuhui Yuan, Xilin Chen, Jingdong Wang. [download](https://arxiv.org/pdf/1909.11065.pdf)
307 | 
308 | ## Acknowledgement
309 | We adopt sync-bn implemented by [InplaceABN](https://github.com/mapillary/inplace_abn) for the PyTorch 0.4.1 experiments and the official
310 | sync-bn provided by PyTorch for the PyTorch 1.1 experiments.
311 | 
312 | We adopt the data preprocessing for the PASCAL-Context dataset implemented by the [PASCAL API](https://github.com/zhanghang1989/detail-api).
313 | 
--------------------------------------------------------------------------------
/experiments/ade20k/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 |   BENCHMARK: true
3 |   DETERMINISTIC: false
4 |   ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 | 
11 | DATASET:
12 |   DATASET: ade20k
13 |   ROOT: 'data/'
14 |   TEST_SET: 'list/ade20k/val.lst'
15 |   TRAIN_SET: 'list/ade20k/train.lst'
16 |   NUM_CLASSES: 150
17 | MODEL:
18 |   NAME: seg_hrnet_ocr
19 |   NUM_OUTPUTS: 2
20 |   PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
21 |   EXTRA:
22 |     FINAL_CONV_KERNEL: 1
23 |     STAGE1:
24 |       NUM_MODULES: 1
25 |       NUM_BRANCHES: 1
26 |       BLOCK: BOTTLENECK
27 |       NUM_BLOCKS:
28 |       - 4
29 |       NUM_CHANNELS:
30 |       - 64
31 |       FUSE_METHOD: SUM
32 |     STAGE2:
33 |       NUM_MODULES: 1
34 |       NUM_BRANCHES: 2
35 |       BLOCK: BASIC
36 |       NUM_BLOCKS:
37 |       - 4
38 |       - 4
39 |       NUM_CHANNELS:
40 |       - 48
41 |       - 96
42 |       FUSE_METHOD: SUM
43 |     STAGE3:
44 |       NUM_MODULES: 4
45 |       NUM_BRANCHES: 3
46 |       BLOCK: BASIC
47 |       NUM_BLOCKS:
48 |       - 4
49 |       - 4
50 |       - 4
51 |       NUM_CHANNELS:
52 |       - 48
53 |       - 96
54 |       - 192
55 |       FUSE_METHOD: SUM
56 |     STAGE4:
57 |       NUM_MODULES: 3
58 |       NUM_BRANCHES: 4
59 |       BLOCK: BASIC
60 |       NUM_BLOCKS:
61 |       - 4
62 |       - 4
63 |       - 4
64 |       - 4
65 |       NUM_CHANNELS:
66 |       - 48
67 |       - 96
68 |       - 192
69 |       - 384
70 |       FUSE_METHOD: SUM
71 | LOSS:
72 |   USE_OHEM: true
73 |   OHEMTHRES: 0.9
74 |   OHEMKEEP: 131072
75 |   BALANCE_WEIGHTS: [0.4, 1]
76 | TRAIN:
77 |   IMAGE_SIZE:
78 |   - 520
79 |   - 520
80 |   BASE_SIZE: 520
81 |   BATCH_SIZE_PER_GPU: 4
82 |   SHUFFLE: true
83 |   BEGIN_EPOCH: 0
84 |   END_EPOCH: 120
85 |   RESUME: true
86 |   OPTIMIZER: sgd
87 |   LR: 0.02
88 |   WD: 0.0001
89 |   MOMENTUM: 0.9
90 |   NESTEROV: false
91 |   FLIP: true
92 |   MULTI_SCALE: true
93 |   DOWNSAMPLERATE: 1
94 |   IGNORE_LABEL: 255
95 |   SCALE_FACTOR: 16
96 | TEST:
97 |   IMAGE_SIZE:
98 |   - 520
99 |   - 520
100 |   BASE_SIZE: 520
101 |   BATCH_SIZE_PER_GPU: 1
102 |   NUM_SAMPLES: 200
103 |   FLIP_TEST: false
104 |   MULTI_SCALE: false
105 | 
--------------------------------------------------------------------------------
/experiments/ade20k/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml:
-------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: '../../../../dataset/ade20k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 2 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 120 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.02 88 | WD: 0.0001 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 520 99 | - 520 100 | BASE_SIZE: 520 101 | BATCH_SIZE_PER_GPU: 1 102 | NUM_SAMPLES: 200 103 | FLIP_TEST: false 104 | MULTI_SCALE: false 105 | -------------------------------------------------------------------------------- /experiments/ade20k/seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: 'data/' 14 | TEST_SET: 'list/ade20k/val.lst' 15 | TRAIN_SET: 'list/ade20k/train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 
79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 4 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 120 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.02 87 | WD: 0.0001 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 520 98 | - 520 99 | BASE_SIZE: 520 100 | BATCH_SIZE_PER_GPU: 1 101 | NUM_SAMPLES: 200 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/ade20k/seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: '../../../../dataset/ade20k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 2 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 120 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.02 87 | WD: 0.0001 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 520 98 | - 520 99 | BASE_SIZE: 520 100 | BATCH_SIZE_PER_GPU: 1 101 | NUM_SAMPLES: 200 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/ade20k/seg_hrnet_w48_520x520_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: ade20k 13 | ROOT: 'data/' 14 | TEST_SET: 'list/ade20k/val.lst' 15 | TRAIN_SET: 'list/ade20k/train.lst' 16 | NUM_CLASSES: 150 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | 
BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 4 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 120 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.02 87 | WD: 0.0001 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 520 98 | - 520 99 | BASE_SIZE: 520 100 | BATCH_SIZE_PER_GPU: 1 101 | NUM_SAMPLES: 200 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: data/ 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: "pretrained_models/hrnetv2_w48_imagenet_pretrained.pth" 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 3 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.01 88 | WD: 0.0005 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 2048 99 | - 1024 100 | BASE_SIZE: 2048 101 | BATCH_SIZE_PER_GPU: 4 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | 
DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: '../../../../dataset/original_cityscapes/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 2 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.01 88 | WD: 0.0005 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 2048 99 | - 1024 100 | BASE_SIZE: 2048 101 | BATCH_SIZE_PER_GPU: 2 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_ocr_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: data/ 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/trainval.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: "pretrained_models/hrnetv2_w48_imagenet_pretrained.pth" 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 3 82 | 
SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.01 88 | WD: 0.0005 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 16 96 | TEST: 97 | IMAGE_SIZE: 98 | - 2048 99 | - 1024 100 | BASE_SIZE: 2048 101 | BATCH_SIZE_PER_GPU: 4 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: '../../../../dataset/original_cityscapes/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: '../../../../dataset/pretrained_models/hrnetv2_w48_imagenet_pretrained_top1_21.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 1024 78 | - 512 79 | BASE_SIZE: 2048 80 | BATCH_SIZE_PER_GPU: 3 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 484 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.01 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 16 95 | TEST: 96 | IMAGE_SIZE: 97 | - 2048 98 | - 1024 99 | BASE_SIZE: 2048 100 | BATCH_SIZE_PER_GPU: 4 101 | FLIP_TEST: false 102 | MULTI_SCALE: false 103 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: '../../../../dataset/original_cityscapes/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: 
BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 1024 78 | - 512 79 | BASE_SIZE: 2048 80 | BATCH_SIZE_PER_GPU: 3 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 484 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.01 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 16 95 | TEST: 96 | IMAGE_SIZE: 97 | - 2048 98 | - 1024 99 | BASE_SIZE: 2048 100 | BATCH_SIZE_PER_GPU: 4 101 | FLIP_TEST: false 102 | MULTI_SCALE: false 103 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: '../../../../dataset/original_cityscapes/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 1024 78 | - 512 79 | BASE_SIZE: 2048 80 | BATCH_SIZE_PER_GPU: 2 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 484 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.01 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 16 95 | TEST: 96 | IMAGE_SIZE: 97 | - 2048 98 | - 1024 99 | BASE_SIZE: 2048 100 | BATCH_SIZE_PER_GPU: 4 101 | FLIP_TEST: false 102 | MULTI_SCALE: false -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_w48_train_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: 
(0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/train.lst' 16 | NUM_CLASSES: 19 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 1024 78 | - 512 79 | BASE_SIZE: 2048 80 | BATCH_SIZE_PER_GPU: 3 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 484 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.01 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 16 95 | TEST: 96 | IMAGE_SIZE: 97 | - 2048 98 | - 1024 99 | BASE_SIZE: 2048 100 | BATCH_SIZE_PER_GPU: 4 101 | FLIP_TEST: false 102 | MULTI_SCALE: false 103 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/train.lst' 16 | EXTRA_TRAIN_SET: 'list/cityscapes/trainval.lst' 17 | NUM_CLASSES: 19 18 | MODEL: 19 | NAME: seg_hrnet 20 | ALIGN_CORNERS: False 21 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 22 | EXTRA: 23 | FINAL_CONV_KERNEL: 1 24 | STAGE1: 25 | NUM_MODULES: 1 26 | NUM_RANCHES: 1 27 | BLOCK: BOTTLENECK 28 | NUM_BLOCKS: 29 | - 4 30 | NUM_CHANNELS: 31 | - 64 32 | FUSE_METHOD: SUM 33 | STAGE2: 34 | NUM_MODULES: 1 35 | NUM_BRANCHES: 2 36 | BLOCK: BASIC 37 | NUM_BLOCKS: 38 | - 4 39 | - 4 40 | NUM_CHANNELS: 41 | - 48 42 | - 96 43 | FUSE_METHOD: SUM 44 | STAGE3: 45 | NUM_MODULES: 4 46 | NUM_BRANCHES: 3 47 | BLOCK: BASIC 48 | NUM_BLOCKS: 49 | - 4 50 | - 4 51 | - 4 52 | NUM_CHANNELS: 53 | - 48 54 | - 96 55 | - 192 56 | FUSE_METHOD: SUM 57 | STAGE4: 58 | NUM_MODULES: 3 59 | NUM_BRANCHES: 4 60 | BLOCK: BASIC 61 | NUM_BLOCKS: 62 | - 4 63 | - 4 64 | - 4 65 | - 4 66 | NUM_CHANNELS: 67 | - 48 68 | - 96 69 | - 192 70 | - 384 71 | FUSE_METHOD: SUM 72 | LOSS: 73 | USE_OHEM: false 74 | OHEMTHRES: 0.9 75 | OHEMKEEP: 131072 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 3 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | EXTRA_EPOCH: 484 86 | 
RESUME: true 87 | OPTIMIZER: sgd 88 | LR: 0.01 89 | EXTRA_LR: 0.001 90 | WD: 0.0005 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: 255 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 2048 101 | - 1024 102 | BASE_SIZE: 2048 103 | BATCH_SIZE_PER_GPU: 4 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/cityscapes/seg_hrnet_w48_trainval_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: cityscapes 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cityscapes/val.lst' 15 | TRAIN_SET: 'list/cityscapes/train.lst' 16 | EXTRA_TRAIN_SET: 'list/cityscapes/trainval.lst' 17 | NUM_CLASSES: 19 18 | MODEL: 19 | NAME: seg_hrnet 20 | ALIGN_CORNERS: False 21 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 22 | EXTRA: 23 | FINAL_CONV_KERNEL: 1 24 | STAGE1: 25 | NUM_MODULES: 1 26 | NUM_RANCHES: 1 27 | BLOCK: BOTTLENECK 28 | NUM_BLOCKS: 29 | - 4 30 | NUM_CHANNELS: 31 | - 64 32 | FUSE_METHOD: SUM 33 | STAGE2: 34 | NUM_MODULES: 1 35 | NUM_BRANCHES: 2 36 | BLOCK: BASIC 37 | NUM_BLOCKS: 38 | - 4 39 | - 4 40 | NUM_CHANNELS: 41 | - 48 42 | - 96 43 | FUSE_METHOD: SUM 44 | STAGE3: 45 | NUM_MODULES: 4 46 | NUM_BRANCHES: 3 47 | BLOCK: BASIC 48 | NUM_BLOCKS: 49 | - 4 50 | - 4 51 | - 4 52 | NUM_CHANNELS: 53 | - 48 54 | - 96 55 | - 192 56 | FUSE_METHOD: SUM 57 | STAGE4: 58 | NUM_MODULES: 3 59 | NUM_BRANCHES: 4 60 | BLOCK: BASIC 61 | NUM_BLOCKS: 62 | - 4 63 | - 4 64 | - 4 65 | - 4 66 | NUM_CHANNELS: 67 | - 48 68 | - 96 69 | - 192 70 | - 384 71 | FUSE_METHOD: SUM 72 | LOSS: 73 | USE_OHEM: true 74 | OHEMTHRES: 0.9 75 | OHEMKEEP: 131072 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 1024 79 | - 512 80 | BASE_SIZE: 2048 81 | BATCH_SIZE_PER_GPU: 3 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 484 85 | EXTRA_EPOCH: 484 86 | RESUME: true 87 | OPTIMIZER: sgd 88 | LR: 0.01 89 | EXTRA_LR: 0.001 90 | WD: 0.0005 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: 255 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 2048 101 | - 1024 102 | BASE_SIZE: 2048 103 | BATCH_SIZE_PER_GPU: 4 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cocostuff/val.lst' 15 | TRAIN_SET: 'list/cocostuff/train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 
| BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 110 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.001 88 | WD: 0.0001 89 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr'] 90 | NONBACKBONE_MULT: 10 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: 255 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 1 104 | NUM_SAMPLES: 200 105 | FLIP_TEST: false 106 | MULTI_SCALE: false 107 | -------------------------------------------------------------------------------- /experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: '../../../../dataset/coco_stuff_10k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 2 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 110 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.001 88 | WD: 0.0001 89 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr'] 90 | NONBACKBONE_MULT: 10 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: 255 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 1 104 | NUM_SAMPLES: 200 105 | FLIP_TEST: false 106 | MULTI_SCALE: false 107 | 
-------------------------------------------------------------------------------- /experiments/cocostuff/seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: '../../../../dataset/coco_stuff_10k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 4 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 110 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.001 87 | WD: 0.0001 88 | NONBACKBONE_KEYWORDS: ['last_layer'] 89 | NONBACKBONE_MULT: 10 90 | MOMENTUM: 0.9 91 | NESTEROV: false 92 | FLIP: true 93 | MULTI_SCALE: true 94 | DOWNSAMPLERATE: 1 95 | IGNORE_LABEL: 255 96 | SCALE_FACTOR: 16 97 | TEST: 98 | IMAGE_SIZE: 99 | - 520 100 | - 520 101 | BASE_SIZE: 520 102 | BATCH_SIZE_PER_GPU: 1 103 | NUM_SAMPLES: 200 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/cocostuff/seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3,4,5,6,7) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: '../../../../dataset/coco_stuff_10k/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 
62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: true 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 2 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 110 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.001 87 | WD: 0.0001 88 | NONBACKBONE_KEYWORDS: ['last_layer'] 89 | NONBACKBONE_MULT: 10 90 | MOMENTUM: 0.9 91 | NESTEROV: false 92 | FLIP: true 93 | MULTI_SCALE: true 94 | DOWNSAMPLERATE: 1 95 | IGNORE_LABEL: 255 96 | SCALE_FACTOR: 16 97 | TEST: 98 | IMAGE_SIZE: 99 | - 520 100 | - 520 101 | BASE_SIZE: 520 102 | BATCH_SIZE_PER_GPU: 1 103 | NUM_SAMPLES: 200 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/cocostuff/seg_hrnet_w48_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: cocostuff 13 | ROOT: 'data/' 14 | TEST_SET: 'list/cocostuff/val.lst' 15 | TRAIN_SET: 'list/cocostuff/train.lst' 16 | NUM_CLASSES: 171 17 | MODEL: 18 | NAME: seg_hrnet 19 | NUM_OUTPUTS: 1 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 520 78 | - 520 79 | BASE_SIZE: 520 80 | BATCH_SIZE_PER_GPU: 4 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 110 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.001 87 | WD: 0.0001 88 | NONBACKBONE_KEYWORDS: ['last_layer'] 89 | NONBACKBONE_MULT: 10 90 | MOMENTUM: 0.9 91 | NESTEROV: false 92 | FLIP: true 93 | MULTI_SCALE: true 94 | DOWNSAMPLERATE: 1 95 | IGNORE_LABEL: 255 96 | SCALE_FACTOR: 16 97 | TEST: 98 | IMAGE_SIZE: 99 | - 520 100 | - 520 101 | BASE_SIZE: 520 102 | BATCH_SIZE_PER_GPU: 1 103 | NUM_SAMPLES: 200 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/lip/seg_hrnet_ocr_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: lip 13 | ROOT: 'data/' 14 | TEST_SET: 'list/lip/valList.txt' 15 | TRAIN_SET: 'list/lip/trainList.txt' 16 | NUM_CLASSES: 20 17 | MODEL: 18 | NAME: 
seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained_2.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 473 79 | - 473 80 | BASE_SIZE: 473 81 | BATCH_SIZE_PER_GPU: 10 82 | SHUFFLE: true 83 | BEGIN_EPOCH: 0 84 | END_EPOCH: 150 85 | RESUME: true 86 | OPTIMIZER: sgd 87 | LR: 0.007 88 | WD: 0.0005 89 | MOMENTUM: 0.9 90 | NESTEROV: false 91 | FLIP: true 92 | MULTI_SCALE: true 93 | DOWNSAMPLERATE: 1 94 | IGNORE_LABEL: 255 95 | SCALE_FACTOR: 11 96 | TEST: 97 | IMAGE_SIZE: 98 | - 473 99 | - 473 100 | BASE_SIZE: 473 101 | BATCH_SIZE_PER_GPU: 10 102 | NUM_SAMPLES: 2000 103 | FLIP_TEST: false 104 | MULTI_SCALE: false 105 | -------------------------------------------------------------------------------- /experiments/lip/seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 100 10 | 11 | DATASET: 12 | DATASET: lip 13 | ROOT: 'data/' 14 | TEST_SET: 'list/lip/valList.txt' 15 | TRAIN_SET: 'list/lip/trainList.txt' 16 | NUM_CLASSES: 20 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 473 78 | - 473 79 | BASE_SIZE: 473 80 | BATCH_SIZE_PER_GPU: 10 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 150 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.007 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 473 98 | - 473 99 | BASE_SIZE: 473 100 | BATCH_SIZE_PER_GPU: 16 101 | NUM_SAMPLES: 2000 102 | 
FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/lip/seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 8 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: lip 13 | ROOT: '../../../../dataset/lip/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 20 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | TRAIN: 76 | IMAGE_SIZE: 77 | - 473 78 | - 473 79 | BASE_SIZE: 473 80 | BATCH_SIZE_PER_GPU: 10 81 | SHUFFLE: true 82 | BEGIN_EPOCH: 0 83 | END_EPOCH: 150 84 | RESUME: true 85 | OPTIMIZER: sgd 86 | LR: 0.007 87 | WD: 0.0005 88 | MOMENTUM: 0.9 89 | NESTEROV: false 90 | FLIP: true 91 | MULTI_SCALE: true 92 | DOWNSAMPLERATE: 1 93 | IGNORE_LABEL: 255 94 | SCALE_FACTOR: 11 95 | TEST: 96 | IMAGE_SIZE: 97 | - 473 98 | - 473 99 | BASE_SIZE: 473 100 | BATCH_SIZE_PER_GPU: 8 101 | NUM_SAMPLES: 2000 102 | FLIP_TEST: false 103 | MULTI_SCALE: false 104 | -------------------------------------------------------------------------------- /experiments/pascal_ctx/seg_hrnet_ocr_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: pascal_ctx 13 | ROOT: 'data/' 14 | TEST_SET: 'val' 15 | TRAIN_SET: 'train' 16 | NUM_CLASSES: 59 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 
69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr'] 83 | NONBACKBONE_MULT: 10 84 | SHUFFLE: true 85 | BEGIN_EPOCH: 0 86 | END_EPOCH: 200 87 | RESUME: true 88 | OPTIMIZER: sgd 89 | LR: 0.001 90 | WD: 0.0001 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: -1 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 16 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/pascal_ctx/seg_hrnet_ocr_w48_cls60_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: pascal_ctx 13 | ROOT: 'data/' 14 | TEST_SET: 'val' 15 | TRAIN_SET: 'train' 16 | NUM_CLASSES: 60 17 | MODEL: 18 | NAME: seg_hrnet_ocr 19 | NUM_OUTPUTS: 2 20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 21 | EXTRA: 22 | FINAL_CONV_KERNEL: 1 23 | STAGE1: 24 | NUM_MODULES: 1 25 | NUM_RANCHES: 1 26 | BLOCK: BOTTLENECK 27 | NUM_BLOCKS: 28 | - 4 29 | NUM_CHANNELS: 30 | - 64 31 | FUSE_METHOD: SUM 32 | STAGE2: 33 | NUM_MODULES: 1 34 | NUM_BRANCHES: 2 35 | BLOCK: BASIC 36 | NUM_BLOCKS: 37 | - 4 38 | - 4 39 | NUM_CHANNELS: 40 | - 48 41 | - 96 42 | FUSE_METHOD: SUM 43 | STAGE3: 44 | NUM_MODULES: 4 45 | NUM_BRANCHES: 3 46 | BLOCK: BASIC 47 | NUM_BLOCKS: 48 | - 4 49 | - 4 50 | - 4 51 | NUM_CHANNELS: 52 | - 48 53 | - 96 54 | - 192 55 | FUSE_METHOD: SUM 56 | STAGE4: 57 | NUM_MODULES: 3 58 | NUM_BRANCHES: 4 59 | BLOCK: BASIC 60 | NUM_BLOCKS: 61 | - 4 62 | - 4 63 | - 4 64 | - 4 65 | NUM_CHANNELS: 66 | - 48 67 | - 96 68 | - 192 69 | - 384 70 | FUSE_METHOD: SUM 71 | LOSS: 72 | USE_OHEM: false 73 | OHEMTHRES: 0.9 74 | OHEMKEEP: 131072 75 | BALANCE_WEIGHTS: [0.4, 1] 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr'] 83 | NONBACKBONE_MULT: 10 84 | SHUFFLE: true 85 | BEGIN_EPOCH: 0 86 | END_EPOCH: 200 87 | RESUME: true 88 | OPTIMIZER: sgd 89 | LR: 0.001 90 | WD: 0.0001 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: -1 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 16 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/pascal_ctx/seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: pascal_ctx 13 | ROOT: 'data/' 14 | TEST_SET: 'val' 15 | TRAIN_SET: 'train' 16 | NUM_CLASSES: 59 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | NUM_OUTPUTS: 1 21 | PRETRAINED: 
'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth' 22 | EXTRA: 23 | FINAL_CONV_KERNEL: 1 24 | STAGE1: 25 | NUM_MODULES: 1 26 | NUM_RANCHES: 1 27 | BLOCK: BOTTLENECK 28 | NUM_BLOCKS: 29 | - 4 30 | NUM_CHANNELS: 31 | - 64 32 | FUSE_METHOD: SUM 33 | STAGE2: 34 | NUM_MODULES: 1 35 | NUM_BRANCHES: 2 36 | BLOCK: BASIC 37 | NUM_BLOCKS: 38 | - 4 39 | - 4 40 | NUM_CHANNELS: 41 | - 48 42 | - 96 43 | FUSE_METHOD: SUM 44 | STAGE3: 45 | NUM_MODULES: 4 46 | NUM_BRANCHES: 3 47 | BLOCK: BASIC 48 | NUM_BLOCKS: 49 | - 4 50 | - 4 51 | - 4 52 | NUM_CHANNELS: 53 | - 48 54 | - 96 55 | - 192 56 | FUSE_METHOD: SUM 57 | STAGE4: 58 | NUM_MODULES: 3 59 | NUM_BRANCHES: 4 60 | BLOCK: BASIC 61 | NUM_BLOCKS: 62 | - 4 63 | - 4 64 | - 4 65 | - 4 66 | NUM_CHANNELS: 67 | - 48 68 | - 96 69 | - 192 70 | - 384 71 | FUSE_METHOD: SUM 72 | LOSS: 73 | USE_OHEM: false 74 | OHEMTHRES: 0.9 75 | OHEMKEEP: 131072 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | NONBACKBONE_KEYWORDS: ['last_layer'] 83 | NONBACKBONE_MULT: 10 84 | SHUFFLE: true 85 | BEGIN_EPOCH: 0 86 | END_EPOCH: 200 87 | RESUME: true 88 | OPTIMIZER: sgd 89 | LR: 0.001 90 | WD: 0.0001 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: -1 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 16 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /experiments/pascal_ctx/seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200_paddle.yaml: -------------------------------------------------------------------------------- 1 | CUDNN: 2 | BENCHMARK: true 3 | DETERMINISTIC: false 4 | ENABLED: true 5 | GPUS: (0,1,2,3) 6 | OUTPUT_DIR: 'output' 7 | LOG_DIR: 'log' 8 | WORKERS: 4 9 | PRINT_FREQ: 10 10 | 11 | DATASET: 12 | DATASET: pascal_ctx 13 | ROOT: '../../../../dataset/pascal_context/' 14 | TEST_SET: 'val.lst' 15 | TRAIN_SET: 'train.lst' 16 | NUM_CLASSES: 59 17 | MODEL: 18 | NAME: seg_hrnet 19 | ALIGN_CORNERS: False 20 | NUM_OUTPUTS: 1 21 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth' 22 | EXTRA: 23 | FINAL_CONV_KERNEL: 1 24 | STAGE1: 25 | NUM_MODULES: 1 26 | NUM_RANCHES: 1 27 | BLOCK: BOTTLENECK 28 | NUM_BLOCKS: 29 | - 4 30 | NUM_CHANNELS: 31 | - 64 32 | FUSE_METHOD: SUM 33 | STAGE2: 34 | NUM_MODULES: 1 35 | NUM_BRANCHES: 2 36 | BLOCK: BASIC 37 | NUM_BLOCKS: 38 | - 4 39 | - 4 40 | NUM_CHANNELS: 41 | - 48 42 | - 96 43 | FUSE_METHOD: SUM 44 | STAGE3: 45 | NUM_MODULES: 4 46 | NUM_BRANCHES: 3 47 | BLOCK: BASIC 48 | NUM_BLOCKS: 49 | - 4 50 | - 4 51 | - 4 52 | NUM_CHANNELS: 53 | - 48 54 | - 96 55 | - 192 56 | FUSE_METHOD: SUM 57 | STAGE4: 58 | NUM_MODULES: 3 59 | NUM_BRANCHES: 4 60 | BLOCK: BASIC 61 | NUM_BLOCKS: 62 | - 4 63 | - 4 64 | - 4 65 | - 4 66 | NUM_CHANNELS: 67 | - 48 68 | - 96 69 | - 192 70 | - 384 71 | FUSE_METHOD: SUM 72 | LOSS: 73 | USE_OHEM: false 74 | OHEMTHRES: 0.9 75 | OHEMKEEP: 131072 76 | TRAIN: 77 | IMAGE_SIZE: 78 | - 520 79 | - 520 80 | BASE_SIZE: 520 81 | BATCH_SIZE_PER_GPU: 4 82 | NONBACKBONE_KEYWORDS: ['last_layer'] 83 | NONBACKBONE_MULT: 10 84 | SHUFFLE: true 85 | BEGIN_EPOCH: 0 86 | END_EPOCH: 200 87 | RESUME: true 88 | OPTIMIZER: sgd 89 | LR: 0.001 90 | WD: 0.0001 91 | MOMENTUM: 0.9 92 | NESTEROV: false 93 | FLIP: true 94 | MULTI_SCALE: true 95 | DOWNSAMPLERATE: 1 96 | IGNORE_LABEL: -1 97 | SCALE_FACTOR: 16 98 | TEST: 99 | IMAGE_SIZE: 100 | - 
520 101 | - 520 102 | BASE_SIZE: 520 103 | BATCH_SIZE_PER_GPU: 16 104 | FLIP_TEST: false 105 | MULTI_SCALE: false 106 | -------------------------------------------------------------------------------- /figures/OCR.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/OCR.PNG -------------------------------------------------------------------------------- /figures/SegmentationTransformerOCR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/SegmentationTransformerOCR.png -------------------------------------------------------------------------------- /figures/SegmentationTransformerOCR1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/SegmentationTransformerOCR1.png -------------------------------------------------------------------------------- /figures/SegmentationTransformerOCR2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/SegmentationTransformerOCR2.png -------------------------------------------------------------------------------- /figures/seg-hrnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/seg-hrnet.png -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | """File for accessing HRNet via PyTorch Hub https://pytorch.org/hub/ 2 | 3 | Usage: 4 | import torch 5 | model = torch.hub.load('HRNet/HRNet-Semantic-Segmentation', 'hrnet_w48_cityscapes', pretrained=False) 6 | """ 7 | 8 | dependencies = ['torch', 'yacs'] 9 | import os 10 | import torch 11 | from lib.config import config 12 | from lib.models.seg_hrnet import get_seg_model 13 | 14 | 15 | # No public hub checkpoint URL is published for this model; point this at a 16 | # downloaded Cityscapes checkpoint before calling with pretrained=True. 17 | state_dict_url = '' 18 | 19 | 20 | def hrnet_w48_cityscapes(pretrained=False, **kwargs): 21 | """ # This docstring shows up in hub.help() 22 | HRNet-W48 semantic segmentation model configured for Cityscapes (19 classes) 23 | pretrained (bool): load pretrained weights into the model 24 | """ 25 | # build HRNet-W48 from the repo's Cityscapes experiment config 26 | config.defrost() 27 | config.merge_from_file(os.path.join(os.path.dirname(__file__), 'experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml')) 28 | config.MODEL.PRETRAINED = '' # do not require the ImageNet checkpoint at build time 29 | config.freeze() 30 | model = get_seg_model(config) 31 | if pretrained: 32 | state_dict = torch.hub.load_state_dict_from_url(state_dict_url, progress=True) 33 | model.load_state_dict(state_dict) 34 | return model -------------------------------------------------------------------------------- /lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | from .default import _C as config 11 | from .default import update_config 12 | from .models import MODEL_EXTRAS 13 | -------------------------------------------------------------------------------- /lib/config/default.py: -------------------------------------------------------------------------------- 1 | 2 | # ------------------------------------------------------------------------------ 3 | # Copyright (c) Microsoft 4 | # Licensed under the MIT License. 5 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | 14 | from yacs.config import CfgNode as CN 15 | 16 | 17 | _C = CN() 18 | 19 | _C.OUTPUT_DIR = '' 20 | _C.LOG_DIR = '' 21 | _C.GPUS = (0,) 22 | _C.WORKERS = 4 23 | _C.PRINT_FREQ = 20 24 | _C.AUTO_RESUME = False 25 | _C.PIN_MEMORY = True 26 | _C.RANK = 0 27 | 28 | # Cudnn related params 29 | _C.CUDNN = CN() 30 | _C.CUDNN.BENCHMARK = True 31 | _C.CUDNN.DETERMINISTIC = False 32 | _C.CUDNN.ENABLED = True 33 | 34 | # common params for NETWORK 35 | _C.MODEL = CN() 36 | _C.MODEL.NAME = 'seg_hrnet' 37 | _C.MODEL.PRETRAINED = '' 38 | _C.MODEL.ALIGN_CORNERS = True 39 | _C.MODEL.NUM_OUTPUTS = 1 40 | _C.MODEL.EXTRA = CN(new_allowed=True) 41 | 42 | 43 | _C.MODEL.OCR = CN() 44 | _C.MODEL.OCR.MID_CHANNELS = 512 45 | _C.MODEL.OCR.KEY_CHANNELS = 256 46 | _C.MODEL.OCR.DROPOUT = 0.05 47 | _C.MODEL.OCR.SCALE = 1 48 | 49 | _C.LOSS = CN() 50 | _C.LOSS.USE_OHEM = False 51 | _C.LOSS.OHEMTHRES = 0.9 52 | _C.LOSS.OHEMKEEP = 100000 53 | _C.LOSS.CLASS_BALANCE = False 54 | _C.LOSS.BALANCE_WEIGHTS = [1] 55 | 56 | # DATASET related params 57 | _C.DATASET = CN() 58 | _C.DATASET.ROOT = '' 59 | _C.DATASET.DATASET = 'cityscapes' 60 | _C.DATASET.NUM_CLASSES = 19 61 | _C.DATASET.TRAIN_SET = 'list/cityscapes/train.lst' 62 | _C.DATASET.EXTRA_TRAIN_SET = '' 63 | _C.DATASET.TEST_SET = 'list/cityscapes/val.lst' 64 | 65 | # training 66 | _C.TRAIN = CN() 67 | 68 | _C.TRAIN.FREEZE_LAYERS = '' 69 | _C.TRAIN.FREEZE_EPOCHS = -1 70 | _C.TRAIN.NONBACKBONE_KEYWORDS = [] 71 | _C.TRAIN.NONBACKBONE_MULT = 10 72 | 73 | _C.TRAIN.IMAGE_SIZE = [1024, 512] # width * height 74 | _C.TRAIN.BASE_SIZE = 2048 75 | _C.TRAIN.DOWNSAMPLERATE = 1 76 | _C.TRAIN.FLIP = True 77 | _C.TRAIN.MULTI_SCALE = True 78 | _C.TRAIN.SCALE_FACTOR = 16 79 | 80 | _C.TRAIN.RANDOM_BRIGHTNESS = False 81 | _C.TRAIN.RANDOM_BRIGHTNESS_SHIFT_VALUE = 10 82 | 83 | _C.TRAIN.LR_FACTOR = 0.1 84 | _C.TRAIN.LR_STEP = [90, 110] 85 | _C.TRAIN.LR = 0.01 86 | _C.TRAIN.EXTRA_LR = 0.001 87 | 88 | _C.TRAIN.OPTIMIZER = 'sgd' 89 | _C.TRAIN.MOMENTUM = 0.9 90 | _C.TRAIN.WD = 0.0001 91 | _C.TRAIN.NESTEROV = False 92 | _C.TRAIN.IGNORE_LABEL = -1 93 | 94 | _C.TRAIN.BEGIN_EPOCH = 0 95 | _C.TRAIN.END_EPOCH = 484 96 | _C.TRAIN.EXTRA_EPOCH = 0 97 | 98 | _C.TRAIN.RESUME = False 99 | 100 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32 101 | _C.TRAIN.SHUFFLE = True 102 | # only using some training samples 103 | _C.TRAIN.NUM_SAMPLES = 0 104 | 105 | # testing 106 | _C.TEST = CN() 107 | 108 | _C.TEST.IMAGE_SIZE = [2048, 1024] # width * height 109 | _C.TEST.BASE_SIZE = 2048 110 | 111 | _C.TEST.BATCH_SIZE_PER_GPU = 32 112 | # only testing 
some samples 113 | _C.TEST.NUM_SAMPLES = 0 114 | 115 | _C.TEST.MODEL_FILE = '' 116 | _C.TEST.FLIP_TEST = False 117 | _C.TEST.MULTI_SCALE = False 118 | _C.TEST.SCALE_LIST = [1] 119 | 120 | _C.TEST.OUTPUT_INDEX = -1 121 | 122 | # debug 123 | _C.DEBUG = CN() 124 | _C.DEBUG.DEBUG = False 125 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 126 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 127 | _C.DEBUG.SAVE_HEATMAPS_GT = False 128 | _C.DEBUG.SAVE_HEATMAPS_PRED = False 129 | 130 | 131 | def update_config(cfg, args): 132 | cfg.defrost() 133 | 134 | cfg.merge_from_file(args.cfg) 135 | cfg.merge_from_list(args.opts) 136 | 137 | cfg.freeze() 138 | 139 | 140 | if __name__ == '__main__': 141 | import sys 142 | with open(sys.argv[1], 'w') as f: 143 | print(_C, file=f) 144 | 145 | -------------------------------------------------------------------------------- /lib/config/hrnet_config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Create by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Ke Sun (sunk@mail.ustc.edu.cn), Rainbowsecret (yuyua@microsoft.com) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | from yacs.config import CfgNode as CN 13 | 14 | 15 | # configs for HRNet48 16 | HRNET_48 = CN() 17 | HRNET_48.FINAL_CONV_KERNEL = 1 18 | 19 | HRNET_48.STAGE1 = CN() 20 | HRNET_48.STAGE1.NUM_MODULES = 1 21 | HRNET_48.STAGE1.NUM_BRANCHES = 1 22 | HRNET_48.STAGE1.NUM_BLOCKS = [4] 23 | HRNET_48.STAGE1.NUM_CHANNELS = [64] 24 | HRNET_48.STAGE1.BLOCK = 'BOTTLENECK' 25 | HRNET_48.STAGE1.FUSE_METHOD = 'SUM' 26 | 27 | HRNET_48.STAGE2 = CN() 28 | HRNET_48.STAGE2.NUM_MODULES = 1 29 | HRNET_48.STAGE2.NUM_BRANCHES = 2 30 | HRNET_48.STAGE2.NUM_BLOCKS = [4, 4] 31 | HRNET_48.STAGE2.NUM_CHANNELS = [48, 96] 32 | HRNET_48.STAGE2.BLOCK = 'BASIC' 33 | HRNET_48.STAGE2.FUSE_METHOD = 'SUM' 34 | 35 | HRNET_48.STAGE3 = CN() 36 | HRNET_48.STAGE3.NUM_MODULES = 4 37 | HRNET_48.STAGE3.NUM_BRANCHES = 3 38 | HRNET_48.STAGE3.NUM_BLOCKS = [4, 4, 4] 39 | HRNET_48.STAGE3.NUM_CHANNELS = [48, 96, 192] 40 | HRNET_48.STAGE3.BLOCK = 'BASIC' 41 | HRNET_48.STAGE3.FUSE_METHOD = 'SUM' 42 | 43 | HRNET_48.STAGE4 = CN() 44 | HRNET_48.STAGE4.NUM_MODULES = 3 45 | HRNET_48.STAGE4.NUM_BRANCHES = 4 46 | HRNET_48.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 47 | HRNET_48.STAGE4.NUM_CHANNELS = [48, 96, 192, 384] 48 | HRNET_48.STAGE4.BLOCK = 'BASIC' 49 | HRNET_48.STAGE4.FUSE_METHOD = 'SUM' 50 | 51 | 52 | # configs for HRNet32 53 | HRNET_32 = CN() 54 | HRNET_32.FINAL_CONV_KERNEL = 1 55 | 56 | HRNET_32.STAGE1 = CN() 57 | HRNET_32.STAGE1.NUM_MODULES = 1 58 | HRNET_32.STAGE1.NUM_BRANCHES = 1 59 | HRNET_32.STAGE1.NUM_BLOCKS = [4] 60 | HRNET_32.STAGE1.NUM_CHANNELS = [64] 61 | HRNET_32.STAGE1.BLOCK = 'BOTTLENECK' 62 | HRNET_32.STAGE1.FUSE_METHOD = 'SUM' 63 | 64 | HRNET_32.STAGE2 = CN() 65 | HRNET_32.STAGE2.NUM_MODULES = 1 66 | HRNET_32.STAGE2.NUM_BRANCHES = 2 67 | HRNET_32.STAGE2.NUM_BLOCKS = [4, 4] 68 | HRNET_32.STAGE2.NUM_CHANNELS = [32, 64] 69 | HRNET_32.STAGE2.BLOCK = 'BASIC' 70 | HRNET_32.STAGE2.FUSE_METHOD = 'SUM' 71 | 72 | HRNET_32.STAGE3 = CN() 73 | HRNET_32.STAGE3.NUM_MODULES = 4 74 | HRNET_32.STAGE3.NUM_BRANCHES = 3 75 | HRNET_32.STAGE3.NUM_BLOCKS = [4, 4, 4] 76 | HRNET_32.STAGE3.NUM_CHANNELS = [32, 64, 128] 77 | 
HRNET_32.STAGE3.BLOCK = 'BASIC' 78 | HRNET_32.STAGE3.FUSE_METHOD = 'SUM' 79 | 80 | HRNET_32.STAGE4 = CN() 81 | HRNET_32.STAGE4.NUM_MODULES = 3 82 | HRNET_32.STAGE4.NUM_BRANCHES = 4 83 | HRNET_32.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 84 | HRNET_32.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 85 | HRNET_32.STAGE4.BLOCK = 'BASIC' 86 | HRNET_32.STAGE4.FUSE_METHOD = 'SUM' 87 | 88 | 89 | # configs for HRNet18 90 | HRNET_18 = CN() 91 | HRNET_18.FINAL_CONV_KERNEL = 1 92 | 93 | HRNET_18.STAGE1 = CN() 94 | HRNET_18.STAGE1.NUM_MODULES = 1 95 | HRNET_18.STAGE1.NUM_BRANCHES = 1 96 | HRNET_18.STAGE1.NUM_BLOCKS = [4] 97 | HRNET_18.STAGE1.NUM_CHANNELS = [64] 98 | HRNET_18.STAGE1.BLOCK = 'BOTTLENECK' 99 | HRNET_18.STAGE1.FUSE_METHOD = 'SUM' 100 | 101 | HRNET_18.STAGE2 = CN() 102 | HRNET_18.STAGE2.NUM_MODULES = 1 103 | HRNET_18.STAGE2.NUM_BRANCHES = 2 104 | HRNET_18.STAGE2.NUM_BLOCKS = [4, 4] 105 | HRNET_18.STAGE2.NUM_CHANNELS = [18, 36] 106 | HRNET_18.STAGE2.BLOCK = 'BASIC' 107 | HRNET_18.STAGE2.FUSE_METHOD = 'SUM' 108 | 109 | HRNET_18.STAGE3 = CN() 110 | HRNET_18.STAGE3.NUM_MODULES = 4 111 | HRNET_18.STAGE3.NUM_BRANCHES = 3 112 | HRNET_18.STAGE3.NUM_BLOCKS = [4, 4, 4] 113 | HRNET_18.STAGE3.NUM_CHANNELS = [18, 36, 72] 114 | HRNET_18.STAGE3.BLOCK = 'BASIC' 115 | HRNET_18.STAGE3.FUSE_METHOD = 'SUM' 116 | 117 | HRNET_18.STAGE4 = CN() 118 | HRNET_18.STAGE4.NUM_MODULES = 3 119 | HRNET_18.STAGE4.NUM_BRANCHES = 4 120 | HRNET_18.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 121 | HRNET_18.STAGE4.NUM_CHANNELS = [18, 36, 72, 144] 122 | HRNET_18.STAGE4.BLOCK = 'BASIC' 123 | HRNET_18.STAGE4.FUSE_METHOD = 'SUM' 124 | 125 | 126 | MODEL_CONFIGS = { 127 | 'hrnet18': HRNET_18, 128 | 'hrnet32': HRNET_32, 129 | 'hrnet48': HRNET_48, 130 | } -------------------------------------------------------------------------------- /lib/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | # high_resoluton_net related params for segmentation 14 | HIGH_RESOLUTION_NET = CN() 15 | HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 16 | HIGH_RESOLUTION_NET.STEM_INPLANES = 64 17 | HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 18 | HIGH_RESOLUTION_NET.WITH_HEAD = True 19 | 20 | HIGH_RESOLUTION_NET.STAGE2 = CN() 21 | HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 22 | HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 23 | HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 24 | HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64] 25 | HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC' 26 | HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 27 | 28 | HIGH_RESOLUTION_NET.STAGE3 = CN() 29 | HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 30 | HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 31 | HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 32 | HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128] 33 | HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC' 34 | HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 35 | 36 | HIGH_RESOLUTION_NET.STAGE4 = CN() 37 | HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 38 | HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 39 | HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 40 | HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256] 41 | HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC' 42 | HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 43 | 44 | MODEL_EXTRAS = { 45 | 'seg_hrnet': HIGH_RESOLUTION_NET, 46 | } 47 | -------------------------------------------------------------------------------- /lib/core/criterion.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import torch 8 | import torch.nn as nn 9 | from torch.nn import functional as F 10 | import logging 11 | from config import config 12 | 13 | 14 | class CrossEntropy(nn.Module): 15 | def __init__(self, ignore_label=-1, weight=None): 16 | super(CrossEntropy, self).__init__() 17 | self.ignore_label = ignore_label 18 | self.criterion = nn.CrossEntropyLoss( 19 | weight=weight, 20 | ignore_index=ignore_label 21 | ) 22 | 23 | def _forward(self, score, target): 24 | ph, pw = score.size(2), score.size(3) 25 | h, w = target.size(1), target.size(2) 26 | if ph != h or pw != w: 27 | score = F.interpolate(input=score, size=( 28 | h, w), mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS) 29 | 30 | loss = self.criterion(score, target) 31 | 32 | return loss 33 | 34 | def forward(self, score, target): 35 | 36 | if config.MODEL.NUM_OUTPUTS == 1: 37 | score = [score] 38 | 39 | weights = config.LOSS.BALANCE_WEIGHTS 40 | assert len(weights) == len(score) 41 | 42 | return sum([w * self._forward(x, target) for (w, x) in zip(weights, score)]) 43 | 44 | 45 | class OhemCrossEntropy(nn.Module): 46 | def __init__(self, ignore_label=-1, thres=0.7, 47 | min_kept=100000, weight=None): 48 | super(OhemCrossEntropy, self).__init__() 49 | self.thresh = thres 50 | self.min_kept = max(1, min_kept) 51 | self.ignore_label = ignore_label 52 | self.criterion = nn.CrossEntropyLoss( 53 | weight=weight, 54 | ignore_index=ignore_label, 55 | reduction='none' 56 | ) 57 | 58 | def _ce_forward(self, score, target): 59 | ph, pw = score.size(2), score.size(3) 60 | h, w = target.size(1), target.size(2) 61 | if ph != h or pw != w: 62 | score = F.interpolate(input=score, size=( 63 | h, w), mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS) 64 | 65 | loss = self.criterion(score, target) 66 | 67 | return loss 68 | 69 | def _ohem_forward(self, score, target, **kwargs): 70 | ph, pw = score.size(2), score.size(3) 71 | h, w = target.size(1), target.size(2) 72 | if ph != h or pw != w: 73 | score = F.interpolate(input=score, size=( 74 | h, w), mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS) 75 | pred = F.softmax(score, dim=1) 76 | pixel_losses = self.criterion(score, target).contiguous().view(-1) 77 | mask = target.contiguous().view(-1) != self.ignore_label 78 | 79 | tmp_target = target.clone() 80 | tmp_target[tmp_target == self.ignore_label] = 0 81 | pred = pred.gather(1, tmp_target.unsqueeze(1)) 82 | pred, ind = pred.contiguous().view(-1,)[mask].contiguous().sort() 83 | min_value = pred[min(self.min_kept, pred.numel() - 1)] 84 | threshold = max(min_value, self.thresh) 85 | 86 | pixel_losses = pixel_losses[mask][ind] 87 | pixel_losses = pixel_losses[pred < threshold] 88 | return pixel_losses.mean() 89 | 90 | def forward(self, score, target): 91 | 92 | if config.MODEL.NUM_OUTPUTS == 1: 93 | score = [score] 94 | 95 | weights = config.LOSS.BALANCE_WEIGHTS 96 | assert len(weights) == len(score) 97 | 98 | functions = [self._ce_forward] * \ 99 | (len(weights) - 1) + [self._ohem_forward] 100 | return sum([ 101 | w * func(x, target) 102 | for (w, x, func) in zip(weights, score, functions) 103 | ]) 104 | -------------------------------------------------------------------------------- /lib/core/function.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) 
Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import logging 8 | import os 9 | import time 10 | 11 | import numpy as np 12 | import numpy.ma as ma 13 | from tqdm import tqdm 14 | 15 | import torch 16 | import torch.nn as nn 17 | from torch.nn import functional as F 18 | 19 | from utils.utils import AverageMeter 20 | from utils.utils import get_confusion_matrix 21 | from utils.utils import adjust_learning_rate 22 | 23 | import utils.distributed as dist 24 | 25 | 26 | def reduce_tensor(inp): 27 | """ 28 | Reduce the loss from all processes so that 29 | process with rank 0 has the averaged results. 30 | """ 31 | world_size = dist.get_world_size() 32 | if world_size < 2: 33 | return inp 34 | with torch.no_grad(): 35 | reduced_inp = inp 36 | torch.distributed.reduce(reduced_inp, dst=0) 37 | return reduced_inp / world_size 38 | 39 | 40 | def train(config, epoch, num_epoch, epoch_iters, base_lr, 41 | num_iters, trainloader, optimizer, model, writer_dict): 42 | # Training 43 | model.train() 44 | 45 | batch_time = AverageMeter() 46 | ave_loss = AverageMeter() 47 | tic = time.time() 48 | cur_iters = epoch*epoch_iters 49 | writer = writer_dict['writer'] 50 | global_steps = writer_dict['train_global_steps'] 51 | 52 | for i_iter, batch in enumerate(trainloader, 0): 53 | images, labels, _, _ = batch 54 | images = images.cuda() 55 | labels = labels.long().cuda() 56 | 57 | losses, _ = model(images, labels) 58 | loss = losses.mean() 59 | 60 | if dist.is_distributed(): 61 | reduced_loss = reduce_tensor(loss) 62 | else: 63 | reduced_loss = loss 64 | 65 | model.zero_grad() 66 | loss.backward() 67 | optimizer.step() 68 | 69 | # measure elapsed time 70 | batch_time.update(time.time() - tic) 71 | tic = time.time() 72 | 73 | # update average loss 74 | ave_loss.update(reduced_loss.item()) 75 | 76 | lr = adjust_learning_rate(optimizer, 77 | base_lr, 78 | num_iters, 79 | i_iter+cur_iters) 80 | 81 | if i_iter % config.PRINT_FREQ == 0 and dist.get_rank() == 0: 82 | msg = 'Epoch: [{}/{}] Iter:[{}/{}], Time: {:.2f}, ' \ 83 | 'lr: {}, Loss: {:.6f}' .format( 84 | epoch, num_epoch, i_iter, epoch_iters, 85 | batch_time.average(), [x['lr'] for x in optimizer.param_groups], ave_loss.average()) 86 | logging.info(msg) 87 | 88 | writer.add_scalar('train_loss', ave_loss.average(), global_steps) 89 | writer_dict['train_global_steps'] = global_steps + 1 90 | 91 | def validate(config, testloader, model, writer_dict): 92 | model.eval() 93 | ave_loss = AverageMeter() 94 | nums = config.MODEL.NUM_OUTPUTS 95 | confusion_matrix = np.zeros( 96 | (config.DATASET.NUM_CLASSES, config.DATASET.NUM_CLASSES, nums)) 97 | with torch.no_grad(): 98 | for idx, batch in enumerate(testloader): 99 | image, label, _, _ = batch 100 | size = label.size() 101 | image = image.cuda() 102 | label = label.long().cuda() 103 | 104 | losses, pred = model(image, label) 105 | if not isinstance(pred, (list, tuple)): 106 | pred = [pred] 107 | for i, x in enumerate(pred): 108 | x = F.interpolate( 109 | input=x, size=size[-2:], 110 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 111 | ) 112 | 113 | confusion_matrix[..., i] += get_confusion_matrix( 114 | label, 115 | x, 116 | size, 117 | config.DATASET.NUM_CLASSES, 118 | config.TRAIN.IGNORE_LABEL 119 | ) 120 | 121 | if idx % 10 == 0: 122 | print(idx) 123 | 124 | loss = losses.mean() 125 | if dist.is_distributed(): 126 | reduced_loss = reduce_tensor(loss) 127 | else: 
128 | reduced_loss = loss 129 | ave_loss.update(reduced_loss.item()) 130 | 131 | if dist.is_distributed(): 132 | confusion_matrix = torch.from_numpy(confusion_matrix).cuda() 133 | reduced_confusion_matrix = reduce_tensor(confusion_matrix) 134 | confusion_matrix = reduced_confusion_matrix.cpu().numpy() 135 | 136 | for i in range(nums): 137 | pos = confusion_matrix[..., i].sum(1) 138 | res = confusion_matrix[..., i].sum(0) 139 | tp = np.diag(confusion_matrix[..., i]) 140 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 141 | mean_IoU = IoU_array.mean() 142 | if dist.get_rank() <= 0: 143 | logging.info('{} {} {}'.format(i, IoU_array, mean_IoU)) 144 | 145 | writer = writer_dict['writer'] 146 | global_steps = writer_dict['valid_global_steps'] 147 | writer.add_scalar('valid_loss', ave_loss.average(), global_steps) 148 | writer.add_scalar('valid_mIoU', mean_IoU, global_steps) 149 | writer_dict['valid_global_steps'] = global_steps + 1 150 | return ave_loss.average(), mean_IoU, IoU_array 151 | 152 | 153 | def testval(config, test_dataset, testloader, model, 154 | sv_dir='', sv_pred=False): 155 | model.eval() 156 | confusion_matrix = np.zeros( 157 | (config.DATASET.NUM_CLASSES, config.DATASET.NUM_CLASSES)) 158 | with torch.no_grad(): 159 | for index, batch in enumerate(tqdm(testloader)): 160 | image, label, _, name, *border_padding = batch 161 | size = label.size() 162 | pred = test_dataset.multi_scale_inference( 163 | config, 164 | model, 165 | image, 166 | scales=config.TEST.SCALE_LIST, 167 | flip=config.TEST.FLIP_TEST) 168 | 169 | if len(border_padding) > 0: 170 | border_padding = border_padding[0] 171 | pred = pred[:, :, 0:pred.size(2) - border_padding[0], 0:pred.size(3) - border_padding[1]] 172 | 173 | if pred.size()[-2] != size[-2] or pred.size()[-1] != size[-1]: 174 | pred = F.interpolate( 175 | pred, size[-2:], 176 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 177 | ) 178 | 179 | confusion_matrix += get_confusion_matrix( 180 | label, 181 | pred, 182 | size, 183 | config.DATASET.NUM_CLASSES, 184 | config.TRAIN.IGNORE_LABEL) 185 | 186 | if sv_pred: 187 | sv_path = os.path.join(sv_dir, 'test_results') 188 | if not os.path.exists(sv_path): 189 | os.mkdir(sv_path) 190 | test_dataset.save_pred(pred, sv_path, name) 191 | 192 | if index % 100 == 0: 193 | logging.info('processing: %d images' % index) 194 | pos = confusion_matrix.sum(1) 195 | res = confusion_matrix.sum(0) 196 | tp = np.diag(confusion_matrix) 197 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 198 | mean_IoU = IoU_array.mean() 199 | logging.info('mIoU: %.4f' % (mean_IoU)) 200 | 201 | pos = confusion_matrix.sum(1) 202 | res = confusion_matrix.sum(0) 203 | tp = np.diag(confusion_matrix) 204 | pixel_acc = tp.sum()/pos.sum() 205 | mean_acc = (tp/np.maximum(1.0, pos)).mean() 206 | IoU_array = (tp / np.maximum(1.0, pos + res - tp)) 207 | mean_IoU = IoU_array.mean() 208 | 209 | return mean_IoU, IoU_array, pixel_acc, mean_acc 210 | 211 | 212 | def test(config, test_dataset, testloader, model, 213 | sv_dir='', sv_pred=True): 214 | model.eval() 215 | with torch.no_grad(): 216 | for _, batch in enumerate(tqdm(testloader)): 217 | image, size, name = batch 218 | size = size[0] 219 | pred = test_dataset.multi_scale_inference( 220 | config, 221 | model, 222 | image, 223 | scales=config.TEST.SCALE_LIST, 224 | flip=config.TEST.FLIP_TEST) 225 | 226 | if pred.size()[-2] != size[0] or pred.size()[-1] != size[1]: 227 | pred = F.interpolate( 228 | pred, size[-2:], 229 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 230 | ) 
231 | 232 | if sv_pred: 233 | sv_path = os.path.join(sv_dir, 'test_results') 234 | if not os.path.exists(sv_path): 235 | os.mkdir(sv_path) 236 | test_dataset.save_pred(pred, sv_path, name) 237 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from .cityscapes import Cityscapes as cityscapes 12 | from .lip import LIP as lip 13 | from .pascal_ctx import PASCALContext as pascal_ctx 14 | from .ade20k import ADE20K as ade20k 15 | from .cocostuff import COCOStuff as cocostuff -------------------------------------------------------------------------------- /lib/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | 12 | import torch 13 | from torch.nn import functional as F 14 | from PIL import Image 15 | 16 | from .base_dataset import BaseDataset 17 | 18 | 19 | class ADE20K(BaseDataset): 20 | def __init__(self, 21 | root, 22 | list_path, 23 | num_samples=None, 24 | num_classes=150, 25 | multi_scale=True, 26 | flip=True, 27 | ignore_label=-1, 28 | base_size=520, 29 | crop_size=(520, 520), 30 | downsample_rate=1, 31 | scale_factor=11, 32 | mean=[0.485, 0.456, 0.406], 33 | std=[0.229, 0.224, 0.225]): 34 | 35 | super(ADE20K, self).__init__(ignore_label, base_size, 36 | crop_size, downsample_rate, scale_factor, mean, std) 37 | 38 | self.root = root 39 | self.num_classes = num_classes 40 | self.list_path = list_path 41 | self.class_weights = None 42 | 43 | self.multi_scale = multi_scale 44 | self.flip = flip 45 | self.img_list = [line.strip().split() for line in open(root+list_path)] 46 | 47 | self.files = self.read_files() 48 | if num_samples: 49 | self.files = self.files[:num_samples] 50 | 51 | def read_files(self): 52 | files = [] 53 | for item in self.img_list: 54 | image_path, label_path = item 55 | name = os.path.splitext(os.path.basename(label_path))[0] 56 | sample = { 57 | 'img': image_path, 58 | 'label': label_path, 59 | 'name': name 60 | } 61 | files.append(sample) 62 | return files 63 | 64 | def resize_image(self, image, label, size): 65 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) 66 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST) 67 | return image, label 68 | 69 | def __getitem__(self, index): 70 | item = self.files[index] 71 | name = item["name"] 72 | # image_path = os.path.join(self.root, 'ade20k', item['img']) 73 | # label_path = os.path.join(self.root, 'ade20k', item['label']) 74 | image_path = os.path.join(self.root, item['img']) 75 | label_path = os.path.join(self.root, item['label']) 76 | image = cv2.imread( 77 | image_path, 78 | cv2.IMREAD_COLOR 79 | ) 80 | label = np.array( 81 | 
Image.open(label_path).convert('P') 82 | ) 83 | label = self.reduce_zero_label(label) 84 | size = label.shape 85 | 86 | if 'testval' in self.list_path: 87 | image = self.resize_short_length( 88 | image, 89 | short_length=self.base_size, 90 | fit_stride=8 91 | ) 92 | image = self.input_transform(image) 93 | image = image.transpose((2, 0, 1)) 94 | 95 | return image.copy(), label.copy(), np.array(size), name 96 | 97 | if 'val' in self.list_path: 98 | image, label = self.resize_short_length( 99 | image, 100 | label=label, 101 | short_length=self.base_size, 102 | fit_stride=8 103 | ) 104 | image, label = self.rand_crop(image, label) 105 | image = self.input_transform(image) 106 | image = image.transpose((2, 0, 1)) 107 | 108 | return image.copy(), label.copy(), np.array(size), name 109 | 110 | image, label = self.resize_short_length(image, label, short_length=self.base_size) 111 | image, label = self.gen_sample(image, label, self.multi_scale, self.flip) 112 | 113 | return image.copy(), label.copy(), np.array(size), name -------------------------------------------------------------------------------- /lib/datasets/base_dataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | import random 12 | 13 | import torch 14 | from torch.nn import functional as F 15 | from torch.utils import data 16 | 17 | from config import config 18 | 19 | 20 | class BaseDataset(data.Dataset): 21 | def __init__(self, 22 | ignore_label=-1, 23 | base_size=2048, 24 | crop_size=(512, 1024), 25 | downsample_rate=1, 26 | scale_factor=16, 27 | mean=[0.485, 0.456, 0.406], 28 | std=[0.229, 0.224, 0.225]): 29 | 30 | self.base_size = base_size 31 | self.crop_size = crop_size 32 | self.ignore_label = ignore_label 33 | 34 | self.mean = mean 35 | self.std = std 36 | self.scale_factor = scale_factor 37 | self.downsample_rate = 1./downsample_rate 38 | 39 | self.files = [] 40 | 41 | def __len__(self): 42 | return len(self.files) 43 | 44 | def input_transform(self, image): 45 | image = image.astype(np.float32)[:, :, ::-1] 46 | image = image / 255.0 47 | image -= self.mean 48 | image /= self.std 49 | return image 50 | 51 | def label_transform(self, label): 52 | return np.array(label).astype('int32') 53 | 54 | def pad_image(self, image, h, w, size, padvalue): 55 | pad_image = image.copy() 56 | pad_h = max(size[0] - h, 0) 57 | pad_w = max(size[1] - w, 0) 58 | if pad_h > 0 or pad_w > 0: 59 | pad_image = cv2.copyMakeBorder(image, 0, pad_h, 0, 60 | pad_w, cv2.BORDER_CONSTANT, 61 | value=padvalue) 62 | 63 | return pad_image 64 | 65 | def rand_crop(self, image, label): 66 | h, w = image.shape[:-1] 67 | image = self.pad_image(image, h, w, self.crop_size, 68 | (0.0, 0.0, 0.0)) 69 | label = self.pad_image(label, h, w, self.crop_size, 70 | (self.ignore_label,)) 71 | 72 | new_h, new_w = label.shape 73 | x = random.randint(0, new_w - self.crop_size[1]) 74 | y = random.randint(0, new_h - self.crop_size[0]) 75 | image = image[y:y+self.crop_size[0], x:x+self.crop_size[1]] 76 | label = label[y:y+self.crop_size[0], x:x+self.crop_size[1]] 77 | 78 | return image, label 79 | 80 | def multi_scale_aug(self, image, label=None, 81 | rand_scale=1, rand_crop=True): 82 | long_size = 
np.int(self.base_size * rand_scale + 0.5) 83 | h, w = image.shape[:2] 84 | if h > w: 85 | new_h = long_size 86 | new_w = np.int(w * long_size / h + 0.5) 87 | else: 88 | new_w = long_size 89 | new_h = np.int(h * long_size / w + 0.5) 90 | 91 | image = cv2.resize(image, (new_w, new_h), 92 | interpolation=cv2.INTER_LINEAR) 93 | if label is not None: 94 | label = cv2.resize(label, (new_w, new_h), 95 | interpolation=cv2.INTER_NEAREST) 96 | else: 97 | return image 98 | 99 | if rand_crop: 100 | image, label = self.rand_crop(image, label) 101 | 102 | return image, label 103 | 104 | def resize_short_length(self, image, label=None, short_length=None, fit_stride=None, return_padding=False): 105 | h, w = image.shape[:2] 106 | if h < w: 107 | new_h = short_length 108 | new_w = np.int(w * short_length / h + 0.5) 109 | else: 110 | new_w = short_length 111 | new_h = np.int(h * short_length / w + 0.5) 112 | image = cv2.resize(image, (new_w, new_h), 113 | interpolation=cv2.INTER_LINEAR) 114 | pad_w, pad_h = 0, 0 115 | if fit_stride is not None: 116 | pad_w = 0 if (new_w % fit_stride == 0) else fit_stride - (new_w % fit_stride) 117 | pad_h = 0 if (new_h % fit_stride == 0) else fit_stride - (new_h % fit_stride) 118 | image = cv2.copyMakeBorder( 119 | image, 0, pad_h, 0, pad_w, 120 | cv2.BORDER_CONSTANT, value=tuple(x * 255 for x in self.mean[::-1]) 121 | ) 122 | 123 | if label is not None: 124 | label = cv2.resize( 125 | label, (new_w, new_h), 126 | interpolation=cv2.INTER_NEAREST) 127 | if pad_h > 0 or pad_w > 0: 128 | label = cv2.copyMakeBorder( 129 | label, 0, pad_h, 0, pad_w, 130 | cv2.BORDER_CONSTANT, value=self.ignore_label 131 | ) 132 | if return_padding: 133 | return image, label, (pad_h, pad_w) 134 | else: 135 | return image, label 136 | else: 137 | if return_padding: 138 | return image, (pad_h, pad_w) 139 | else: 140 | return image 141 | 142 | def random_brightness(self, img): 143 | if not config.TRAIN.RANDOM_BRIGHTNESS: 144 | return img 145 | if random.random() < 0.5: 146 | return img 147 | self.shift_value = config.TRAIN.RANDOM_BRIGHTNESS_SHIFT_VALUE 148 | img = img.astype(np.float32) 149 | shift = random.randint(-self.shift_value, self.shift_value) 150 | img[:, :, :] += shift 151 | img = np.around(img) 152 | img = np.clip(img, 0, 255).astype(np.uint8) 153 | return img 154 | 155 | def gen_sample(self, image, label, 156 | multi_scale=True, is_flip=True): 157 | if multi_scale: 158 | rand_scale = 0.5 + random.randint(0, self.scale_factor) / 10.0 159 | image, label = self.multi_scale_aug(image, label, 160 | rand_scale=rand_scale) 161 | 162 | image = self.random_brightness(image) 163 | image = self.input_transform(image) 164 | label = self.label_transform(label) 165 | 166 | image = image.transpose((2, 0, 1)) 167 | 168 | if is_flip: 169 | flip = np.random.choice(2) * 2 - 1 170 | image = image[:, :, ::flip] 171 | label = label[:, ::flip] 172 | 173 | if self.downsample_rate != 1: 174 | label = cv2.resize( 175 | label, 176 | None, 177 | fx=self.downsample_rate, 178 | fy=self.downsample_rate, 179 | interpolation=cv2.INTER_NEAREST 180 | ) 181 | 182 | return image, label 183 | 184 | def reduce_zero_label(self, labelmap): 185 | labelmap = np.array(labelmap) 186 | encoded_labelmap = labelmap - 1 187 | 188 | return encoded_labelmap 189 | 190 | def inference(self, config, model, image, flip=False): 191 | size = image.size() 192 | pred = model(image) 193 | 194 | if config.MODEL.NUM_OUTPUTS > 1: 195 | pred = pred[config.TEST.OUTPUT_INDEX] 196 | 197 | pred = F.interpolate( 198 | input=pred, size=size[-2:], 199 | 
mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 200 | ) 201 | 202 | if flip: 203 | flip_img = image.numpy()[:, :, :, ::-1] 204 | flip_output = model(torch.from_numpy(flip_img.copy())) 205 | 206 | if config.MODEL.NUM_OUTPUTS > 1: 207 | flip_output = flip_output[config.TEST.OUTPUT_INDEX] 208 | 209 | flip_output = F.interpolate( 210 | input=flip_output, size=size[-2:], 211 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 212 | ) 213 | 214 | flip_pred = flip_output.cpu().numpy().copy() 215 | flip_pred = torch.from_numpy( 216 | flip_pred[:, :, :, ::-1].copy()).cuda() 217 | pred += flip_pred 218 | pred = pred * 0.5 219 | return pred.exp() 220 | 221 | def multi_scale_inference(self, config, model, image, scales=[1], flip=False): 222 | batch, _, ori_height, ori_width = image.size() 223 | assert batch == 1, "only supporting batchsize 1." 224 | image = image.numpy()[0].transpose((1, 2, 0)).copy() 225 | stride_h = np.int(self.crop_size[0] * 2.0 / 3.0) 226 | stride_w = np.int(self.crop_size[1] * 2.0 / 3.0) 227 | final_pred = torch.zeros([1, self.num_classes, 228 | ori_height, ori_width]).cuda() 229 | padvalue = -1.0 * np.array(self.mean) / np.array(self.std) 230 | for scale in scales: 231 | new_img = self.multi_scale_aug(image=image, 232 | rand_scale=scale, 233 | rand_crop=False) 234 | height, width = new_img.shape[:-1] 235 | 236 | if max(height, width) <= np.min(self.crop_size): 237 | new_img = self.pad_image(new_img, height, width, 238 | self.crop_size, padvalue) 239 | new_img = new_img.transpose((2, 0, 1)) 240 | new_img = np.expand_dims(new_img, axis=0) 241 | new_img = torch.from_numpy(new_img) 242 | preds = self.inference(config, model, new_img, flip) 243 | preds = preds[:, :, 0:height, 0:width] 244 | else: 245 | if height < self.crop_size[0] or width < self.crop_size[1]: 246 | new_img = self.pad_image(new_img, height, width, 247 | self.crop_size, padvalue) 248 | new_h, new_w = new_img.shape[:-1] 249 | rows = np.int(np.ceil(1.0 * (new_h - 250 | self.crop_size[0]) / stride_h)) + 1 251 | cols = np.int(np.ceil(1.0 * (new_w - 252 | self.crop_size[1]) / stride_w)) + 1 253 | preds = torch.zeros([1, self.num_classes, 254 | new_h, new_w]).cuda() 255 | count = torch.zeros([1, 1, new_h, new_w]).cuda() 256 | 257 | for r in range(rows): 258 | for c in range(cols): 259 | h0 = r * stride_h 260 | w0 = c * stride_w 261 | h1 = min(h0 + self.crop_size[0], new_h) 262 | w1 = min(w0 + self.crop_size[1], new_w) 263 | crop_img = new_img[h0:h1, w0:w1, :] 264 | if h1 == new_h or w1 == new_w: 265 | crop_img = self.pad_image(crop_img, 266 | h1-h0, 267 | w1-w0, 268 | self.crop_size, 269 | padvalue) 270 | crop_img = crop_img.transpose((2, 0, 1)) 271 | crop_img = np.expand_dims(crop_img, axis=0) 272 | crop_img = torch.from_numpy(crop_img) 273 | pred = self.inference(config, model, crop_img, flip) 274 | preds[:, :, h0:h1, w0:w1] += pred[:, :, 0:h1-h0, 0:w1-w0] 275 | count[:, :, h0:h1, w0:w1] += 1 276 | preds = preds / count 277 | preds = preds[:, :, :height, :width] 278 | 279 | preds = F.interpolate( 280 | preds, (ori_height, ori_width), 281 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 282 | ) 283 | final_pred += preds 284 | return final_pred 285 | -------------------------------------------------------------------------------- /lib/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
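A minimal sketch of how the sliding-window `multi_scale_inference` above is typically driven at test time; the `test_dataset`, `testloader` and `model` names are hypothetical stand-ins for the objects built in tools/test.py, and the scale list is just an example:

    import torch

    model.eval()
    with torch.no_grad():
        for image, label, size, name in testloader:  # multi_scale_inference asserts batch size 1
            pred = test_dataset.multi_scale_inference(
                config, model, image,
                scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], flip=True)
            # pred is the sum of the per-scale predictions at the original
            # resolution; an argmax over the class dimension gives the label map.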
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | from PIL import Image 12 | 13 | import torch 14 | from torch.nn import functional as F 15 | 16 | from .base_dataset import BaseDataset 17 | 18 | class Cityscapes(BaseDataset): 19 | def __init__(self, 20 | root, 21 | list_path, 22 | num_samples=None, 23 | num_classes=19, 24 | multi_scale=True, 25 | flip=True, 26 | ignore_label=-1, 27 | base_size=2048, 28 | crop_size=(512, 1024), 29 | downsample_rate=1, 30 | scale_factor=16, 31 | mean=[0.485, 0.456, 0.406], 32 | std=[0.229, 0.224, 0.225]): 33 | 34 | super(Cityscapes, self).__init__(ignore_label, base_size, 35 | crop_size, downsample_rate, scale_factor, mean, std,) 36 | 37 | self.root = root 38 | self.list_path = list_path 39 | self.num_classes = num_classes 40 | 41 | self.multi_scale = multi_scale 42 | self.flip = flip 43 | 44 | self.img_list = [line.strip().split() for line in open(root+list_path)] 45 | 46 | self.files = self.read_files() 47 | if num_samples: 48 | self.files = self.files[:num_samples] 49 | 50 | self.label_mapping = {-1: ignore_label, 0: ignore_label, 51 | 1: ignore_label, 2: ignore_label, 52 | 3: ignore_label, 4: ignore_label, 53 | 5: ignore_label, 6: ignore_label, 54 | 7: 0, 8: 1, 9: ignore_label, 55 | 10: ignore_label, 11: 2, 12: 3, 56 | 13: 4, 14: ignore_label, 15: ignore_label, 57 | 16: ignore_label, 17: 5, 18: ignore_label, 58 | 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 59 | 25: 12, 26: 13, 27: 14, 28: 15, 60 | 29: ignore_label, 30: ignore_label, 61 | 31: 16, 32: 17, 33: 18} 62 | self.class_weights = torch.FloatTensor([0.8373, 0.918, 0.866, 1.0345, 63 | 1.0166, 0.9969, 0.9754, 1.0489, 64 | 0.8786, 1.0023, 0.9539, 0.9843, 65 | 1.1116, 0.9037, 1.0865, 1.0955, 66 | 1.0865, 1.1529, 1.0507]).cuda() 67 | 68 | def read_files(self): 69 | files = [] 70 | if 'test' in self.list_path: 71 | for item in self.img_list: 72 | image_path = item 73 | name = os.path.splitext(os.path.basename(image_path[0]))[0] 74 | files.append({ 75 | "img": image_path[0], 76 | "name": name, 77 | }) 78 | else: 79 | for item in self.img_list: 80 | image_path, label_path = item 81 | name = os.path.splitext(os.path.basename(label_path))[0] 82 | files.append({ 83 | "img": image_path, 84 | "label": label_path, 85 | "name": name, 86 | "weight": 1 87 | }) 88 | return files 89 | 90 | def convert_label(self, label, inverse=False): 91 | temp = label.copy() 92 | if inverse: 93 | for v, k in self.label_mapping.items(): 94 | label[temp == k] = v 95 | else: 96 | for k, v in self.label_mapping.items(): 97 | label[temp == k] = v 98 | return label 99 | 100 | def __getitem__(self, index): 101 | item = self.files[index] 102 | name = item["name"] 103 | # image = cv2.imread(os.path.join(self.root,'cityscapes',item["img"]), 104 | # cv2.IMREAD_COLOR) 105 | image = cv2.imread(os.path.join(self.root, item["img"]), 106 | cv2.IMREAD_COLOR) 107 | size = image.shape 108 | 109 | if 'test' in self.list_path: 110 | image = self.input_transform(image) 111 | image = image.transpose((2, 0, 1)) 112 | 113 | return image.copy(), np.array(size), name 114 | 115 | # label = cv2.imread(os.path.join(self.root,'cityscapes',item["label"]), 116 | # cv2.IMREAD_GRAYSCALE) 117 | label = cv2.imread(os.path.join(self.root, item["label"]), 118 | cv2.IMREAD_GRAYSCALE) 119 | label = self.convert_label(label) 120 | 121 | image, label = self.gen_sample(image, label, 122 | self.multi_scale, self.flip) 123 | 124 | 
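# At this point `image` is a normalized float32 array in CHW layout and `label` an int32 train-id map; gen_sample has applied the configured random scaling, cropping, brightness shift and horizontal flip.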
return image.copy(), label.copy(), np.array(size), name 125 | 126 | def multi_scale_inference(self, config, model, image, scales=[1], flip=False): 127 | batch, _, ori_height, ori_width = image.size() 128 | assert batch == 1, "only supporting batchsize 1." 129 | image = image.numpy()[0].transpose((1,2,0)).copy() 130 | stride_h = np.int(self.crop_size[0] * 1.0) 131 | stride_w = np.int(self.crop_size[1] * 1.0) 132 | final_pred = torch.zeros([1, self.num_classes, 133 | ori_height,ori_width]).cuda() 134 | for scale in scales: 135 | new_img = self.multi_scale_aug(image=image, 136 | rand_scale=scale, 137 | rand_crop=False) 138 | height, width = new_img.shape[:-1] 139 | 140 | if scale <= 1.0: 141 | new_img = new_img.transpose((2, 0, 1)) 142 | new_img = np.expand_dims(new_img, axis=0) 143 | new_img = torch.from_numpy(new_img) 144 | preds = self.inference(config, model, new_img, flip) 145 | preds = preds[:, :, 0:height, 0:width] 146 | else: 147 | new_h, new_w = new_img.shape[:-1] 148 | rows = np.int(np.ceil(1.0 * (new_h - 149 | self.crop_size[0]) / stride_h)) + 1 150 | cols = np.int(np.ceil(1.0 * (new_w - 151 | self.crop_size[1]) / stride_w)) + 1 152 | preds = torch.zeros([1, self.num_classes, 153 | new_h,new_w]).cuda() 154 | count = torch.zeros([1,1, new_h, new_w]).cuda() 155 | 156 | for r in range(rows): 157 | for c in range(cols): 158 | h0 = r * stride_h 159 | w0 = c * stride_w 160 | h1 = min(h0 + self.crop_size[0], new_h) 161 | w1 = min(w0 + self.crop_size[1], new_w) 162 | h0 = max(int(h1 - self.crop_size[0]), 0) 163 | w0 = max(int(w1 - self.crop_size[1]), 0) 164 | crop_img = new_img[h0:h1, w0:w1, :] 165 | crop_img = crop_img.transpose((2, 0, 1)) 166 | crop_img = np.expand_dims(crop_img, axis=0) 167 | crop_img = torch.from_numpy(crop_img) 168 | pred = self.inference(config, model, crop_img, flip) 169 | preds[:,:,h0:h1,w0:w1] += pred[:,:, 0:h1-h0, 0:w1-w0] 170 | count[:,:,h0:h1,w0:w1] += 1 171 | preds = preds / count 172 | preds = preds[:,:,:height,:width] 173 | 174 | preds = F.interpolate( 175 | preds, (ori_height, ori_width), 176 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 177 | ) 178 | final_pred += preds 179 | return final_pred 180 | 181 | def get_palette(self, n): 182 | palette = [0] * (n * 3) 183 | for j in range(0, n): 184 | lab = j 185 | palette[j * 3 + 0] = 0 186 | palette[j * 3 + 1] = 0 187 | palette[j * 3 + 2] = 0 188 | i = 0 189 | while lab: 190 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) 191 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) 192 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) 193 | i += 1 194 | lab >>= 3 195 | return palette 196 | 197 | def save_pred(self, preds, sv_path, name): 198 | palette = self.get_palette(256) 199 | preds = np.asarray(np.argmax(preds.cpu(), axis=1), dtype=np.uint8) 200 | for i in range(preds.shape[0]): 201 | pred = self.convert_label(preds[i], inverse=True) 202 | save_img = Image.fromarray(pred) 203 | save_img.putpalette(palette) 204 | save_img.save(os.path.join(sv_path, name[i]+'.png')) 205 | 206 | 207 | 208 | -------------------------------------------------------------------------------- /lib/datasets/cocostuff.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
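A short sketch of the id conversion done by `convert_label` above; the file name is hypothetical:

    import cv2
    raw = cv2.imread('munster_000000_000019_gtFine_labelIds.png', cv2.IMREAD_GRAYSCALE)
    train_ids = dataset.convert_label(raw)  # collapses the 34 raw ids onto 19 train ids, e.g. raw id 7 (road) -> 0
    raw_ids = dataset.convert_label(train_ids.copy(), inverse=True)  # restores raw ids for the 19 valid classes, as in save_pred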
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | 12 | import torch 13 | from torch.nn import functional as F 14 | from PIL import Image 15 | 16 | from .base_dataset import BaseDataset 17 | 18 | 19 | class COCOStuff(BaseDataset): 20 | def __init__(self, 21 | root, 22 | list_path, 23 | num_samples=None, 24 | num_classes=171, 25 | multi_scale=True, 26 | flip=True, 27 | ignore_label=-1, 28 | base_size=520, 29 | crop_size=(520, 520), 30 | downsample_rate=1, 31 | scale_factor=11, 32 | mean=[0.485, 0.456, 0.406], 33 | std=[0.229, 0.224, 0.225]): 34 | 35 | super(COCOStuff, self).__init__(ignore_label, base_size, 36 | crop_size, downsample_rate, scale_factor, mean, std) 37 | 38 | self.root = root 39 | self.num_classes = num_classes 40 | self.list_path = list_path 41 | self.class_weights = None 42 | 43 | self.multi_scale = multi_scale 44 | self.flip = flip 45 | self.crop_size = crop_size 46 | self.img_list = [line.strip().split() for line in open(root+list_path)] 47 | 48 | self.files = self.read_files() 49 | if num_samples: 50 | self.files = self.files[:num_samples] 51 | self.mapping = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 52 | 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 53 | 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 54 | 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 55 | 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 56 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 57 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 58 | 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 59 | 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 60 | 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 61 | 177, 178, 179, 180, 181, 182] 62 | 63 | def read_files(self): 64 | files = [] 65 | for item in self.img_list: 66 | image_path, label_path = item 67 | name = os.path.splitext(os.path.basename(label_path))[0] 68 | sample = { 69 | 'img': image_path, 70 | 'label': label_path, 71 | 'name': name 72 | } 73 | files.append(sample) 74 | return files 75 | 76 | def encode_label(self, labelmap): 77 | ret = np.ones_like(labelmap) * 255 78 | for idx, label in enumerate(self.mapping): 79 | ret[labelmap == label] = idx 80 | 81 | return ret 82 | 83 | def resize_image(self, image, label, size): 84 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) 85 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST) 86 | return image, label 87 | 88 | def __getitem__(self, index): 89 | item = self.files[index] 90 | name = item["name"] 91 | image_path = os.path.join(self.root, item['img']) 92 | label_path = os.path.join(self.root, item['label']) 93 | image = cv2.imread( 94 | image_path, 95 | cv2.IMREAD_COLOR 96 | ) 97 | label = np.array( 98 | Image.open(label_path).convert('P') 99 | ) 100 | label = self.encode_label(label) 101 | label = self.reduce_zero_label(label) 102 | size = label.shape 103 | 104 | if 'testval' in self.list_path: 105 | image, border_padding = self.resize_short_length( 106 | image, 107 | short_length=self.base_size, 108 | fit_stride=8, 109 | return_padding=True 110 | ) 111 | image = self.input_transform(image) 112 | image = image.transpose((2, 0, 1)) 113 | 114 | return image.copy(), 
label.copy(), np.array(size), name, border_padding 115 | 116 | if 'val' in self.list_path: 117 | image, label = self.resize_short_length( 118 | image, 119 | label=label, 120 | short_length=self.base_size, 121 | fit_stride=8 122 | ) 123 | image, label = self.rand_crop(image, label) 124 | image = self.input_transform(image) 125 | image = image.transpose((2, 0, 1)) 126 | 127 | return image.copy(), label.copy(), np.array(size), name 128 | 129 | image, label = self.resize_short_length(image, label, short_length=self.base_size) 130 | image, label = self.gen_sample(image, label, self.multi_scale, self.flip) 131 | 132 | return image.copy(), label.copy(), np.array(size), name -------------------------------------------------------------------------------- /lib/datasets/lip.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import os 8 | 9 | import cv2 10 | import numpy as np 11 | 12 | import torch 13 | from torch.nn import functional as F 14 | from PIL import Image 15 | 16 | from .base_dataset import BaseDataset 17 | 18 | 19 | class LIP(BaseDataset): 20 | def __init__(self, 21 | root, 22 | list_path, 23 | num_samples=None, 24 | num_classes=20, 25 | multi_scale=True, 26 | flip=True, 27 | ignore_label=-1, 28 | base_size=473, 29 | crop_size=(473, 473), 30 | downsample_rate=1, 31 | scale_factor=11, 32 | mean=[0.485, 0.456, 0.406], 33 | std=[0.229, 0.224, 0.225]): 34 | 35 | super(LIP, self).__init__(ignore_label, base_size, 36 | crop_size, downsample_rate, scale_factor, mean, std) 37 | 38 | self.root = root 39 | self.num_classes = num_classes 40 | self.list_path = list_path 41 | self.class_weights = None 42 | 43 | self.multi_scale = multi_scale 44 | self.flip = flip 45 | self.img_list = [line.strip().split() for line in open(root+list_path)] 46 | 47 | self.files = self.read_files() 48 | if num_samples: 49 | self.files = self.files[:num_samples] 50 | 51 | def read_files(self): 52 | files = [] 53 | for item in self.img_list: 54 | if 'train' in self.list_path: 55 | image_path, label_path, _ = item 56 | name = os.path.splitext(os.path.basename(label_path))[0] 57 | sample = {"img": image_path, 58 | "label": label_path, 59 | "name": name, } 60 | elif 'val' in self.list_path: 61 | image_path, label_path = item 62 | name = os.path.splitext(os.path.basename(label_path))[0] 63 | sample = {"img": image_path, 64 | "label": label_path, 65 | "name": name, } 66 | else: 67 | raise NotImplementedError('Unknown subset.') 68 | files.append(sample) 69 | return files 70 | 71 | def resize_image(self, image, label, size): 72 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) 73 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST) 74 | return image, label 75 | 76 | def __getitem__(self, index): 77 | item = self.files[index] 78 | name = item["name"] 79 | image_path = os.path.join(self.root, item['img']) 80 | label_path = os.path.join(self.root, item['label']) 81 | image = cv2.imread( 82 | image_path, 83 | cv2.IMREAD_COLOR 84 | ) 85 | label = np.array( 86 | Image.open(label_path).convert('P') 87 | ) 88 | 89 | size = label.shape 90 | if 'testval' in self.list_path: 91 | image = cv2.resize(image, self.crop_size, 92 | interpolation=cv2.INTER_LINEAR) 93 | image = 
self.input_transform(image) 94 | image = image.transpose((2, 0, 1)) 95 | 96 | return image.copy(), label.copy(), np.array(size), name 97 | 98 | if self.flip: 99 | flip = np.random.choice(2) * 2 - 1 100 | image = image[:, ::flip, :] 101 | label = label[:, ::flip] 102 | 103 | if flip == -1: 104 | right_idx = [15, 17, 19] 105 | left_idx = [14, 16, 18] 106 | for i in range(0, 3): 107 | right_pos = np.where(label == right_idx[i]) 108 | left_pos = np.where(label == left_idx[i]) 109 | label[right_pos[0], right_pos[1]] = left_idx[i] 110 | label[left_pos[0], left_pos[1]] = right_idx[i] 111 | 112 | image, label = self.resize_image(image, label, self.crop_size) 113 | image, label = self.gen_sample(image, label, 114 | self.multi_scale, False) 115 | 116 | return image.copy(), label.copy(), np.array(size), name 117 | 118 | def inference(self, config, model, image, flip): 119 | size = image.size() 120 | pred = model(image) 121 | if config.MODEL.NUM_OUTPUTS > 1: 122 | pred = pred[config.TEST.OUTPUT_INDEX] 123 | 124 | pred = F.interpolate( 125 | input=pred, size=size[-2:], 126 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 127 | ) 128 | 129 | if flip: 130 | flip_img = image.numpy()[:, :, :, ::-1] 131 | flip_output = model(torch.from_numpy(flip_img.copy())) 132 | 133 | if config.MODEL.NUM_OUTPUTS > 1: 134 | flip_output = flip_output[config.TEST.OUTPUT_INDEX] 135 | 136 | flip_output = F.interpolate( 137 | input=flip_output, size=size[-2:], 138 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS 139 | ) 140 | 141 | flip_output = flip_output.cpu() 142 | flip_pred = flip_output.cpu().numpy().copy() 143 | flip_pred[:, 14, :, :] = flip_output[:, 15, :, :] 144 | flip_pred[:, 15, :, :] = flip_output[:, 14, :, :] 145 | flip_pred[:, 16, :, :] = flip_output[:, 17, :, :] 146 | flip_pred[:, 17, :, :] = flip_output[:, 16, :, :] 147 | flip_pred[:, 18, :, :] = flip_output[:, 19, :, :] 148 | flip_pred[:, 19, :, :] = flip_output[:, 18, :, :] 149 | flip_pred = torch.from_numpy( 150 | flip_pred[:, :, :, ::-1].copy()).cuda() 151 | pred += flip_pred 152 | pred = pred * 0.5 153 | return pred.exp() 154 | -------------------------------------------------------------------------------- /lib/datasets/pascal_ctx.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
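Because LIP is a human-parsing task, the flip branch of `LIP.inference` above must exchange left/right part channels before averaging the flipped logits; the six explicit assignments are equivalent to this compact sketch (assuming numpy arrays throughout):

    for a, b in [(14, 15), (16, 17), (18, 19)]:  # left/right arm, leg and shoe channel pairs
        flip_pred[:, [a, b]] = flip_output[:, [b, a]]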
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # Referring to the implementation in 6 | # https://github.com/zhanghang1989/PyTorch-Encoding 7 | # ------------------------------------------------------------------------------ 8 | 9 | import os 10 | 11 | import cv2 12 | import numpy as np 13 | 14 | import torch 15 | from torch.nn import functional as F 16 | from PIL import Image 17 | 18 | from .base_dataset import BaseDataset 19 | 20 | class PASCALContext(BaseDataset): 21 | def __init__(self, 22 | root, 23 | list_path, 24 | num_samples=None, 25 | num_classes=59, 26 | multi_scale=True, 27 | flip=True, 28 | ignore_label=-1, 29 | base_size=520, 30 | crop_size=(480, 480), 31 | downsample_rate=1, 32 | scale_factor=16, 33 | mean=[0.485, 0.456, 0.406], 34 | std=[0.229, 0.224, 0.225]): 35 | 36 | super(PASCALContext, self).__init__(ignore_label, base_size, 37 | crop_size, downsample_rate, scale_factor, mean, std) 38 | 39 | self.root = root 40 | self.num_classes = num_classes 41 | self.list_path = list_path 42 | self.class_weights = None 43 | 44 | self.multi_scale = multi_scale 45 | self.flip = flip 46 | self.crop_size = crop_size 47 | self.img_list = [line.strip().split() for line in open(root+list_path)] 48 | 49 | self.files = self.read_files() 50 | if num_samples: 51 | self.files = self.files[:num_samples] 52 | 53 | def read_files(self): 54 | files = [] 55 | for item in self.img_list: 56 | image_path, label_path = item 57 | name = os.path.splitext(os.path.basename(label_path))[0] 58 | sample = { 59 | 'img': image_path, 60 | 'label': label_path, 61 | 'name': name 62 | } 63 | files.append(sample) 64 | return files 65 | 66 | def resize_image(self, image, label, size): 67 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR) 68 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST) 69 | return image, label 70 | 71 | def __getitem__(self, index): 72 | item = self.files[index] 73 | name = item["name"] 74 | image_path = os.path.join(self.root, item['img']) 75 | label_path = os.path.join(self.root, item['label']) 76 | image = cv2.imread( 77 | image_path, 78 | cv2.IMREAD_COLOR 79 | ) 80 | label = np.array( 81 | Image.open(label_path).convert('P') 82 | ) 83 | if self.num_classes == 59: 84 | label = self.reduce_zero_label(label) 85 | size = label.shape 86 | 87 | if 'testval' in self.list_path: 88 | image, border_padding = self.resize_short_length( 89 | image, 90 | short_length=self.base_size, 91 | fit_stride=8, 92 | return_padding=True 93 | ) 94 | image = self.input_transform(image) 95 | image = image.transpose((2, 0, 1)) 96 | 97 | return image.copy(), label.copy(), np.array(size), name, border_padding 98 | 99 | if 'val' in self.list_path: 100 | image, label = self.resize_short_length( 101 | image, 102 | label=label, 103 | short_length=self.base_size, 104 | fit_stride=8 105 | ) 106 | image, label = self.rand_crop(image, label) 107 | image = self.input_transform(image) 108 | image = image.transpose((2, 0, 1)) 109 | 110 | return image.copy(), label.copy(), np.array(size), name 111 | 112 | image, label = self.resize_short_length(image, label, short_length=self.base_size) 113 | image, label = self.gen_sample(image, label, self.multi_scale, self.flip) 114 | 115 | return image.copy(), label.copy(), np.array(size), name -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 
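In the 59-class PASCAL-Context setting, `reduce_zero_label` shifts every label down by one so that background (0) becomes -1 and coincides with the default `ignore_label`; the cls60 configs keep the raw labels instead. A tiny numeric sketch:

    import numpy as np
    label = np.array([[0, 1, 59]])
    dataset.reduce_zero_label(label)  # -> [[-1, 0, 58]]; background is now ignored by the loss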
2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import models.seg_hrnet 12 | import models.seg_hrnet_ocr -------------------------------------------------------------------------------- /lib/models/bn_helper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import functools 3 | 4 | if torch.__version__.startswith('0'): 5 | from .sync_bn.inplace_abn.bn import InPlaceABNSync 6 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') 7 | BatchNorm2d_class = InPlaceABNSync 8 | relu_inplace = False 9 | else: 10 | BatchNorm2d_class = BatchNorm2d = torch.nn.SyncBatchNorm 11 | relu_inplace = True -------------------------------------------------------------------------------- /lib/models/sync_bn/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | BSD 3-Clause License 3 | 4 | Copyright (c) 2017, mapillary 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
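The version switch in bn_helper.py above lets the same model definition run on old and new PyTorch alike: model code imports its normalization layer from one place instead of hard-coding it. A minimal usage sketch (the channel count 48 is arbitrary):

    import torch.nn as nn
    from models.bn_helper import BatchNorm2d, relu_inplace

    norm = BatchNorm2d(48)  # InPlaceABNSync(activation='none') on torch 0.x, nn.SyncBatchNorm on 1.x
    act = nn.ReLU(inplace=relu_inplace)  # in-place ReLU would overwrite the in-place ABN output, hence the flag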
31 | -------------------------------------------------------------------------------- /lib/models/sync_bn/__init__.py: -------------------------------------------------------------------------------- 1 | from .inplace_abn import bn -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/__init__.py: -------------------------------------------------------------------------------- 1 | from .bn import ABN, InPlaceABN, InPlaceABNSync 2 | from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE 3 | -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/bn.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as functional 5 | 6 | try: 7 | from queue import Queue 8 | except ImportError: 9 | from Queue import Queue 10 | 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | sys.path.append(BASE_DIR) 13 | sys.path.append(os.path.join(BASE_DIR, '../src')) 14 | from functions import * 15 | 16 | 17 | class ABN(nn.Module): 18 | """Activated Batch Normalization 19 | 20 | This gathers a `BatchNorm2d` and an activation function in a single module. 21 | """ 22 | 23 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 24 | """Creates an Activated Batch Normalization module 25 | 26 | Parameters 27 | ---------- 28 | num_features : int 29 | Number of feature channels in the input and output. 30 | eps : float 31 | Small constant to prevent numerical issues. 32 | momentum : float 33 | Momentum factor applied to compute running statistics. 34 | affine : bool 35 | If `True` apply learned scale and shift transformation after normalization. 36 | activation : str 37 | Name of the activation function, one of: `leaky_relu`, `elu` or `none`. 38 | slope : float 39 | Negative slope for the `leaky_relu` activation.
40 | """ 41 | super(ABN, self).__init__() 42 | self.num_features = num_features 43 | self.affine = affine 44 | self.eps = eps 45 | self.momentum = momentum 46 | self.activation = activation 47 | self.slope = slope 48 | if self.affine: 49 | self.weight = nn.Parameter(torch.ones(num_features)) 50 | self.bias = nn.Parameter(torch.zeros(num_features)) 51 | else: 52 | self.register_parameter('weight', None) 53 | self.register_parameter('bias', None) 54 | self.register_buffer('running_mean', torch.zeros(num_features)) 55 | self.register_buffer('running_var', torch.ones(num_features)) 56 | self.reset_parameters() 57 | 58 | def reset_parameters(self): 59 | nn.init.constant_(self.running_mean, 0) 60 | nn.init.constant_(self.running_var, 1) 61 | if self.affine: 62 | nn.init.constant_(self.weight, 1) 63 | nn.init.constant_(self.bias, 0) 64 | 65 | def forward(self, x): 66 | x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, 67 | self.training, self.momentum, self.eps) 68 | 69 | if self.activation == ACT_RELU: 70 | return functional.relu(x, inplace=True) 71 | elif self.activation == ACT_LEAKY_RELU: 72 | return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) 73 | elif self.activation == ACT_ELU: 74 | return functional.elu(x, inplace=True) 75 | else: 76 | return x 77 | 78 | def __repr__(self): 79 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 80 | ' affine={affine}, activation={activation}' 81 | if self.activation == "leaky_relu": 82 | rep += ', slope={slope})' 83 | else: 84 | rep += ')' 85 | return rep.format(name=self.__class__.__name__, **self.__dict__) 86 | 87 | 88 | class InPlaceABN(ABN): 89 | """InPlace Activated Batch Normalization""" 90 | 91 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 92 | """Creates an InPlace Activated Batch Normalization module 93 | 94 | Parameters 95 | ---------- 96 | num_features : int 97 | Number of feature channels in the input and output. 98 | eps : float 99 | Small constant to prevent numerical issues. 100 | momentum : float 101 | Momentum factor applied to compute running statistics as. 102 | affine : bool 103 | If `True` apply learned scale and shift transformation after normalization. 104 | activation : str 105 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 106 | slope : float 107 | Negative slope for the `leaky_relu` activation. 108 | """ 109 | super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) 110 | 111 | def forward(self, x): 112 | return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, 113 | self.training, self.momentum, self.eps, self.activation, self.slope) 114 | 115 | 116 | class InPlaceABNSync(ABN): 117 | """InPlace Activated Batch Normalization with cross-GPU synchronization 118 | 119 | This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DataParallel`. 120 | """ 121 | 122 | def __init__(self, num_features, devices=None, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", 123 | slope=0.01): 124 | """Creates a synchronized, InPlace Activated Batch Normalization module 125 | 126 | Parameters 127 | ---------- 128 | num_features : int 129 | Number of feature channels in the input and output. 130 | devices : list of int or None 131 | IDs of the GPUs that will run the replicas of this module. 132 | eps : float 133 | Small constant to prevent numerical issues. 
134 | momentum : float 135 | Momentum factor applied to compute running statistics. 136 | affine : bool 137 | If `True` apply learned scale and shift transformation after normalization. 138 | activation : str 139 | Name of the activation function, one of: `leaky_relu`, `elu` or `none`. 140 | slope : float 141 | Negative slope for the `leaky_relu` activation. 142 | """ 143 | super(InPlaceABNSync, self).__init__(num_features, eps, momentum, affine, activation, slope) 144 | self.devices = devices if devices else list(range(torch.cuda.device_count())) 145 | 146 | # Initialize queues 147 | self.worker_ids = self.devices[1:] 148 | self.master_queue = Queue(len(self.worker_ids)) 149 | self.worker_queues = [Queue(1) for _ in self.worker_ids] 150 | 151 | def forward(self, x): 152 | if x.get_device() == self.devices[0]: 153 | # Master mode 154 | extra = { 155 | "is_master": True, 156 | "master_queue": self.master_queue, 157 | "worker_queues": self.worker_queues, 158 | "worker_ids": self.worker_ids 159 | } 160 | else: 161 | # Worker mode 162 | extra = { 163 | "is_master": False, 164 | "master_queue": self.master_queue, 165 | "worker_queue": self.worker_queues[self.worker_ids.index(x.get_device())] 166 | } 167 | 168 | return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, 169 | extra, self.training, self.momentum, self.eps, self.activation, self.slope) 170 | 171 | def __repr__(self): 172 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 173 | ' affine={affine}, devices={devices}, activation={activation}' 174 | if self.activation == "leaky_relu": 175 | rep += ', slope={slope})' 176 | else: 177 | rep += ')' 178 | return rep.format(name=self.__class__.__name__, **self.__dict__) 179 | -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/functions.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | import torch.autograd as autograd 4 | import torch.cuda.comm as comm 5 | from torch.autograd.function import once_differentiable 6 | from torch.utils.cpp_extension import load 7 | 8 | _src_path = path.join(path.dirname(path.abspath(__file__)), "src") 9 | _backend = load(name="inplace_abn", 10 | extra_cflags=["-O3"], 11 | sources=[path.join(_src_path, f) for f in [ 12 | "inplace_abn.cpp", 13 | "inplace_abn_cpu.cpp", 14 | "inplace_abn_cuda.cu" 15 | ]], 16 | extra_cuda_cflags=["--expt-extended-lambda"]) 17 | 18 | # Activation names 19 | ACT_RELU = "relu" 20 | ACT_LEAKY_RELU = "leaky_relu" 21 | ACT_ELU = "elu" 22 | ACT_NONE = "none" 23 | 24 | 25 | def _check(fn, *args, **kwargs): 26 | success = fn(*args, **kwargs) 27 | if not success: 28 | raise RuntimeError("CUDA Error encountered in {}".format(fn)) 29 | 30 | 31 | def _broadcast_shape(x): 32 | out_size = [] 33 | for i, s in enumerate(x.size()): 34 | if i != 1: 35 | out_size.append(1) 36 | else: 37 | out_size.append(s) 38 | return out_size 39 | 40 | 41 | def _reduce(x): 42 | if len(x.size()) == 2: 43 | return x.sum(dim=0) 44 | else: 45 | n, c = x.size()[0:2] 46 | return x.contiguous().view((n, c, -1)).sum(2).sum(0) 47 | 48 | 49 | def _count_samples(x): 50 | count = 1 51 | for i, s in enumerate(x.size()): 52 | if i != 1: 53 | count *= s 54 | return count 55 | 56 | 57 | def _act_forward(ctx, x): 58 | if ctx.activation == ACT_LEAKY_RELU: 59 | _backend.leaky_relu_forward(x, ctx.slope) 60 | elif ctx.activation == ACT_ELU: 61 | _backend.elu_forward(x) 62 | elif ctx.activation == ACT_NONE: 63 |
pass 64 | 65 | 66 | def _act_backward(ctx, x, dx): 67 | if ctx.activation == ACT_LEAKY_RELU: 68 | _backend.leaky_relu_backward(x, dx, ctx.slope) 69 | elif ctx.activation == ACT_ELU: 70 | _backend.elu_backward(x, dx) 71 | elif ctx.activation == ACT_NONE: 72 | pass 73 | 74 | 75 | class InPlaceABN(autograd.Function): 76 | @staticmethod 77 | def forward(ctx, x, weight, bias, running_mean, running_var, 78 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 79 | # Save context 80 | ctx.training = training 81 | ctx.momentum = momentum 82 | ctx.eps = eps 83 | ctx.activation = activation 84 | ctx.slope = slope 85 | ctx.affine = weight is not None and bias is not None 86 | 87 | # Prepare inputs 88 | count = _count_samples(x) 89 | x = x.contiguous() 90 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 91 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 92 | 93 | if ctx.training: 94 | mean, var = _backend.mean_var(x) 95 | 96 | # Update running stats 97 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 98 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) 99 | 100 | # Mark in-place modified tensors 101 | ctx.mark_dirty(x, running_mean, running_var) 102 | else: 103 | mean, var = running_mean.contiguous(), running_var.contiguous() 104 | ctx.mark_dirty(x) 105 | 106 | # BN forward + activation 107 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 108 | _act_forward(ctx, x) 109 | 110 | # Output 111 | ctx.var = var 112 | ctx.save_for_backward(x, var, weight, bias) 113 | return x 114 | 115 | @staticmethod 116 | @once_differentiable 117 | def backward(ctx, dz): 118 | z, var, weight, bias = ctx.saved_tensors 119 | dz = dz.contiguous() 120 | 121 | # Undo activation 122 | _act_backward(ctx, z, dz) 123 | 124 | if ctx.training: 125 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 126 | else: 127 | # TODO: implement simplified CUDA backward for inference mode 128 | edz = dz.new_zeros(dz.size(1)) 129 | eydz = dz.new_zeros(dz.size(1)) 130 | 131 | dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 132 | dweight = dweight if ctx.affine else None 133 | dbias = dbias if ctx.affine else None 134 | 135 | return dx, dweight, dbias, None, None, None, None, None, None, None 136 | 137 | 138 | class InPlaceABNSync(autograd.Function): 139 | @classmethod 140 | def forward(cls, ctx, x, weight, bias, running_mean, running_var, 141 | extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 142 | # Save context 143 | cls._parse_extra(ctx, extra) 144 | ctx.training = training 145 | ctx.momentum = momentum 146 | ctx.eps = eps 147 | ctx.activation = activation 148 | ctx.slope = slope 149 | ctx.affine = weight is not None and bias is not None 150 | 151 | # Prepare inputs 152 | count = _count_samples(x) * (ctx.master_queue.maxsize + 1) 153 | x = x.contiguous() 154 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 155 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 156 | 157 | if ctx.training: 158 | mean, var = _backend.mean_var(x) 159 | 160 | if ctx.is_master: 161 | means, vars = [mean.unsqueeze(0)], [var.unsqueeze(0)] 162 | for _ in range(ctx.master_queue.maxsize): 163 | mean_w, var_w = ctx.master_queue.get() 164 | ctx.master_queue.task_done() 165 | means.append(mean_w.unsqueeze(0)) 166 | vars.append(var_w.unsqueeze(0)) 167 | 168 | means = comm.gather(means) 169 | vars = comm.gather(vars) 
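# The master has now gathered every replica's batch statistics; the global statistics follow from the law of total variance, var = E[var_i] + E[(mean_i - mean)^2], which is exactly what the next two lines compute before broadcasting the result back to the workers.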
170 | 171 | mean = means.mean(0) 172 | var = (vars + (mean - means) ** 2).mean(0) 173 | 174 | tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids) 175 | for ts, queue in zip(tensors[1:], ctx.worker_queues): 176 | queue.put(ts) 177 | else: 178 | ctx.master_queue.put((mean, var)) 179 | mean, var = ctx.worker_queue.get() 180 | ctx.worker_queue.task_done() 181 | 182 | # Update running stats 183 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 184 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) 185 | 186 | # Mark in-place modified tensors 187 | ctx.mark_dirty(x, running_mean, running_var) 188 | else: 189 | mean, var = running_mean.contiguous(), running_var.contiguous() 190 | ctx.mark_dirty(x) 191 | 192 | # BN forward + activation 193 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 194 | _act_forward(ctx, x) 195 | 196 | # Output 197 | ctx.var = var 198 | ctx.save_for_backward(x, var, weight, bias) 199 | return x 200 | 201 | @staticmethod 202 | @once_differentiable 203 | def backward(ctx, dz): 204 | z, var, weight, bias = ctx.saved_tensors 205 | dz = dz.contiguous() 206 | 207 | # Undo activation 208 | _act_backward(ctx, z, dz) 209 | 210 | if ctx.training: 211 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 212 | 213 | if ctx.is_master: 214 | edzs, eydzs = [edz], [eydz] 215 | for _ in range(len(ctx.worker_queues)): 216 | edz_w, eydz_w = ctx.master_queue.get() 217 | ctx.master_queue.task_done() 218 | edzs.append(edz_w) 219 | eydzs.append(eydz_w) 220 | 221 | edz = comm.reduce_add(edzs) / (ctx.master_queue.maxsize + 1) 222 | eydz = comm.reduce_add(eydzs) / (ctx.master_queue.maxsize + 1) 223 | 224 | tensors = comm.broadcast_coalesced((edz, eydz), [edz.get_device()] + ctx.worker_ids) 225 | for ts, queue in zip(tensors[1:], ctx.worker_queues): 226 | queue.put(ts) 227 | else: 228 | ctx.master_queue.put((edz, eydz)) 229 | edz, eydz = ctx.worker_queue.get() 230 | ctx.worker_queue.task_done() 231 | else: 232 | edz = dz.new_zeros(dz.size(1)) 233 | eydz = dz.new_zeros(dz.size(1)) 234 | 235 | dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 236 | dweight = dweight if ctx.affine else None 237 | dbias = dbias if ctx.affine else None 238 | 239 | return dx, dweight, dbias, None, None, None, None, None, None, None, None 240 | 241 | @staticmethod 242 | def _parse_extra(ctx, extra): 243 | ctx.is_master = extra["is_master"] 244 | if ctx.is_master: 245 | ctx.master_queue = extra["master_queue"] 246 | ctx.worker_queues = extra["worker_queues"] 247 | ctx.worker_ids = extra["worker_ids"] 248 | else: 249 | ctx.master_queue = extra["master_queue"] 250 | ctx.worker_queue = extra["worker_queue"] 251 | 252 | 253 | inplace_abn = InPlaceABN.apply 254 | inplace_abn_sync = InPlaceABNSync.apply 255 | 256 | __all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"] 257 | -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/src/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <ATen/ATen.h> 4 | 5 | /* 6 | * General settings 7 | */ 8 | const int WARP_SIZE = 32; 9 | const int MAX_BLOCK_SIZE = 512; 10 | 11 | template<typename T> 12 | struct Pair { 13 | T v1, v2; 14 | __device__ Pair() {} 15 | __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {} 16 | __device__ Pair(T v) : v1(v), v2(v) {} 17 | __device__ Pair(int
v) : v1(v), v2(v) {} 18 | __device__ Pair<T> &operator+=(const Pair<T> &a) { 19 | v1 += a.v1; 20 | v2 += a.v2; 21 | return *this; 22 | } 23 | }; 24 | 25 | /* 26 | * Utility functions 27 | */ 28 | template<typename T> 29 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, 30 | unsigned int mask = 0xffffffff) { 31 | #if CUDART_VERSION >= 9000 32 | return __shfl_xor_sync(mask, value, laneMask, width); 33 | #else 34 | return __shfl_xor(value, laneMask, width); 35 | #endif 36 | } 37 | 38 | __device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } 39 | 40 | static int getNumThreads(int nElem) { 41 | int threadSizes[5] = {32, 64, 128, 256, MAX_BLOCK_SIZE}; 42 | for (int i = 0; i != 5; ++i) { 43 | if (nElem <= threadSizes[i]) { 44 | return threadSizes[i]; 45 | } 46 | } 47 | return MAX_BLOCK_SIZE; 48 | } 49 | 50 | template<typename T> 51 | static __device__ __forceinline__ T warpSum(T val) { 52 | #if __CUDA_ARCH__ >= 300 53 | for (int i = 0; i < getMSB(WARP_SIZE); ++i) { 54 | val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); 55 | } 56 | #else 57 | __shared__ T values[MAX_BLOCK_SIZE]; 58 | values[threadIdx.x] = val; 59 | __threadfence_block(); 60 | const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE; 61 | for (int i = 1; i < WARP_SIZE; i++) { 62 | val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; 63 | } 64 | #endif 65 | return val; 66 | } 67 | 68 | template<typename T> 69 | static __device__ __forceinline__ Pair<T> warpSum(Pair<T> value) { 70 | value.v1 = warpSum(value.v1); 71 | value.v2 = warpSum(value.v2); 72 | return value; 73 | } 74 | 75 | template<typename T, typename Op> 76 | __device__ T reduce(Op op, int plane, int N, int C, int S) { 77 | T sum = (T)0; 78 | for (int batch = 0; batch < N; ++batch) { 79 | for (int x = threadIdx.x; x < S; x += blockDim.x) { 80 | sum += op(batch, plane, x); 81 | } 82 | } 83 | 84 | // sum over NumThreads within a warp 85 | sum = warpSum(sum); 86 | 87 | // 'transpose', and reduce within warp again 88 | __shared__ T shared[32]; 89 | __syncthreads(); 90 | if (threadIdx.x % WARP_SIZE == 0) { 91 | shared[threadIdx.x / WARP_SIZE] = sum; 92 | } 93 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { 94 | // zero out the other entries in shared 95 | shared[threadIdx.x] = (T)0; 96 | } 97 | __syncthreads(); 98 | if (threadIdx.x / WARP_SIZE == 0) { 99 | sum = warpSum(shared[threadIdx.x]); 100 | if (threadIdx.x == 0) { 101 | shared[0] = sum; 102 | } 103 | } 104 | __syncthreads(); 105 | 106 | // Everyone picks it up, should be broadcast into the whole gradInput 107 | return shared[0]; 108 | } -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/src/inplace_abn.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | #include "inplace_abn.h" 6 | 7 | std::vector<at::Tensor> mean_var(at::Tensor x) { 8 | if (x.is_cuda()) { 9 | return mean_var_cuda(x); 10 | } else { 11 | return mean_var_cpu(x); 12 | } 13 | } 14 | 15 | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 16 | bool affine, float eps) { 17 | if (x.is_cuda()) { 18 | return forward_cuda(x, mean, var, weight, bias, affine, eps); 19 | } else { 20 | return forward_cpu(x, mean, var, weight, bias, affine, eps); 21 | } 22 | } 23 | 24 | std::vector<at::Tensor> edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 25 | bool affine, float eps) { 26 | if (z.is_cuda()) { 27 | return edz_eydz_cuda(z, dz, weight, bias, affine, eps); 28 | } else { 29 |
return edz_eydz_cpu(z, dz, weight, bias, affine, eps); 30 | } 31 | } 32 | 33 | std::vector<at::Tensor> backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 34 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 35 | if (z.is_cuda()) { 36 | return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); 37 | } else { 38 | return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); 39 | } 40 | } 41 | 42 | void leaky_relu_forward(at::Tensor z, float slope) { 43 | at::leaky_relu_(z, slope); 44 | } 45 | 46 | void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) { 47 | if (z.is_cuda()) { 48 | return leaky_relu_backward_cuda(z, dz, slope); 49 | } else { 50 | return leaky_relu_backward_cpu(z, dz, slope); 51 | } 52 | } 53 | 54 | void elu_forward(at::Tensor z) { 55 | at::elu_(z); 56 | } 57 | 58 | void elu_backward(at::Tensor z, at::Tensor dz) { 59 | if (z.is_cuda()) { 60 | return elu_backward_cuda(z, dz); 61 | } else { 62 | return elu_backward_cpu(z, dz); 63 | } 64 | } 65 | 66 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 67 | m.def("mean_var", &mean_var, "Mean and variance computation"); 68 | m.def("forward", &forward, "In-place forward computation"); 69 | m.def("edz_eydz", &edz_eydz, "First part of backward computation"); 70 | m.def("backward", &backward, "Second part of backward computation"); 71 | m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); 72 | m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); 73 | m.def("elu_forward", &elu_forward, "Elu forward computation"); 74 | m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); 75 | } -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/src/inplace_abn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <ATen/ATen.h> 4 | 5 | #include <vector> 6 | 7 | std::vector<at::Tensor> mean_var_cpu(at::Tensor x); 8 | std::vector<at::Tensor> mean_var_cuda(at::Tensor x); 9 | 10 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 11 | bool affine, float eps); 12 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 13 | bool affine, float eps); 14 | 15 | std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 16 | bool affine, float eps); 17 | std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 18 | bool affine, float eps); 19 | 20 | std::vector<at::Tensor> backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 21 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 22 | std::vector<at::Tensor> backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 23 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 24 | 25 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); 26 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); 27 | 28 | void elu_backward_cpu(at::Tensor z, at::Tensor dz); 29 | void elu_backward_cuda(at::Tensor z, at::Tensor dz); -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/src/inplace_abn_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | 3 | #include <vector> 4 | 5 | #include "inplace_abn.h" 6 | 7 |
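// The CPU fallback below expresses the same batch-norm algebra as the CUDA kernels using whole-tensor ATen ops: reduce_sum collapses an (N, C, *) tensor to per-channel sums, broadcast_to re-expands per-channel statistics for elementwise use, and invert_affine recovers the normalized activations from the in-place output.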
at::Tensor reduce_sum(at::Tensor x) { 8 | if (x.ndimension() == 2) { 9 | return x.sum(0); 10 | } else { 11 | auto x_view = x.view({x.size(0), x.size(1), -1}); 12 | return x_view.sum(-1).sum(0); 13 | } 14 | } 15 | 16 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { 17 | if (x.ndimension() == 2) { 18 | return v; 19 | } else { 20 | std::vector<int64_t> broadcast_size = {1, -1}; 21 | for (int64_t i = 2; i < x.ndimension(); ++i) 22 | broadcast_size.push_back(1); 23 | 24 | return v.view(broadcast_size); 25 | } 26 | } 27 | 28 | int64_t count(at::Tensor x) { 29 | int64_t count = x.size(0); 30 | for (int64_t i = 2; i < x.ndimension(); ++i) 31 | count *= x.size(i); 32 | 33 | return count; 34 | } 35 | 36 | at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { 37 | if (affine) { 38 | return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); 39 | } else { 40 | return z; 41 | } 42 | } 43 | 44 | std::vector<at::Tensor> mean_var_cpu(at::Tensor x) { 45 | auto num = count(x); 46 | auto mean = reduce_sum(x) / num; 47 | auto diff = x - broadcast_to(mean, x); 48 | auto var = reduce_sum(diff.pow(2)) / num; 49 | 50 | return {mean, var}; 51 | } 52 | 53 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 54 | bool affine, float eps) { 55 | auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var); 56 | auto mul = at::rsqrt(var + eps) * gamma; 57 | 58 | x.sub_(broadcast_to(mean, x)); 59 | x.mul_(broadcast_to(mul, x)); 60 | if (affine) x.add_(broadcast_to(bias, x)); 61 | 62 | return x; 63 | } 64 | 65 | std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 66 | bool affine, float eps) { 67 | auto edz = reduce_sum(dz); 68 | auto y = invert_affine(z, weight, bias, affine, eps); 69 | auto eydz = reduce_sum(y * dz); 70 | 71 | return {edz, eydz}; 72 | } 73 | 74 | std::vector<at::Tensor> backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 75 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 76 | auto y = invert_affine(z, weight, bias, affine, eps); 77 | auto mul = affine ?
at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps); 78 | 79 | auto num = count(z); 80 | auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz); 81 | 82 | auto dweight = at::empty(z.type(), {0}); 83 | auto dbias = at::empty(z.type(), {0}); 84 | if (affine) { 85 | dweight = eydz * at::sign(weight); 86 | dbias = edz; 87 | } 88 | 89 | return {dx, dweight, dbias}; 90 | } 91 | 92 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) { 93 | AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] { 94 | int64_t count = z.numel(); 95 | auto *_z = z.data<scalar_t>(); 96 | auto *_dz = dz.data<scalar_t>(); 97 | 98 | for (int64_t i = 0; i < count; ++i) { 99 | if (_z[i] < 0) { 100 | _z[i] *= 1 / slope; 101 | _dz[i] *= slope; 102 | } 103 | } 104 | })); 105 | } 106 | 107 | void elu_backward_cpu(at::Tensor z, at::Tensor dz) { 108 | AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] { 109 | int64_t count = z.numel(); 110 | auto *_z = z.data<scalar_t>(); 111 | auto *_dz = dz.data<scalar_t>(); 112 | 113 | for (int64_t i = 0; i < count; ++i) { 114 | if (_z[i] < 0) { 115 | _dz[i] *= (_z[i] + 1.f); // scale the gradient while _z still holds the ELU output y (dy/dx = y + 1) 116 | _z[i] = log1p(_z[i]); // then invert the activation, matching the order of the CUDA path 117 | } 118 | } 119 | })); 120 | } -------------------------------------------------------------------------------- /lib/models/sync_bn/inplace_abn/src/inplace_abn_cuda.cu: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | 3 | #include <thrust/device_ptr.h> 4 | #include <thrust/transform.h> 5 | 6 | #include <vector> 7 | 8 | #include "common.h" 9 | #include "inplace_abn.h" 10 | 11 | // Checks 12 | #ifndef AT_CHECK 13 | #define AT_CHECK AT_ASSERT 14 | #endif 15 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 16 | #define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x " must be contiguous") 17 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 18 | 19 | // Utilities 20 | void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) { 21 | num = x.size(0); 22 | chn = x.size(1); 23 | sp = 1; 24 | for (int64_t i = 2; i < x.ndimension(); ++i) 25 | sp *= x.size(i); 26 | } 27 | 28 | // Operations for reduce 29 | template<typename T> 30 | struct SumOp { 31 | __device__ SumOp(const T *t, int c, int s) 32 | : tensor(t), chn(c), sp(s) {} 33 | __device__ __forceinline__ T operator()(int batch, int plane, int n) { 34 | return tensor[(batch * chn + plane) * sp + n]; 35 | } 36 | const T *tensor; 37 | const int chn; 38 | const int sp; 39 | }; 40 | 41 | template<typename T> 42 | struct VarOp { 43 | __device__ VarOp(T m, const T *t, int c, int s) 44 | : mean(m), tensor(t), chn(c), sp(s) {} 45 | __device__ __forceinline__ T operator()(int batch, int plane, int n) { 46 | T val = tensor[(batch * chn + plane) * sp + n]; 47 | return (val - mean) * (val - mean); 48 | } 49 | const T mean; 50 | const T *tensor; 51 | const int chn; 52 | const int sp; 53 | }; 54 | 55 | template<typename T> 56 | struct GradOp { 57 | __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s) 58 | : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} 59 | __device__ __forceinline__ Pair<T> operator()(int batch, int plane, int n) { 60 | T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight; 61 | T _dz = dz[(batch * chn + plane) * sp + n]; 62 | return Pair<T>(_dz, _y * _dz); 63 | } 64 | const T weight; 65 | const T bias; 66 | const T *z; 67 | const T *dz; 68 | const int chn; 69 | const int sp; 70 | }; 71 | 72 | /*********** 73 | * mean_var 74 | ***********/ 75 | 76 | template<typename T> 77 | __global__ void mean_var_kernel(const T *x, T
*mean, T *var, int num, int chn, int sp) { 78 | int plane = blockIdx.x; 79 | T norm = T(1) / T(num * sp); 80 | 81 | T _mean = reduce<T, SumOp<T>>(SumOp<T>(x, chn, sp), plane, num, chn, sp) * norm; 82 | __syncthreads(); 83 | T _var = reduce<T, VarOp<T>>(VarOp<T>(_mean, x, chn, sp), plane, num, chn, sp) * norm; 84 | 85 | if (threadIdx.x == 0) { 86 | mean[plane] = _mean; 87 | var[plane] = _var; 88 | } 89 | } 90 | 91 | std::vector<at::Tensor> mean_var_cuda(at::Tensor x) { 92 | CHECK_INPUT(x); 93 | 94 | // Extract dimensions 95 | int64_t num, chn, sp; 96 | get_dims(x, num, chn, sp); 97 | 98 | // Prepare output tensors 99 | auto mean = at::empty(x.type(), {chn}); 100 | auto var = at::empty(x.type(), {chn}); 101 | 102 | // Run kernel 103 | dim3 blocks(chn); 104 | dim3 threads(getNumThreads(sp)); 105 | AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] { 106 | mean_var_kernel<scalar_t><<<blocks, threads>>>( 107 | x.data<scalar_t>(), 108 | mean.data<scalar_t>(), 109 | var.data<scalar_t>(), 110 | num, chn, sp); 111 | })); 112 | 113 | return {mean, var}; 114 | } 115 | 116 | /********** 117 | * forward 118 | **********/ 119 | 120 | template<typename T> 121 | __global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias, 122 | bool affine, float eps, int num, int chn, int sp) { 123 | int plane = blockIdx.x; 124 | 125 | T _mean = mean[plane]; 126 | T _var = var[plane]; 127 | T _weight = affine ? abs(weight[plane]) + eps : T(1); 128 | T _bias = affine ? bias[plane] : T(0); 129 | 130 | T mul = rsqrt(_var + eps) * _weight; 131 | 132 | for (int batch = 0; batch < num; ++batch) { 133 | for (int n = threadIdx.x; n < sp; n += blockDim.x) { 134 | T _x = x[(batch * chn + plane) * sp + n]; 135 | T _y = (_x - _mean) * mul + _bias; 136 | 137 | x[(batch * chn + plane) * sp + n] = _y; 138 | } 139 | } 140 | } 141 | 142 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 143 | bool affine, float eps) { 144 | CHECK_INPUT(x); 145 | CHECK_INPUT(mean); 146 | CHECK_INPUT(var); 147 | CHECK_INPUT(weight); 148 | CHECK_INPUT(bias); 149 | 150 | // Extract dimensions 151 | int64_t num, chn, sp; 152 | get_dims(x, num, chn, sp); 153 | 154 | // Run kernel 155 | dim3 blocks(chn); 156 | dim3 threads(getNumThreads(sp)); 157 | AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] { 158 | forward_kernel<scalar_t><<<blocks, threads>>>( 159 | x.data<scalar_t>(), 160 | mean.data<scalar_t>(), 161 | var.data<scalar_t>(), 162 | weight.data<scalar_t>(), 163 | bias.data<scalar_t>(), 164 | affine, eps, num, chn, sp); 165 | })); 166 | 167 | return x; 168 | } 169 | 170 | /*********** 171 | * edz_eydz 172 | ***********/ 173 | 174 | template<typename T> 175 | __global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias, 176 | T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) { 177 | int plane = blockIdx.x; 178 | 179 | T _weight = affine ? abs(weight[plane]) + eps : 1.f; 180 | T _bias = affine ?
170 | /***********
171 |  * edz_eydz
172 |  ***********/
173 | 
174 | template <typename T>
175 | __global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias,
176 |                                 T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) {
177 |   int plane = blockIdx.x;
178 | 
179 |   T _weight = affine ? abs(weight[plane]) + eps : 1.f;
180 |   T _bias = affine ? bias[plane] : 0.f;
181 | 
182 |   Pair<T> res = reduce<Pair<T>, GradOp<T>>(GradOp<T>(_weight, _bias, z, dz, chn, sp), plane, num, chn, sp);
183 |   __syncthreads();
184 | 
185 |   if (threadIdx.x == 0) {
186 |     edz[plane] = res.v1;
187 |     eydz[plane] = res.v2;
188 |   }
189 | }
190 | 
191 | std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
192 |                                       bool affine, float eps) {
193 |   CHECK_INPUT(z);
194 |   CHECK_INPUT(dz);
195 |   CHECK_INPUT(weight);
196 |   CHECK_INPUT(bias);
197 | 
198 |   // Extract dimensions
199 |   int64_t num, chn, sp;
200 |   get_dims(z, num, chn, sp);
201 | 
202 |   auto edz = at::empty(z.type(), {chn});
203 |   auto eydz = at::empty(z.type(), {chn});
204 | 
205 |   // Run kernel
206 |   dim3 blocks(chn);
207 |   dim3 threads(getNumThreads(sp));
208 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] {
209 |     edz_eydz_kernel<scalar_t><<<blocks, threads>>>(
210 |         z.data<scalar_t>(),
211 |         dz.data<scalar_t>(),
212 |         weight.data<scalar_t>(),
213 |         bias.data<scalar_t>(),
214 |         edz.data<scalar_t>(),
215 |         eydz.data<scalar_t>(),
216 |         affine, eps, num, chn, sp);
217 |   }));
218 | 
219 |   return {edz, eydz};
220 | }
221 | 
222 | /***********
223 |  * backward
224 |  ***********/
225 | 
226 | template <typename T>
227 | __global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz,
228 |                                 const T *eydz, T *dx, T *dweight, T *dbias,
229 |                                 bool affine, float eps, int num, int chn, int sp) {
230 |   int plane = blockIdx.x;
231 | 
232 |   T _weight = affine ? abs(weight[plane]) + eps : 1.f;
233 |   T _bias = affine ? bias[plane] : 0.f;
234 |   T _var = var[plane];
235 |   T _edz = edz[plane];
236 |   T _eydz = eydz[plane];
237 | 
238 |   T _mul = _weight * rsqrt(_var + eps);
239 |   T count = T(num * sp);
240 | 
241 |   for (int batch = 0; batch < num; ++batch) {
242 |     for (int n = threadIdx.x; n < sp; n += blockDim.x) {
243 |       T _dz = dz[(batch * chn + plane) * sp + n];
244 |       T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight;
245 | 
246 |       dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul;
247 |     }
248 |   }
249 | 
250 |   if (threadIdx.x == 0) {
251 |     if (affine) {
252 |       dweight[plane] = weight[plane] > 0 ? _eydz : -_eydz;
253 |       dbias[plane] = _edz;
254 |     }
255 |   }
256 | }
257 | 
258 | std::vector<at::Tensor> backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
259 |                                       at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
260 |   CHECK_INPUT(z);
261 |   CHECK_INPUT(dz);
262 |   CHECK_INPUT(var);
263 |   CHECK_INPUT(weight);
264 |   CHECK_INPUT(bias);
265 |   CHECK_INPUT(edz);
266 |   CHECK_INPUT(eydz);
267 | 
268 |   // Extract dimensions
269 |   int64_t num, chn, sp;
270 |   get_dims(z, num, chn, sp);
271 | 
272 |   auto dx = at::zeros_like(z);
273 |   auto dweight = at::zeros_like(weight);
274 |   auto dbias = at::zeros_like(bias);
275 | 
276 |   // Run kernel
277 |   dim3 blocks(chn);
278 |   dim3 threads(getNumThreads(sp));
279 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] {
280 |     backward_kernel<scalar_t><<<blocks, threads>>>(
281 |         z.data<scalar_t>(),
282 |         dz.data<scalar_t>(),
283 |         var.data<scalar_t>(),
284 |         weight.data<scalar_t>(),
285 |         bias.data<scalar_t>(),
286 |         edz.data<scalar_t>(),
287 |         eydz.data<scalar_t>(),
288 |         dx.data<scalar_t>(),
289 |         dweight.data<scalar_t>(),
290 |         dbias.data<scalar_t>(),
291 |         affine, eps, num, chn, sp);
292 |   }));
293 | 
294 |   return {dx, dweight, dbias};
295 | }
296 | 
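// The activation helpers below undo the in-place nonlinearity: forward stored
// z = act(y), so on the negative branch they rescale the gradient by act'(y)
// (dz *= slope for leaky ReLU, dz *= z + 1 for ELU) and overwrite z with the
// recovered pre-activation y (z / slope and log1p(z), respectively); the
// positive branch is the identity in both respects and is left untouched.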
297 | /**************
298 |  * activations
299 |  **************/
300 | 
301 | template <typename T>
302 | inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
303 |   // Create thrust pointers
304 |   thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
305 |   thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
306 | 
307 |   thrust::transform_if(th_dz, th_dz + count, th_z, th_dz,
308 |                        [slope] __device__ (const T& dz) { return dz * slope; },
309 |                        [] __device__ (const T& z) { return z < 0; });
310 |   thrust::transform_if(th_z, th_z + count, th_z,
311 |                        [slope] __device__ (const T& z) { return z / slope; },
312 |                        [] __device__ (const T& z) { return z < 0; });
313 | }
314 | 
315 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {
316 |   CHECK_INPUT(z);
317 |   CHECK_INPUT(dz);
318 | 
319 |   int64_t count = z.numel();
320 | 
321 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
322 |     leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
323 |   }));
324 | }
325 | 
326 | template <typename T>
327 | inline void elu_backward_impl(T *z, T *dz, int64_t count) {
328 |   // Create thrust pointers
329 |   thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
330 |   thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
331 | 
332 |   thrust::transform_if(th_dz, th_dz + count, th_z, th_z, th_dz,
333 |                        [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
334 |                        [] __device__ (const T& z) { return z < 0; });
335 |   thrust::transform_if(th_z, th_z + count, th_z,
336 |                        [] __device__ (const T& z) { return log1p(z); },
337 |                        [] __device__ (const T& z) { return z < 0; });
338 | }
339 | 
340 | void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
341 |   CHECK_INPUT(z);
342 |   CHECK_INPUT(dz);
343 | 
344 |   int64_t count = z.numel();
345 | 
346 |   AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cuda", ([&] {
347 |     elu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), count);
348 |   }));
349 | }
350 | 
--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/lib/utils/__init__.py
--------------------------------------------------------------------------------
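The sources above target an older ATen extension surface (`x.type()`-based dispatch, `tensor.data<scalar_t>()` accessors), matching the PyTorch versions this branch builds against. For orientation, a JIT build of the three translation units might look like the sketch below; the repo's actual loading logic lives in lib/models/sync_bn/inplace_abn (functions.py, not excerpted in this listing), so the flags and entry point here are assumptions:

```python
# Hypothetical JIT build of the sync-BN extension sources; the repo's own
# loader in lib/models/sync_bn/inplace_abn/functions.py is authoritative.
import os
from torch.utils.cpp_extension import load

_src = os.path.join('lib', 'models', 'sync_bn', 'inplace_abn', 'src')
_backend = load(
    name='inplace_abn',
    sources=[os.path.join(_src, f) for f in
             ('inplace_abn.cpp', 'inplace_abn_cpu.cpp', 'inplace_abn_cuda.cu')],
    extra_cflags=['-O3'],
    # the __device__ lambdas in the thrust::transform_if calls require this flag
    extra_cuda_cflags=['--expt-extended-lambda'])
```

/lib/utils/distributed.py: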
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Jingyi Xie (hsfzxjy@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import torch 8 | import torch.distributed as torch_dist 9 | 10 | def is_distributed(): 11 | return torch_dist.is_initialized() 12 | 13 | def get_world_size(): 14 | if not torch_dist.is_initialized(): 15 | return 1 16 | return torch_dist.get_world_size() 17 | 18 | def get_rank(): 19 | if not torch_dist.is_initialized(): 20 | return 0 21 | return torch_dist.get_rank() -------------------------------------------------------------------------------- /lib/utils/modelsummary.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Ke Sun (sunk@mail.ustc.edu.cn) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | import logging 14 | from collections import namedtuple 15 | 16 | import torch 17 | import torch.nn as nn 18 | 19 | def get_model_summary(model, *input_tensors, item_length=26, verbose=False): 20 | """ 21 | :param model: 22 | :param input_tensors: 23 | :param item_length: 24 | :return: 25 | """ 26 | 27 | summary = [] 28 | 29 | ModuleDetails = namedtuple( 30 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"]) 31 | hooks = [] 32 | layer_instances = {} 33 | 34 | def add_hooks(module): 35 | 36 | def hook(module, input, output): 37 | class_name = str(module.__class__.__name__) 38 | 39 | instance_index = 1 40 | if class_name not in layer_instances: 41 | layer_instances[class_name] = instance_index 42 | else: 43 | instance_index = layer_instances[class_name] + 1 44 | layer_instances[class_name] = instance_index 45 | 46 | layer_name = class_name + "_" + str(instance_index) 47 | 48 | params = 0 49 | 50 | if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \ 51 | class_name.find("Linear") != -1: 52 | for param_ in module.parameters(): 53 | params += param_.view(-1).size(0) 54 | 55 | flops = "Not Available" 56 | if class_name.find("Conv") != -1 and hasattr(module, "weight"): 57 | flops = ( 58 | torch.prod( 59 | torch.LongTensor(list(module.weight.data.size()))) * 60 | torch.prod( 61 | torch.LongTensor(list(output.size())[2:]))).item() 62 | elif isinstance(module, nn.Linear): 63 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \ 64 | * input[0].size(1)).item() 65 | 66 | if isinstance(input[0], list): 67 | input = input[0] 68 | if isinstance(output, list): 69 | output = output[0] 70 | 71 | summary.append( 72 | ModuleDetails( 73 | name=layer_name, 74 | input_size=list(input[0].size()), 75 | output_size=list(output.size()), 76 | num_parameters=params, 77 | multiply_adds=flops) 78 | ) 79 | 80 | if not isinstance(module, nn.ModuleList) \ 81 | and not isinstance(module, nn.Sequential) \ 82 | and module != model: 83 | hooks.append(module.register_forward_hook(hook)) 84 | 85 | model.eval() 86 | model.apply(add_hooks) 87 | 88 | space_len = 
item_length 89 | 90 | model(*input_tensors) 91 | for hook in hooks: 92 | hook.remove() 93 | 94 | details = '' 95 | if verbose: 96 | details = "Model Summary" + \ 97 | os.linesep + \ 98 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format( 99 | ' ' * (space_len - len("Name")), 100 | ' ' * (space_len - len("Input Size")), 101 | ' ' * (space_len - len("Output Size")), 102 | ' ' * (space_len - len("Parameters")), 103 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \ 104 | + os.linesep + '-' * space_len * 5 + os.linesep 105 | 106 | params_sum = 0 107 | flops_sum = 0 108 | for layer in summary: 109 | params_sum += layer.num_parameters 110 | if layer.multiply_adds != "Not Available": 111 | flops_sum += layer.multiply_adds 112 | if verbose: 113 | details += "{}{}{}{}{}{}{}{}{}{}".format( 114 | layer.name, 115 | ' ' * (space_len - len(layer.name)), 116 | layer.input_size, 117 | ' ' * (space_len - len(str(layer.input_size))), 118 | layer.output_size, 119 | ' ' * (space_len - len(str(layer.output_size))), 120 | layer.num_parameters, 121 | ' ' * (space_len - len(str(layer.num_parameters))), 122 | layer.multiply_adds, 123 | ' ' * (space_len - len(str(layer.multiply_adds)))) \ 124 | + os.linesep + '-' * space_len * 5 + os.linesep 125 | 126 | details += os.linesep \ 127 | + "Total Parameters: {:,}".format(params_sum) \ 128 | + os.linesep + '-' * space_len * 5 + os.linesep 129 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \ 130 | + os.linesep + '-' * space_len * 5 + os.linesep 131 | details += "Number of Layers" + os.linesep 132 | for layer in layer_instances: 133 | details += "{} : {} layers ".format(layer, layer_instances[layer]) 134 | 135 | return details -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from pathlib import Path 15 | 16 | import numpy as np 17 | 18 | import torch 19 | import torch.nn as nn 20 | 21 | class FullModel(nn.Module): 22 | """ 23 | Distribute the loss on multi-gpu to reduce 24 | the memory cost in the main gpu. 25 | You can check the following discussion. 
26 |     https://discuss.pytorch.org/t/dataparallel-imbalanced-memory-usage/22551/21
27 |     """
28 |     def __init__(self, model, loss):
29 |         super(FullModel, self).__init__()
30 |         self.model = model
31 |         self.loss = loss
32 | 
33 |     def forward(self, inputs, labels, *args, **kwargs):
34 |         outputs = self.model(inputs, *args, **kwargs)
35 |         loss = self.loss(outputs, labels)
36 |         return torch.unsqueeze(loss, 0), outputs
37 | 
38 | class AverageMeter(object):
39 |     """Computes and stores the average and current value"""
40 | 
41 |     def __init__(self):
42 |         self.initialized = False
43 |         self.val = None
44 |         self.avg = None
45 |         self.sum = None
46 |         self.count = None
47 | 
48 |     def initialize(self, val, weight):
49 |         self.val = val
50 |         self.avg = val
51 |         self.sum = val * weight
52 |         self.count = weight
53 |         self.initialized = True
54 | 
55 |     def update(self, val, weight=1):
56 |         if not self.initialized:
57 |             self.initialize(val, weight)
58 |         else:
59 |             self.add(val, weight)
60 | 
61 |     def add(self, val, weight):
62 |         self.val = val
63 |         self.sum += val * weight
64 |         self.count += weight
65 |         self.avg = self.sum / self.count
66 | 
67 |     def value(self):
68 |         return self.val
69 | 
70 |     def average(self):
71 |         return self.avg
72 | 
73 | def create_logger(cfg, cfg_name, phase='train'):
74 |     root_output_dir = Path(cfg.OUTPUT_DIR)
75 |     # set up logger
76 |     if not root_output_dir.exists():
77 |         print('=> creating {}'.format(root_output_dir))
78 |         root_output_dir.mkdir()
79 | 
80 |     dataset = cfg.DATASET.DATASET
81 |     model = cfg.MODEL.NAME
82 |     cfg_name = os.path.basename(cfg_name).split('.')[0]
83 | 
84 |     final_output_dir = root_output_dir / dataset / cfg_name
85 | 
86 |     print('=> creating {}'.format(final_output_dir))
87 |     final_output_dir.mkdir(parents=True, exist_ok=True)
88 | 
89 |     time_str = time.strftime('%Y-%m-%d-%H-%M')
90 |     log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase)
91 |     final_log_file = final_output_dir / log_file
92 |     head = '%(asctime)-15s %(message)s'
93 |     logging.basicConfig(filename=str(final_log_file),
94 |                         format=head)
95 |     logger = logging.getLogger()
96 |     logger.setLevel(logging.INFO)
97 |     console = logging.StreamHandler()
98 |     logging.getLogger('').addHandler(console)
99 | 
100 |     tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \
101 |         (cfg_name + '_' + time_str)
102 |     print('=> creating {}'.format(tensorboard_log_dir))
103 |     tensorboard_log_dir.mkdir(parents=True, exist_ok=True)
104 | 
105 |     return logger, str(final_output_dir), str(tensorboard_log_dir)
106 | 
107 | def get_confusion_matrix(label, pred, size, num_class, ignore=-1):
108 |     """
109 |     Calculate the confusion matrix from the given label and prediction.
110 |     """
111 |     output = pred.cpu().numpy().transpose(0, 2, 3, 1)
112 |     seg_pred = np.asarray(np.argmax(output, axis=3), dtype=np.uint8)
113 |     seg_gt = np.asarray(
114 |         label.cpu().numpy()[:, :size[-2], :size[-1]], dtype=np.int64)  # np.int was removed in numpy >= 1.24
115 | 
116 |     ignore_index = seg_gt != ignore
117 |     seg_gt = seg_gt[ignore_index]
118 |     seg_pred = seg_pred[ignore_index]
119 | 
120 |     index = (seg_gt * num_class + seg_pred).astype('int32')
121 |     label_count = np.bincount(index)
122 |     confusion_matrix = np.zeros((num_class, num_class))
123 | 
124 |     for i_label in range(num_class):
125 |         for i_pred in range(num_class):
126 |             cur_index = i_label * num_class + i_pred
127 |             if cur_index < len(label_count):
128 |                 confusion_matrix[i_label,
129 |                                  i_pred] = label_count[cur_index]
130 |     return confusion_matrix
131 | 
132 | def adjust_learning_rate(optimizer, base_lr, max_iters,
133 |                          cur_iters, power=0.9, nbb_mult=10):
134
| lr = base_lr*((1-float(cur_iters)/max_iters)**(power)) 135 | optimizer.param_groups[0]['lr'] = lr 136 | if len(optimizer.param_groups) == 2: 137 | optimizer.param_groups[1]['lr'] = lr * nbb_mult 138 | return lr -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | EasyDict==1.7 2 | shapely 3 | Cython 4 | scipy 5 | pandas 6 | pyyaml 7 | json_tricks 8 | scikit-image 9 | yacs>=0.1.5 10 | tensorboardX>=1.6 11 | tqdm 12 | ninja 13 | 14 | -------------------------------------------------------------------------------- /run_dist.sh: -------------------------------------------------------------------------------- 1 | PYTHON="/opt/conda/bin/python" 2 | GPU_NUM=$1 3 | CONFIG=$2 4 | 5 | $PYTHON -m pip install -r requirements.txt 6 | 7 | $PYTHON -m torch.distributed.launch \ 8 | --nproc_per_node=$GPU_NUM \ 9 | tools/train.py \ 10 | --cfg experiments/$CONFIG.yaml \ 11 | 2>&1 | tee local_log.txt 12 | -------------------------------------------------------------------------------- /run_local.sh: -------------------------------------------------------------------------------- 1 | PYTHON="/data/anaconda/envs/pytorch1.7.1/bin/python" 2 | GPU_NUM=4 3 | CONFIG="seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200_paddle" 4 | 5 | $PYTHON -m pip install -r requirements.txt 6 | 7 | $PYTHON -m torch.distributed.launch \ 8 | --nproc_per_node=$GPU_NUM \ 9 | tools/train.py \ 10 | --cfg experiments/pascal_ctx/$CONFIG.yaml \ 11 | 2>&1 | tee local_log.txt 12 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os.path as osp 12 | import sys 13 | 14 | 15 | def add_path(path): 16 | if path not in sys.path: 17 | sys.path.insert(0, path) 18 | 19 | this_dir = osp.dirname(__file__) 20 | 21 | lib_path = osp.join(this_dir, '..', 'lib') 22 | add_path(lib_path) 23 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import argparse 8 | import os 9 | import pprint 10 | import shutil 11 | import sys 12 | 13 | import logging 14 | import time 15 | import timeit 16 | from pathlib import Path 17 | 18 | import numpy as np 19 | 20 | import torch 21 | import torch.nn as nn 22 | import torch.backends.cudnn as cudnn 23 | 24 | import _init_paths 25 | import models 26 | import datasets 27 | from config import config 28 | from config import update_config 29 | from core.function import testval, test 30 | from utils.modelsummary import get_model_summary 31 | from utils.utils import create_logger, FullModel 32 | 33 | def parse_args(): 34 | parser = argparse.ArgumentParser(description='Train segmentation network') 35 | 36 | parser.add_argument('--cfg', 37 | help='experiment configure file name', 38 | required=True, 39 | type=str) 40 | parser.add_argument('opts', 41 | help="Modify config options using the command-line", 42 | default=None, 43 | nargs=argparse.REMAINDER) 44 | 45 | args = parser.parse_args() 46 | update_config(config, args) 47 | 48 | return args 49 | 50 | def main(): 51 | args = parse_args() 52 | 53 | logger, final_output_dir, _ = create_logger( 54 | config, args.cfg, 'test') 55 | 56 | logger.info(pprint.pformat(args)) 57 | logger.info(pprint.pformat(config)) 58 | 59 | # cudnn related setting 60 | cudnn.benchmark = config.CUDNN.BENCHMARK 61 | cudnn.deterministic = config.CUDNN.DETERMINISTIC 62 | cudnn.enabled = config.CUDNN.ENABLED 63 | 64 | # build model 65 | if torch.__version__.startswith('1'): 66 | module = eval('models.'+config.MODEL.NAME) 67 | module.BatchNorm2d_class = module.BatchNorm2d = torch.nn.BatchNorm2d 68 | model = eval('models.'+config.MODEL.NAME + 69 | '.get_seg_model')(config) 70 | 71 | dump_input = torch.rand( 72 | (1, 3, config.TRAIN.IMAGE_SIZE[1], config.TRAIN.IMAGE_SIZE[0]) 73 | ) 74 | logger.info(get_model_summary(model.cuda(), dump_input.cuda())) 75 | 76 | if config.TEST.MODEL_FILE: 77 | model_state_file = config.TEST.MODEL_FILE 78 | else: 79 | model_state_file = os.path.join(final_output_dir, 'final_state.pth') 80 | logger.info('=> loading model from {}'.format(model_state_file)) 81 | 82 | pretrained_dict = torch.load(model_state_file) 83 | if 'state_dict' in pretrained_dict: 84 | pretrained_dict = pretrained_dict['state_dict'] 85 | model_dict = model.state_dict() 86 | pretrained_dict = {k[6:]: v for k, v in pretrained_dict.items() 87 | if k[6:] in model_dict.keys()} 88 | for k, _ in pretrained_dict.items(): 89 | logger.info( 90 | '=> loading {} from pretrained model'.format(k)) 91 | model_dict.update(pretrained_dict) 92 | model.load_state_dict(model_dict) 93 | 94 | gpus = list(config.GPUS) 95 | model = nn.DataParallel(model, device_ids=gpus).cuda() 96 | 97 | # prepare data 98 | test_size = (config.TEST.IMAGE_SIZE[1], config.TEST.IMAGE_SIZE[0]) 99 | test_dataset = eval('datasets.'+config.DATASET.DATASET)( 100 | root=config.DATASET.ROOT, 101 | list_path=config.DATASET.TEST_SET, 102 | num_samples=None, 103 | num_classes=config.DATASET.NUM_CLASSES, 104 | multi_scale=False, 105 | flip=False, 106 | ignore_label=config.TRAIN.IGNORE_LABEL, 107 | base_size=config.TEST.BASE_SIZE, 108 | crop_size=test_size, 109 | downsample_rate=1) 110 | 111 | testloader = torch.utils.data.DataLoader( 112 | test_dataset, 113 | batch_size=1, 114 | shuffle=False, 115 | num_workers=config.WORKERS, 116 | pin_memory=True) 117 | 118 | start = timeit.default_timer() 119 | if 
'val' in config.DATASET.TEST_SET:
120 |         mean_IoU, IoU_array, pixel_acc, mean_acc = testval(config,
121 |                                                            test_dataset,
122 |                                                            testloader,
123 |                                                            model)
124 | 
125 |         msg = 'MeanIU: {: 4.4f}, Pixel_Acc: {: 4.4f}, \
126 |             Mean_Acc: {: 4.4f}, Class IoU: '.format(mean_IoU,
127 |                                                     pixel_acc, mean_acc)
128 |         logging.info(msg)
129 |         logging.info(IoU_array)
130 |     elif 'test' in config.DATASET.TEST_SET:
131 |         test(config,
132 |              test_dataset,
133 |              testloader,
134 |              model,
135 |              sv_dir=final_output_dir)
136 | 
137 |     end = timeit.default_timer()
138 |     logger.info('Mins: %d' % int((end - start) / 60))  # np.int was removed in numpy >= 1.24
139 |     logger.info('Done')
140 | 
141 | 
142 | if __name__ == '__main__':
143 |     main()
144 | 
--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 | 
7 | import argparse
8 | import os
9 | import pprint
10 | import shutil
11 | import sys
12 | 
13 | import logging
14 | import time
15 | import timeit
16 | from pathlib import Path
17 | 
18 | import numpy as np
19 | 
20 | import torch
21 | import torch.nn as nn
22 | import torch.backends.cudnn as cudnn
23 | import torch.optim
24 | from tensorboardX import SummaryWriter
25 | 
26 | import _init_paths
27 | import models
28 | import datasets
29 | from config import config
30 | from config import update_config
31 | from core.criterion import CrossEntropy, OhemCrossEntropy
32 | from core.function import train, validate
33 | from utils.modelsummary import get_model_summary
34 | from utils.utils import create_logger, FullModel
35 | 
36 | def parse_args():
37 |     parser = argparse.ArgumentParser(description='Train segmentation network')
38 | 
39 |     parser.add_argument('--cfg',
40 |                         help='experiment configure file name',
41 |                         required=True,
42 |                         type=str)
43 |     parser.add_argument('--seed', type=int, default=304)
44 |     parser.add_argument("--local_rank", type=int, default=-1)
45 |     parser.add_argument('opts',
46 |                         help="Modify config options using the command-line",
47 |                         default=None,
48 |                         nargs=argparse.REMAINDER)
49 | 
50 |     args = parser.parse_args()
51 |     update_config(config, args)
52 | 
53 |     return args
54 | 
55 | def get_sampler(dataset):
56 |     from utils.distributed import is_distributed
57 |     if is_distributed():
58 |         from torch.utils.data.distributed import DistributedSampler
59 |         return DistributedSampler(dataset)
60 |     else:
61 |         return None
62 | 
63 | def main():
64 |     args = parse_args()
65 | 
66 |     if args.seed > 0:
67 |         import random
68 |         print('Seeding with', args.seed)
69 |         random.seed(args.seed)
70 |         torch.manual_seed(args.seed)
71 | 
72 |     logger, final_output_dir, tb_log_dir = create_logger(
73 |         config, args.cfg, 'train')
74 | 
75 |     logger.info(pprint.pformat(args))
76 |     logger.info(config)
77 | 
78 |     writer_dict = {
79 |         'writer': SummaryWriter(tb_log_dir),
80 |         'train_global_steps': 0,
81 |         'valid_global_steps': 0,
82 |     }
83 | 
84 |     # cudnn related setting
85 |     cudnn.benchmark = config.CUDNN.BENCHMARK
86 |     cudnn.deterministic = config.CUDNN.DETERMINISTIC
87 |     cudnn.enabled = config.CUDNN.ENABLED
88 |     gpus = list(config.GPUS)
89 |     distributed = args.local_rank >= 0
90 |     if distributed:
91 |         device = torch.device('cuda:{}'.format(args.local_rank))
92 |         torch.cuda.set_device(device)
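        # env:// rendezvous: torch.distributed.launch (see run_dist.sh and
        # run_local.sh) exports MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE
        # and passes --local_rank, so each of the GPU_NUM processes pins itself
        # to one device above before joining the NCCL group below.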
93 |         torch.distributed.init_process_group(
94 |             backend="nccl", init_method="env://",
95 |         )
96 | 
97 |     # build model
98 |     model = eval('models.'+config.MODEL.NAME +
99 |                  '.get_seg_model')(config)
100 | 
101 |     # dump_input = torch.rand(
102 |     #     (1, 3, config.TRAIN.IMAGE_SIZE[1], config.TRAIN.IMAGE_SIZE[0])
103 |     # )
104 |     # logger.info(get_model_summary(model.cuda(), dump_input.cuda()))
105 | 
106 |     # copy model file
107 |     if distributed and args.local_rank == 0:
108 |         this_dir = os.path.dirname(__file__)
109 |         models_dst_dir = os.path.join(final_output_dir, 'models')
110 |         # if os.path.exists(models_dst_dir):
111 |         #     shutil.rmtree(models_dst_dir)
112 |         # shutil.copytree(os.path.join(this_dir, '../lib/models'), models_dst_dir)
113 | 
114 |     if distributed:
115 |         batch_size = config.TRAIN.BATCH_SIZE_PER_GPU
116 |     else:
117 |         batch_size = config.TRAIN.BATCH_SIZE_PER_GPU * len(gpus)
118 | 
119 |     # prepare data
120 |     crop_size = (config.TRAIN.IMAGE_SIZE[1], config.TRAIN.IMAGE_SIZE[0])
121 |     train_dataset = eval('datasets.'+config.DATASET.DATASET)(
122 |                         root=config.DATASET.ROOT,
123 |                         list_path=config.DATASET.TRAIN_SET,
124 |                         num_samples=None,
125 |                         num_classes=config.DATASET.NUM_CLASSES,
126 |                         multi_scale=config.TRAIN.MULTI_SCALE,
127 |                         flip=config.TRAIN.FLIP,
128 |                         ignore_label=config.TRAIN.IGNORE_LABEL,
129 |                         base_size=config.TRAIN.BASE_SIZE,
130 |                         crop_size=crop_size,
131 |                         downsample_rate=config.TRAIN.DOWNSAMPLERATE,
132 |                         scale_factor=config.TRAIN.SCALE_FACTOR)
133 | 
134 |     train_sampler = get_sampler(train_dataset)
135 |     trainloader = torch.utils.data.DataLoader(
136 |         train_dataset,
137 |         batch_size=batch_size,
138 |         shuffle=config.TRAIN.SHUFFLE and train_sampler is None,
139 |         num_workers=config.WORKERS,
140 |         pin_memory=True,
141 |         drop_last=True,
142 |         sampler=train_sampler)
143 | 
144 |     extra_epoch_iters = 0
145 |     if config.DATASET.EXTRA_TRAIN_SET:
146 |         extra_train_dataset = eval('datasets.'+config.DATASET.DATASET)(
147 |                     root=config.DATASET.ROOT,
148 |                     list_path=config.DATASET.EXTRA_TRAIN_SET,
149 |                     num_samples=None,
150 |                     num_classes=config.DATASET.NUM_CLASSES,
151 |                     multi_scale=config.TRAIN.MULTI_SCALE,
152 |                     flip=config.TRAIN.FLIP,
153 |                     ignore_label=config.TRAIN.IGNORE_LABEL,
154 |                     base_size=config.TRAIN.BASE_SIZE,
155 |                     crop_size=crop_size,
156 |                     downsample_rate=config.TRAIN.DOWNSAMPLERATE,
157 |                     scale_factor=config.TRAIN.SCALE_FACTOR)
158 |         extra_train_sampler = get_sampler(extra_train_dataset)
159 |         extra_trainloader = torch.utils.data.DataLoader(
160 |             extra_train_dataset,
161 |             batch_size=batch_size,
162 |             shuffle=config.TRAIN.SHUFFLE and extra_train_sampler is None,
163 |             num_workers=config.WORKERS,
164 |             pin_memory=True,
165 |             drop_last=True,
166 |             sampler=extra_train_sampler)
167 |         extra_epoch_iters = int(extra_train_dataset.__len__() /
168 |                                 config.TRAIN.BATCH_SIZE_PER_GPU / len(gpus))  # np.int removed in numpy >= 1.24
169 | 
170 | 
171 |     test_size = (config.TEST.IMAGE_SIZE[1], config.TEST.IMAGE_SIZE[0])
172 |     test_dataset = eval('datasets.'+config.DATASET.DATASET)(
173 |                         root=config.DATASET.ROOT,
174 |                         list_path=config.DATASET.TEST_SET,
175 |                         num_samples=config.TEST.NUM_SAMPLES,
176 |                         num_classes=config.DATASET.NUM_CLASSES,
177 |                         multi_scale=False,
178 |                         flip=False,
179 |                         ignore_label=config.TRAIN.IGNORE_LABEL,
180 |                         base_size=config.TEST.BASE_SIZE,
181 |                         crop_size=test_size,
182 |                         downsample_rate=1)
183 | 
184 |     test_sampler = get_sampler(test_dataset)
185 |     testloader = torch.utils.data.DataLoader(
186 |         test_dataset,
187 |         batch_size=batch_size,
188 |         shuffle=False,
189 |         num_workers=config.WORKERS,
190 |         pin_memory=True,
191 |         sampler=test_sampler)
192 | 
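    # OhemCrossEntropy (lib/core/criterion.py) mines hard examples online:
    # it keeps the pixels whose predicted probability falls below OHEMTHRES,
    # but never fewer than OHEMKEEP per batch, and averages the loss over
    # those survivors only; CrossEntropy uses every labeled pixel.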
193 |     # criterion
194 |     if config.LOSS.USE_OHEM:
195 |         criterion = OhemCrossEntropy(ignore_label=config.TRAIN.IGNORE_LABEL,
196 |                                      thres=config.LOSS.OHEMTHRES,
197 |                                      min_kept=config.LOSS.OHEMKEEP,
198 |                                      weight=train_dataset.class_weights)
199 |     else:
200 |         criterion = CrossEntropy(ignore_label=config.TRAIN.IGNORE_LABEL,
201 |                                  weight=train_dataset.class_weights)
202 | 
203 |     model = FullModel(model, criterion)
204 |     if distributed:
205 |         model = model.to(device)
206 |         model = torch.nn.parallel.DistributedDataParallel(
207 |             model,
208 |             find_unused_parameters=True,
209 |             device_ids=[args.local_rank],
210 |             output_device=args.local_rank
211 |         )
212 |     else:
213 |         model = nn.DataParallel(model, device_ids=gpus).cuda()
214 | 
215 | 
216 |     # optimizer
217 |     if config.TRAIN.OPTIMIZER == 'sgd':
218 | 
219 |         params_dict = dict(model.named_parameters())
220 |         if config.TRAIN.NONBACKBONE_KEYWORDS:
221 |             bb_lr = []
222 |             nbb_lr = []
223 |             nbb_keys = set()
224 |             for k, param in params_dict.items():
225 |                 if any(part in k for part in config.TRAIN.NONBACKBONE_KEYWORDS):
226 |                     nbb_lr.append(param)
227 |                     nbb_keys.add(k)
228 |                 else:
229 |                     bb_lr.append(param)
230 |             print(nbb_keys)
231 |             params = [{'params': bb_lr, 'lr': config.TRAIN.LR}, {'params': nbb_lr, 'lr': config.TRAIN.LR * config.TRAIN.NONBACKBONE_MULT}]
232 |         else:
233 |             params = [{'params': list(params_dict.values()), 'lr': config.TRAIN.LR}]
234 | 
235 |         optimizer = torch.optim.SGD(params,
236 |                                     lr=config.TRAIN.LR,
237 |                                     momentum=config.TRAIN.MOMENTUM,
238 |                                     weight_decay=config.TRAIN.WD,
239 |                                     nesterov=config.TRAIN.NESTEROV,
240 |                                     )
241 |     else:
242 |         raise ValueError('Only Support SGD optimizer')
243 | 
244 |     epoch_iters = int(train_dataset.__len__() /
245 |                       config.TRAIN.BATCH_SIZE_PER_GPU / len(gpus))  # np.int removed in numpy >= 1.24
246 | 
247 |     best_mIoU = 0
248 |     last_epoch = 0
249 |     if config.TRAIN.RESUME:
250 |         model_state_file = os.path.join(final_output_dir,
251 |                                         'checkpoint.pth.tar')
252 |         if os.path.isfile(model_state_file):
253 |             checkpoint = torch.load(model_state_file, map_location={'cuda:0': 'cpu'})
254 |             best_mIoU = checkpoint['best_mIoU']
255 |             last_epoch = checkpoint['epoch']
256 |             dct = checkpoint['state_dict']
257 | 
258 |             model.module.model.load_state_dict({k.replace('model.', ''): v for k, v in dct.items() if k.startswith('model.')})
259 |             optimizer.load_state_dict(checkpoint['optimizer'])
260 |             logger.info("=> loaded checkpoint (epoch {})"
261 |                         .format(checkpoint['epoch']))
262 |         if distributed:
263 |             torch.distributed.barrier()
264 | 
265 |     start = timeit.default_timer()
266 |     end_epoch = config.TRAIN.END_EPOCH + config.TRAIN.EXTRA_EPOCH
267 |     num_iters = config.TRAIN.END_EPOCH * epoch_iters
268 |     extra_iters = config.TRAIN.EXTRA_EPOCH * extra_epoch_iters
269 | 
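    # Two-phase schedule: epochs [0, END_EPOCH) draw from trainloader under the
    # main LR schedule; if DATASET.EXTRA_TRAIN_SET is configured, epochs
    # [END_EPOCH, END_EPOCH + EXTRA_EPOCH) switch to extra_trainloader and the
    # separate EXTRA_LR schedule.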
270 |     for epoch in range(last_epoch, end_epoch):
271 | 
272 |         current_trainloader = extra_trainloader if epoch >= config.TRAIN.END_EPOCH else trainloader
273 |         if current_trainloader.sampler is not None and hasattr(current_trainloader.sampler, 'set_epoch'):
274 |             current_trainloader.sampler.set_epoch(epoch)
275 | 
276 |         # valid_loss, mean_IoU, IoU_array = validate(config,
277 |         #     testloader, model, writer_dict)
278 | 
279 |         if epoch >= config.TRAIN.END_EPOCH:
280 |             train(config, epoch-config.TRAIN.END_EPOCH,
281 |                   config.TRAIN.EXTRA_EPOCH, extra_epoch_iters,
282 |                   config.TRAIN.EXTRA_LR, extra_iters,
283 |                   extra_trainloader, optimizer, model, writer_dict)
284 |         else:
285 |             train(config, epoch, config.TRAIN.END_EPOCH,
286 |                   epoch_iters, config.TRAIN.LR, num_iters,
287 |                   trainloader, optimizer, model, writer_dict)
288 | 
289 |         valid_loss, mean_IoU, IoU_array = validate(config,
290 |                     testloader, model, writer_dict)
291 | 
292 |         if args.local_rank <= 0:
293 |             logger.info('=> saving checkpoint to {}'.format(
294 |                 os.path.join(final_output_dir, 'checkpoint.pth.tar')))
295 |             torch.save({
296 |                 'epoch': epoch+1,
297 |                 'best_mIoU': best_mIoU,
298 |                 'state_dict': model.module.state_dict(),
299 |                 'optimizer': optimizer.state_dict(),
300 |             }, os.path.join(final_output_dir, 'checkpoint.pth.tar'))
301 |             if mean_IoU > best_mIoU:
302 |                 best_mIoU = mean_IoU
303 |                 torch.save(model.module.state_dict(),
304 |                            os.path.join(final_output_dir, 'best.pth'))
305 |             msg = 'Loss: {:.3f}, MeanIU: {: 4.4f}, Best_mIoU: {: 4.4f}'.format(
306 |                 valid_loss, mean_IoU, best_mIoU)
307 |             logging.info(msg)
308 |             logging.info(IoU_array)
309 | 
310 |     if args.local_rank <= 0:
311 | 
312 |         torch.save(model.module.state_dict(),
313 |                    os.path.join(final_output_dir, 'final_state.pth'))
314 | 
315 |         writer_dict['writer'].close()
316 |         end = timeit.default_timer()
317 |         logger.info('Hours: %d' % int((end - start) / 3600))  # np.int removed in numpy >= 1.24
318 |         logger.info('Done')
319 | 
320 | 
321 | if __name__ == '__main__':
322 |     main()
323 | 
--------------------------------------------------------------------------------
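One last pointer for readers following the loop above: the epoch_iters/num_iters values threaded into train() exist so that core/function.py (not excerpted here) can step the poly learning-rate policy from lib/utils/utils.py as iterations advance. A minimal sketch of that decay curve, with illustrative constants rather than values from any shipped config:

```python
# Poly schedule implemented by lib/utils/utils.py:adjust_learning_rate;
# the constants below are illustrative, not taken from any experiment yaml.
base_lr, max_iters, power = 0.01, 120000, 0.9

def poly_lr(cur_iters):
    # Decays smoothly from base_lr at iteration 0 toward 0 at max_iters.
    return base_lr * ((1 - cur_iters / max_iters) ** power)

for it in (0, 30000, 60000, 90000):
    print(it, round(poly_lr(it), 6))   # ~0.01, 0.0077, 0.0054, 0.0029
```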