├── .gitignore
├── LICENSE
├── README.md
├── data
│   └── list
│       ├── ade20k
│       │   ├── testval.lst
│       │   ├── train.lst
│       │   ├── trainval.lst
│       │   └── val.lst
│       ├── cityscapes
│       │   ├── test.lst
│       │   ├── train.lst
│       │   ├── trainval.lst
│       │   └── val.lst
│       └── cocostuff
│           ├── testval.lst
│           ├── train.lst
│           ├── trainval.lst
│           └── val.lst
├── experiments
│   ├── ade20k
│   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml
│   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml
│   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml
│   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml
│   │   └── seg_hrnet_w48_520x520_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml
│   ├── cityscapes
│   │   ├── seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   ├── seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml
│   │   ├── seg_hrnet_ocr_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   ├── seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   ├── seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484_paddle.yaml
│   │   ├── seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml
│   │   ├── seg_hrnet_w48_train_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
│   │   ├── seg_hrnet_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml
│   │   └── seg_hrnet_w48_trainval_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml
│   ├── cocostuff
│   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml
│   │   ├── seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml
│   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml
│   │   ├── seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml
│   │   └── seg_hrnet_w48_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml
│   ├── lip
│   │   ├── seg_hrnet_ocr_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml
│   │   ├── seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml
│   │   └── seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150_paddle.yaml
│   └── pascal_ctx
│       ├── seg_hrnet_ocr_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml
│       ├── seg_hrnet_ocr_w48_cls60_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml
│       ├── seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml
│       └── seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200_paddle.yaml
├── figures
│   ├── OCR.PNG
│   ├── SegmentationTransformerOCR.png
│   ├── SegmentationTransformerOCR1.png
│   ├── SegmentationTransformerOCR2.png
│   └── seg-hrnet.png
├── hubconf.py
├── lib
│   ├── config
│   │   ├── __init__.py
│   │   ├── default.py
│   │   ├── hrnet_config.py
│   │   └── models.py
│   ├── core
│   │   ├── criterion.py
│   │   └── function.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── ade20k.py
│   │   ├── base_dataset.py
│   │   ├── cityscapes.py
│   │   ├── cocostuff.py
│   │   ├── lip.py
│   │   └── pascal_ctx.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── bn_helper.py
│   │   ├── hrnet.py
│   │   ├── seg_hrnet.py
│   │   ├── seg_hrnet_ocr.py
│   │   └── sync_bn
│   │       ├── LICENSE
│   │       ├── __init__.py
│   │       └── inplace_abn
│   │           ├── __init__.py
│   │           ├── bn.py
│   │           ├── functions.py
│   │           └── src
│   │               ├── common.h
│   │               ├── inplace_abn.cpp
│   │               ├── inplace_abn.h
│   │               ├── inplace_abn_cpu.cpp
│   │               └── inplace_abn_cuda.cu
│   └── utils
│       ├── __init__.py
│       ├── distributed.py
│       ├── modelsummary.py
│       └── utils.py
├── local_log.txt
├── requirements.txt
├── run_dist.sh
├── run_local.sh
└── tools
    ├── _init_paths.py
    ├── test.py
    └── train.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | __pycache__/
3 | *.py[co]
4 | data/
5 | log/
6 | output/
7 | pretrained_models
8 | scripts/
9 | detail-api/
10 | data/list
11 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) [2019] [Microsoft]
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 | =======================================================================================
24 | 3-clause BSD licenses
25 | =======================================================================================
26 | 1. syncbn - For details, see lib/models/sync_bn/LICENSE
27 | Copyright (c) 2017 mapillary
28 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # High-resolution networks and Segmentation Transformer for Semantic Segmentation
2 | ## Branches
3 | - This is the implementation for HRNet + OCR.
4 | - The PyTorch 1.1 version is available [here](https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/pytorch-v1.1).
5 | - The PyTorch 0.4.1 version is available [here](https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/master).
6 |
7 | ## News
8 | - [2021/05/04] We rephrase the OCR approach as **Segmentation Transformer** [pdf](https://arxiv.org/pdf/1909.11065.pdf). We will provide the updated implementation soon.
9 | - [2021/02/16] Based on the [PaddleClas](https://github.com/PaddlePaddle/PaddleClas) ImageNet pretrained weights, we achieve **83.22%** on Cityscapes val, **59.62%** on PASCAL-Context val (**new SOTA**), **45.20%** on COCO-Stuff val (**new SOTA**), **58.21%** on LIP val and **47.98%** on ADE20K val. Please check out [openseg.pytorch](https://github.com/openseg-group/openseg.pytorch/tree/pytorch-1.7) for more details.
10 | - [2020/08/16] [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) has supported our HRNet + OCR.
11 | - [2020/07/20] The researchers from AInnovation have achieved **Rank#1** on [ADE20K Leaderboard](http://sceneparsing.csail.mit.edu/) via training our HRNet + OCR with a semi-supervised learning scheme. More details are in their [Technical Report](https://arxiv.org/pdf/2007.10591.pdf).
12 | - [2020/07/09] Our paper is accepted by ECCV 2020: [Object-Contextual Representations for Semantic Segmentation](https://arxiv.org/pdf/1909.11065.pdf). Notably, researchers from Nvidia set a new state-of-the-art performance on the Cityscapes leaderboard: [85.4%](https://www.cityscapes-dataset.com/method-details/?submissionID=7836) by combining our HRNet + OCR with a new [hierarchical multi-scale attention scheme](https://arxiv.org/abs/2005.10821).
13 | - [2020/03/13] Our paper is accepted by TPAMI: [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/pdf/1908.07919.pdf).
14 | - HRNet + OCR + SegFix: Rank \#1 (84.5) in the [Cityscapes leaderboard](https://www.cityscapes-dataset.com/benchmarks/). OCR: object contextual representations [pdf](https://arxiv.org/pdf/1909.11065.pdf). ***HRNet + OCR is reproduced [here](https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/HRNet-OCR)***.
15 | - Thanks Google and UIUC researchers. A modified HRNet combined with semantic and instance multi-scale context achieves SOTA panoptic segmentation result on the Mapillary Vista challenge. See [the paper](https://arxiv.org/pdf/1910.04751.pdf).
16 | - Small HRNet models for Cityscapes segmentation. Superior to MobileNetV2Plus ....
17 | - Rank \#1 (83.7) in the [Cityscapes leaderboard](https://www.cityscapes-dataset.com/benchmarks/): HRNet combined with an extension of [object context](https://arxiv.org/pdf/1809.00916.pdf).
18 |
19 | - PyTorch-v1.1 and the official Sync-BN are supported. We have reproduced the Cityscapes results on the new codebase. Please check the [pytorch-v1.1 branch](https://github.com/HRNet/HRNet-Semantic-Segmentation/tree/pytorch-v1.1).
20 |
21 | ## Introduction
22 | This is the official code of [high-resolution representations for Semantic Segmentation](https://arxiv.org/abs/1904.04514).
23 | We augment the HRNet with a very simple segmentation head, shown in the figure below. We aggregate the output representations at four different resolutions, and then use a 1x1 convolution to fuse these representations. The fused representation is fed into the classifier. We evaluate our methods on three datasets: Cityscapes, PASCAL-Context and LIP.
24 |
25 | 
26 | ![](figures/seg-hrnet.png)
27 | 
28 | 
29 | 
30 | 
31 | 
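To make this concrete, here is a minimal PyTorch sketch of such a head (an illustration, not the repo's `lib/models/seg_hrnet.py`): upsample the lower-resolution streams to the highest resolution, concatenate, fuse with a 1x1 convolution, and classify. The channel widths (48, 96, 192, 384) match the HRNetV2-W48 configs below.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleSegHead(nn.Module):
    """Minimal sketch of the HRNet segmentation head described above."""

    def __init__(self, in_channels=(48, 96, 192, 384), num_classes=19):
        super().__init__()
        total = sum(in_channels)
        self.fuse = nn.Sequential(            # 1x1 conv fuses the concatenation
            nn.Conv2d(total, total, kernel_size=1),
            nn.BatchNorm2d(total),
            nn.ReLU(inplace=True),
        )
        self.classifier = nn.Conv2d(total, num_classes, kernel_size=1)

    def forward(self, feats):
        # feats: four feature maps at strides 4, 8, 16, 32
        h, w = feats[0].shape[2:]
        ups = [feats[0]] + [
            F.interpolate(f, size=(h, w), mode='bilinear', align_corners=True)
            for f in feats[1:]                # align_corners=True, as noted below
        ]
        x = torch.cat(ups, dim=1)             # aggregate the four resolutions
        return self.classifier(self.fuse(x))
```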
32 | Besides, we further combine HRNet with [Object Contextual Representations](https://arxiv.org/pdf/1909.11065.pdf) and achieve higher performance on the three datasets. The code of HRNet+OCR is contained in this branch. We illustrate the overall framework of OCR and the equivalent Transformer pipelines in the figures below:
33 |
34 | 
35 | ![](figures/OCR.PNG)
36 | 
37 | ![](figures/SegmentationTransformerOCR.png)
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
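The core OCR computation can be sketched in a few lines (a simplification of `lib/models/seg_hrnet_ocr.py`; the real module adds transform convolutions, dropout, and the auxiliary head):

```python
import torch
import torch.nn as nn

class TinyOCR(nn.Module):
    """Minimal sketch of object-contextual representations (OCR)."""

    def __init__(self, channels=512, num_classes=19):
        super().__init__()
        self.soft_regions = nn.Conv2d(channels, num_classes, 1)  # coarse seg
        self.query = nn.Linear(channels, channels)
        self.key = nn.Linear(channels, channels)

    def forward(self, x):
        b, c, h, w = x.shape
        pix = x.flatten(2).transpose(1, 2)             # (B, HW, C) pixel features
        # 1) soft object regions, normalized over the spatial dimension
        regions = self.soft_regions(x).flatten(2).softmax(dim=-1)  # (B, K, HW)
        # 2) each region summarized as a weighted sum of pixel features
        region_feats = regions @ pix                   # (B, K, C)
        # 3) each pixel attends over the K region summaries
        attn = (self.query(pix) @ self.key(region_feats).transpose(1, 2)) / c ** 0.5
        context = attn.softmax(dim=-1) @ region_feats  # (B, HW, C)
        # 4) pixel representation augmented with its object context
        out = torch.cat([pix, context], dim=-1)        # (B, HW, 2C)
        return out.transpose(1, 2).reshape(b, 2 * c, h, w)
```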
44 | ## Segmentation models
45 | The models are initialized with weights pretrained on ImageNet. ''Paddle'' means the results are based on PaddleClas pretrained HRNet models.
46 | You can download the pretrained models from https://github.com/HRNet/HRNet-Image-Classification. *Slightly different from the original HRNet, we use align_corners = True for upsampling.*
47 |
48 | 1. Performance on the Cityscapes dataset. The models are trained and tested with the input size of 512x1024 and 1024x2048 respectively.
49 | If multi-scale testing is used, we adopt scales: 0.5,0.75,1.0,1.25,1.5,1.75.
50 |
51 | | model | Train Set | Test Set | OHEM | Multi-scale | Flip | mIoU | Link |
52 | | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: |
53 | | HRNetV2-W48 | Train | Val | No | No | No | 80.9 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_cs_8090_torch11.pth)/[BaiduYun(Access Code:pmix)](https://pan.baidu.com/s/1KyiOUOR0SYxKtJfIlD5o-w)|
54 | | HRNetV2-W48 + OCR | Train | Val | No | No | No | 81.6 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_cs_8162_torch11.pth)/[BaiduYun(Access Code:fa6i)](https://pan.baidu.com/s/1BGNt4Xmx3yfXUS8yjde0hQ)|
55 | | HRNetV2-W48 + OCR | Train + Val | Test | No | Yes | Yes | 82.3 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_cs_trainval_8227_torch11.pth)/[BaiduYun(Access Code:ycrk)](https://pan.baidu.com/s/16mD81UnGzjUBD-haDQfzIQ)|
56 | | HRNetV2-W48 (Paddle) | Train | Val | No | No | No | 81.6 | ---|
57 | | HRNetV2-W48 + OCR (Paddle) | Train | Val | No | No | No | --- | ---|
58 | | HRNetV2-W48 + OCR (Paddle) | Train + Val | Test | No | Yes | Yes | --- | ---|
59 |
60 | 2. Performance on the LIP dataset. The models are trained and tested with the input size of 473x473.
61 |
62 | | model | OHEM | Multi-scale | Flip | mIoU | Link |
63 | | :--: | :--: | :--: | :--: | :--: | :--: |
64 | | HRNetV2-W48 | No | No | Yes | 55.83 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_lip_5583_torch04.pth)/[BaiduYun(Access Code:fahi)](https://pan.baidu.com/s/15DamFiGEoxwDDF1TwuZdnA)|
65 | | HRNetV2-W48 + OCR | No | No | Yes | 56.48 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_lip_5648_torch04.pth)/[BaiduYun(Access Code:xex2)](https://pan.baidu.com/s/1dFYSR2bahRnvpIOdh88kOQ)|
66 | | HRNetV2-W48 (Paddle) | No | No | Yes | --- | --- |
67 | | HRNetV2-W48 + OCR (Paddle) | No | No | Yes | --- | ---|
68 |
69 |
70 | **Note:** Currently we can only reproduce the HRNet+OCR results on the LIP dataset with PyTorch 0.4.1.
71 |
72 | 3. Performance on the PASCAL-Context dataset. The models are trained and tested with the input size of 520x520.
73 | If multi-scale testing is used, we adopt scales: 0.5,0.75,1.0,1.25,1.5,1.75,2.0 (the same as EncNet, DANet etc.).
74 |
75 | | model | num classes | OHEM | Multi-scale | Flip | mIoU | Link |
76 | | :--: | :--: | :--: | :--: | :--: | :--: | :--: |
77 | | HRNetV2-W48 | 59 classes | No | Yes | Yes | 54.1 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_pascal_ctx_5410_torch11.pth)/[BaiduYun(Access Code:wz6v)](https://pan.baidu.com/s/1m0MqpHSk0SX380EYEMawSA)|
78 | | HRNetV2-W48 + OCR | 59 classes | No | Yes | Yes | 56.2 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_pascal_ctx_5618_torch11.pth)/[BaiduYun(Access Code:yyxh)](https://pan.baidu.com/s/1XYP54gr3XB76tHmCcKdU9g)|
79 | | HRNetV2-W48 | 60 classes | No | Yes | Yes | 48.3 | [OneDrive](https://1drv.ms/u/s!Aus8VCZ_C_33gQEHDQrZCiv4R5mf)/[BaiduYun(Access Code:9uf8)](https://pan.baidu.com/s/1pgYt8P8ht2HOOzcA0F7Kag)|
80 | | HRNetV2-W48 + OCR | 60 classes | No | Yes | Yes | 50.1 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_pascal_ctx_5410_torch11.pth)/[BaiduYun(Access Code:gtkb)](https://pan.baidu.com/s/13AYjwzh1LJSlipJwNpJ3Uw)|
81 | | HRNetV2-W48 (Paddle) | 59 classes | No | Yes | Yes | --- | ---|
82 | | HRNetV2-W48 (Paddle) | 60 classes | No | Yes | Yes | --- | ---|
83 | | HRNetV2-W48 + OCR (Paddle) | 59 classes | No | Yes | Yes | --- | ---|
84 | | HRNetV2-W48 + OCR (Paddle) | 60 classes | No | Yes | Yes | --- | ---|
85 |
86 | 4. Performance on the COCO-Stuff dataset. The models are trained and tested with the input size of 520x520.
87 | If multi-scale testing is used, we adopt scales: 0.5,0.75,1.0,1.25,1.5,1.75,2.0 (the same as EncNet, DANet etc.).
88 |
89 | | model | OHEM | Multi-scale | Flip | mIoU | Link |
90 | | :--: | :--: | :--: | :--: | :--: | :--: |
91 | | HRNetV2-W48 | Yes | No | No | 36.2 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_cocostuff_3617_torch04.pth)/[BaiduYun(Access Code:92gw)](https://pan.baidu.com/s/1VAV6KThH1Irzv9HZgLWE2Q)|
92 | | HRNetV2-W48 + OCR | Yes | No | No | 39.7 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_cocostuff_3965_torch04.pth)/[BaiduYun(Access Code:sjc4)](https://pan.baidu.com/s/1HFSYyVwKBG3E6y76gcPjDA)|
93 | | HRNetV2-W48 | Yes | Yes | Yes | 37.9 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_cocostuff_3617_torch04.pth)/[BaiduYun(Access Code:92gw)](https://pan.baidu.com/s/1VAV6KThH1Irzv9HZgLWE2Q) |
94 | | HRNetV2-W48 + OCR | Yes | Yes | Yes | 40.6 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_cocostuff_3965_torch04.pth)/[BaiduYun(Access Code:sjc4)](https://pan.baidu.com/s/1HFSYyVwKBG3E6y76gcPjDA) |
95 | | HRNetV2-W48 (Paddle) | Yes | No | No | --- | ---|
96 | | HRNetV2-W48 + OCR (Paddle) | Yes | No | No | --- | ---|
97 | | HRNetV2-W48 (Paddle) | Yes | Yes | Yes | --- | ---|
98 | | HRNetV2-W48 + OCR (Paddle) | Yes | Yes | Yes | --- | ---|
99 |
100 |
101 |
102 | 5. Performance on the ADE20K dataset. The models are trained and tested with the input size of 520x520.
103 | If multi-scale testing is used, we adopt scales: 0.5,0.75,1.0,1.25,1.5,1.75,2.0 (the same as EncNet, DANet etc.).
104 |
105 | | model | OHEM | Multi-scale | Flip | mIoU | Link |
106 | | :--: | :--: | :--: | :--: | :--: | :--: |
107 | | HRNetV2-W48 | Yes | No | No | 43.1 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ade20k_4312_torch04.pth)/[BaiduYun(Access Code:f6xf)](https://pan.baidu.com/s/11neVkzxx27qS2-mPFW9dfg)|
108 | | HRNetV2-W48 + OCR | Yes | No | No | 44.5 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_ade20k_4451_torch04.pth)/[BaiduYun(Access Code:peg4)](https://pan.baidu.com/s/1HLhjiLIdgaOHs0SzEtkgkQ)|
109 | | HRNetV2-W48 | Yes | Yes | Yes | 44.2 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ade20k_4312_torch04.pth)/[BaiduYun(Access Code:f6xf)](https://pan.baidu.com/s/11neVkzxx27qS2-mPFW9dfg) |
110 | | HRNetV2-W48 + OCR | Yes | Yes | Yes | 45.5 | [Github](https://github.com/hsfzxjy/models.storage/releases/download/HRNet-OCR/hrnet_ocr_ade20k_4451_torch04.pth)/[BaiduYun(Access Code:peg4)](https://pan.baidu.com/s/1HLhjiLIdgaOHs0SzEtkgkQ) |
111 | | HRNetV2-W48 (Paddle) | Yes | No | No | --- | ---|
112 | | HRNetV2-W48 + OCR (Paddle) | Yes | No | No | --- | ---|
113 | | HRNetV2-W48 (Paddle) | Yes | Yes | Yes | --- | ---|
114 | | HRNetV2-W48 + OCR (Paddle) | Yes | Yes | Yes | --- | ---|
115 |
116 |
117 |
118 | ## Quick start
119 | ### Install
120 | 1. For the LIP dataset, install PyTorch 0.4.1 following the [official instructions](https://pytorch.org/). For Cityscapes and PASCAL-Context, we use PyTorch 1.1.0.
121 | 2. `git clone https://github.com/HRNet/HRNet-Semantic-Segmentation $SEG_ROOT`
122 | 3. Install dependencies: `pip install -r requirements.txt`
123 |
124 | If you want to train and evaluate our models on PASCAL-Context, you need to install [details](https://github.com/zhanghang1989/detail-api).
125 | ````bash
126 | pip install git+https://github.com/zhanghang1989/detail-api.git#subdirectory=PythonAPI
127 | ````
128 |
129 | ### Data preparation
130 | You need to download the [Cityscapes](https://www.cityscapes-dataset.com/), [LIP](http://sysu-hcp.net/lip/) and [PASCAL-Context](https://cs.stanford.edu/~roozbeh/pascal-context/) datasets.
131 |
132 | Your directory tree should look like this:
133 | ````bash
134 | $SEG_ROOT/data
135 | ├── cityscapes
136 | │   ├── gtFine
137 | │   │   ├── test
138 | │   │   ├── train
139 | │   │   └── val
140 | │   └── leftImg8bit
141 | │       ├── test
142 | │       ├── train
143 | │       └── val
144 | ├── lip
145 | │   ├── TrainVal_images
146 | │   │   ├── train_images
147 | │   │   └── val_images
148 | │   └── TrainVal_parsing_annotations
149 | │       ├── train_segmentations
150 | │       ├── train_segmentations_reversed
151 | │       └── val_segmentations
152 | ├── pascal_ctx
153 | │   ├── common
154 | │   ├── PythonAPI
155 | │   ├── res
156 | │   └── VOCdevkit
157 | │       └── VOC2010
158 | ├── cocostuff
159 | │   ├── train
160 | │   │   ├── image
161 | │   │   └── label
162 | │   └── val
163 | │       ├── image
164 | │       └── label
165 | ├── ade20k
166 | │   ├── train
167 | │   │   ├── image
168 | │   │   └── label
169 | │   └── val
170 | │       ├── image
171 | │       └── label
172 | └── list
173 |     ├── cityscapes
174 |     │   ├── test.lst
175 |     │   ├── trainval.lst
176 |     │   └── val.lst
177 |     └── lip
178 |         ├── testvalList.txt
179 |         ├── trainList.txt
180 |         └── valList.txt
181 | ````
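The `.lst` files referenced above appear to pair an image path with its label path on each line (test lists carry the image path only); a small reader under that assumption:

```python
def read_lst(path):
    """Read a data/list/*.lst file: 'image_path [label_path]' per line (assumed format)."""
    samples = []
    with open(path) as f:
        for line in f:
            parts = line.split()
            if len(parts) == 2:        # train/val entries: image and label
                samples.append((parts[0], parts[1]))
            elif len(parts) == 1:      # test entries: image only
                samples.append((parts[0], None))
    return samples
```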
182 |
183 | ### Train and Test
184 |
185 | #### PyTorch Version Differences
186 |
187 | Note that the codebase supports both PyTorch 0.4.1 and 1.1.0, which use different commands for training. In what follows, we use `$PY_CMD` to denote the corresponding startup command.
188 |
189 | ```bash
190 | # For PyTorch 0.4.1
191 | PY_CMD="python"
192 | # For PyTorch 1.1.0
193 | PY_CMD="python -m torch.distributed.launch --nproc_per_node=4"
194 | ```
195 |
196 | For example, when training on Cityscapes we use PyTorch 1.1.0, so the command
197 | ````bash
198 | $PY_CMD tools/train.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
199 | ````
200 | expands to
201 | ````bash
202 | python -m torch.distributed.launch --nproc_per_node=4 tools/train.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
203 | ````
204 | #### Training
205 |
206 | Just specify the configuration file for `tools/train.py`.
207 |
208 | For example, to train HRNet-W48 on Cityscapes with a batch size of 12 on 4 GPUs:
209 | ````bash
210 | $PY_CMD tools/train.py --cfg experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
211 | ````
212 | To train HRNet-W48 + OCR on Cityscapes with a batch size of 12 on 4 GPUs:
213 | ````bash
214 | $PY_CMD tools/train.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml
215 | ````
216 |
217 | Note that we only reproduce HRNet+OCR on the LIP dataset using PyTorch 0.4.1, so we recommend using PyTorch 0.4.1 if you want to train on LIP.
218 |
219 | #### Testing
220 |
221 | For example, evaluating HRNet+OCR on the Cityscapes validation set with multi-scale and flip testing:
222 | ````bash
223 | python tools/test.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml \
224 | TEST.MODEL_FILE hrnet_ocr_cs_8162_torch11.pth \
225 | TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75 \
226 | TEST.FLIP_TEST True
227 | ````
228 | Evaluating HRNet+OCR on the Cityscapes test set with multi-scale and flip testing:
229 | ````bash
230 | python tools/test.py --cfg experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml \
231 | DATASET.TEST_SET list/cityscapes/test.lst \
232 | TEST.MODEL_FILE hrnet_ocr_cs_trainval_8227_torch11.pth \
233 | TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75 \
234 | TEST.FLIP_TEST True
235 | ````
236 | Evaluating HRNet+OCR on the PASCAL-Context validation set with multi-scale and flip testing:
237 | ````bash
238 | python tools/test.py --cfg experiments/pascal_ctx/seg_hrnet_ocr_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml \
239 | DATASET.TEST_SET testval \
240 | TEST.MODEL_FILE hrnet_ocr_pascal_ctx_5618_torch11.pth \
241 | TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75,2.0 \
242 | TEST.FLIP_TEST True
243 | ````
244 | Evaluating HRNet+OCR on the LIP validation set with flip testing:
245 | ````bash
246 | python tools/test.py --cfg experiments/lip/seg_hrnet_ocr_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml \
247 | DATASET.TEST_SET list/lip/testvalList.txt \
248 | TEST.MODEL_FILE hrnet_ocr_lip_5648_torch04.pth \
249 | TEST.FLIP_TEST True \
250 | TEST.NUM_SAMPLES 0
251 | ````
252 | Evaluating HRNet+OCR on the COCO-Stuff validation set with multi-scale and flip testing:
253 | ````bash
254 | python tools/test.py --cfg experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml \
255 | DATASET.TEST_SET list/cocostuff/testval.lst \
256 | TEST.MODEL_FILE hrnet_ocr_cocostuff_3965_torch04.pth \
257 | TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75,2.0 \
258 | TEST.MULTI_SCALE True TEST.FLIP_TEST True
259 | ````
260 | Evaluating HRNet+OCR on the ADE20K validation set with multi-scale and flip testing:
261 | ````bash
262 | python tools/test.py --cfg experiments/ade20k/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml \
263 | DATASET.TEST_SET list/ade20k/testval.lst \
264 | TEST.MODEL_FILE hrnet_ocr_ade20k_4451_torch04.pth \
265 | TEST.SCALE_LIST 0.5,0.75,1.0,1.25,1.5,1.75,2.0 \
266 | TEST.MULTI_SCALE True TEST.FLIP_TEST True
267 | ````
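Conceptually, the multi-scale + flip protocol set by `TEST.SCALE_LIST` and `TEST.FLIP_TEST` sums logits over rescaled (and mirrored) inputs before the argmax. A rough sketch, assuming the model returns a single logits tensor at input resolution (the actual logic, including sliding-window cropping, lives in `lib/core/function.py` and `lib/datasets/base_dataset.py`):

```python
import torch
import torch.nn.functional as F

@torch.no_grad()
def multi_scale_flip_inference(model, image,
                               scales=(0.5, 0.75, 1.0, 1.25, 1.5, 1.75),
                               flip=True):
    _, _, h, w = image.shape
    total = 0
    for s in scales:
        x = F.interpolate(image, scale_factor=s, mode='bilinear',
                          align_corners=True)
        logits = model(x)
        if flip:  # add the horizontally mirrored prediction, flipped back
            logits = logits + torch.flip(model(torch.flip(x, dims=[3])), dims=[3])
        total = total + F.interpolate(logits, size=(h, w), mode='bilinear',
                                      align_corners=True)
    return total.argmax(dim=1)  # summing logits averages over scales
```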
268 |
269 | ## Other applications of HRNet
270 | * [Human pose estimation](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch)
271 | * [Image Classification](https://github.com/HRNet/HRNet-Image-Classification)
272 | * [Object detection](https://github.com/HRNet/HRNet-Object-Detection)
273 | * [Facial landmark detection](https://github.com/HRNet/HRNet-Facial-Landmark-Detection)
274 |
275 | ## Citation
276 | If you find this work or code is helpful in your research, please cite:
277 | ````
278 | @inproceedings{SunXLW19,
279 | title={Deep High-Resolution Representation Learning for Human Pose Estimation},
280 | author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang},
281 | booktitle={CVPR},
282 | year={2019}
283 | }
284 |
285 | @article{WangSCJDZLMTWLX19,
286 | title={Deep High-Resolution Representation Learning for Visual Recognition},
287 | author={Jingdong Wang and Ke Sun and Tianheng Cheng and
288 | Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
289 | Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
290 | journal={TPAMI},
291 | year={2019}
292 | }
293 |
294 | @inproceedings{YuanCW19,
295 | title={Object-Contextual Representations for Semantic Segmentation},
296 | author={Yuhui Yuan and Xilin Chen and Jingdong Wang},
297 | booktitle={ECCV},
298 | year={2020}
299 | }
300 | ````
301 |
302 | ## Reference
303 | [1] Deep High-Resolution Representation Learning for Visual Recognition. Jingdong Wang, Ke Sun, Tianheng Cheng,
304 | Borui Jiang, Chaorui Deng, Yang Zhao, Dong Liu, Yadong Mu, Mingkui Tan, Xinggang Wang, Wenyu Liu, Bin Xiao. Accepted by TPAMI. [download](https://arxiv.org/pdf/1908.07919.pdf)
305 |
306 | [2] Object-Contextual Representations for Semantic Segmentation. Yuhui Yuan, Xilin Chen, Jingdong Wang. [download](https://arxiv.org/pdf/1909.11065.pdf)
307 |
308 | ## Acknowledgement
309 | We adopt sync-bn implemented by [InplaceABN](https://github.com/mapillary/inplace_abn) for PyTorch 0.4.1 experiments and the official
310 | sync-bn provided by PyTorch for PyTorch 1.1 experiments.
311 |
312 | We adopt the data preprocessing for the PASCAL-Context dataset implemented by [PASCAL API](https://github.com/zhanghang1989/detail-api).
313 |
--------------------------------------------------------------------------------
/experiments/ade20k/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: ade20k
13 | ROOT: 'data/'
14 | TEST_SET: 'list/ade20k/val.lst'
15 | TRAIN_SET: 'list/ade20k/train.lst'
16 | NUM_CLASSES: 150
17 | MODEL:
18 | NAME: seg_hrnet_ocr
19 | NUM_OUTPUTS: 2
20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: true
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | BALANCE_WEIGHTS: [0.4, 1]
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 520
79 | - 520
80 | BASE_SIZE: 520
81 | BATCH_SIZE_PER_GPU: 4
82 | SHUFFLE: true
83 | BEGIN_EPOCH: 0
84 | END_EPOCH: 120
85 | RESUME: true
86 | OPTIMIZER: sgd
87 | LR: 0.02
88 | WD: 0.0001
89 | MOMENTUM: 0.9
90 | NESTEROV: false
91 | FLIP: true
92 | MULTI_SCALE: true
93 | DOWNSAMPLERATE: 1
94 | IGNORE_LABEL: 255
95 | SCALE_FACTOR: 16
96 | TEST:
97 | IMAGE_SIZE:
98 | - 520
99 | - 520
100 | BASE_SIZE: 520
101 | BATCH_SIZE_PER_GPU: 1
102 | NUM_SAMPLES: 200
103 | FLIP_TEST: false
104 | MULTI_SCALE: false
105 |
--------------------------------------------------------------------------------
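The LOSS block above turns on OHEM with `OHEMTHRES: 0.9` and `OHEMKEEP: 131072`: the per-pixel cross-entropy keeps only pixels the model finds hard, but never fewer than OHEMKEEP of them. A sketch of that idea (illustrative, not the repo's `lib/core/criterion.py`):

```python
import torch.nn.functional as F

def ohem_cross_entropy(logits, target, thresh=0.9, min_kept=131072, ignore=255):
    """Keep pixels whose true-class probability is below `thresh`,
    raising the cutoff if needed so at least `min_kept` pixels survive."""
    pixel_loss = F.cross_entropy(logits, target, ignore_index=ignore,
                                 reduction='none').flatten()
    valid = target.flatten() != ignore
    safe_target = target.masked_fill(target == ignore, 0)  # keep gather index in range
    prob = F.softmax(logits, dim=1)
    true_prob = prob.gather(1, safe_target.unsqueeze(1)).flatten()[valid]
    pixel_loss = pixel_loss[valid]
    if true_prob.numel() > min_kept:
        kth = true_prob.sort().values[min_kept - 1]
        thresh = max(thresh, kth.item())     # guarantees >= min_kept kept pixels
    keep = true_prob <= thresh
    return pixel_loss[keep].mean() if keep.any() else pixel_loss.mean()
```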
/experiments/ade20k/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3,4,5,6,7)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 8
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: ade20k
13 | ROOT: '../../../../dataset/ade20k/'
14 | TEST_SET: 'val.lst'
15 | TRAIN_SET: 'train.lst'
16 | NUM_CLASSES: 150
17 | MODEL:
18 | NAME: seg_hrnet_ocr
19 | NUM_OUTPUTS: 2
20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: true
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | BALANCE_WEIGHTS: [0.4, 1]
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 520
79 | - 520
80 | BASE_SIZE: 520
81 | BATCH_SIZE_PER_GPU: 2
82 | SHUFFLE: true
83 | BEGIN_EPOCH: 0
84 | END_EPOCH: 120
85 | RESUME: true
86 | OPTIMIZER: sgd
87 | LR: 0.02
88 | WD: 0.0001
89 | MOMENTUM: 0.9
90 | NESTEROV: false
91 | FLIP: true
92 | MULTI_SCALE: true
93 | DOWNSAMPLERATE: 1
94 | IGNORE_LABEL: 255
95 | SCALE_FACTOR: 16
96 | TEST:
97 | IMAGE_SIZE:
98 | - 520
99 | - 520
100 | BASE_SIZE: 520
101 | BATCH_SIZE_PER_GPU: 1
102 | NUM_SAMPLES: 200
103 | FLIP_TEST: false
104 | MULTI_SCALE: false
105 |
--------------------------------------------------------------------------------
/experiments/ade20k/seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: ade20k
13 | ROOT: 'data/'
14 | TEST_SET: 'list/ade20k/val.lst'
15 | TRAIN_SET: 'list/ade20k/train.lst'
16 | NUM_CLASSES: 150
17 | MODEL:
18 | NAME: seg_hrnet
19 | NUM_OUTPUTS: 1
20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: true
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | TRAIN:
76 | IMAGE_SIZE:
77 | - 520
78 | - 520
79 | BASE_SIZE: 520
80 | BATCH_SIZE_PER_GPU: 4
81 | SHUFFLE: true
82 | BEGIN_EPOCH: 0
83 | END_EPOCH: 120
84 | RESUME: true
85 | OPTIMIZER: sgd
86 | LR: 0.02
87 | WD: 0.0001
88 | MOMENTUM: 0.9
89 | NESTEROV: false
90 | FLIP: true
91 | MULTI_SCALE: true
92 | DOWNSAMPLERATE: 1
93 | IGNORE_LABEL: 255
94 | SCALE_FACTOR: 11
95 | TEST:
96 | IMAGE_SIZE:
97 | - 520
98 | - 520
99 | BASE_SIZE: 520
100 | BATCH_SIZE_PER_GPU: 1
101 | NUM_SAMPLES: 200
102 | FLIP_TEST: false
103 | MULTI_SCALE: false
104 |
--------------------------------------------------------------------------------
/experiments/ade20k/seg_hrnet_w48_520x520_ohem_sgd_lr2e-2_wd1e-4_bs_16_epoch120_paddle.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3,4,5,6,7)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 8
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: ade20k
13 | ROOT: '../../../../dataset/ade20k/'
14 | TEST_SET: 'val.lst'
15 | TRAIN_SET: 'train.lst'
16 | NUM_CLASSES: 150
17 | MODEL:
18 | NAME: seg_hrnet
19 | NUM_OUTPUTS: 1
20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: true
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | TRAIN:
76 | IMAGE_SIZE:
77 | - 520
78 | - 520
79 | BASE_SIZE: 520
80 | BATCH_SIZE_PER_GPU: 2
81 | SHUFFLE: true
82 | BEGIN_EPOCH: 0
83 | END_EPOCH: 120
84 | RESUME: true
85 | OPTIMIZER: sgd
86 | LR: 0.02
87 | WD: 0.0001
88 | MOMENTUM: 0.9
89 | NESTEROV: false
90 | FLIP: true
91 | MULTI_SCALE: true
92 | DOWNSAMPLERATE: 1
93 | IGNORE_LABEL: 255
94 | SCALE_FACTOR: 11
95 | TEST:
96 | IMAGE_SIZE:
97 | - 520
98 | - 520
99 | BASE_SIZE: 520
100 | BATCH_SIZE_PER_GPU: 1
101 | NUM_SAMPLES: 200
102 | FLIP_TEST: false
103 | MULTI_SCALE: false
104 |
--------------------------------------------------------------------------------
/experiments/ade20k/seg_hrnet_w48_520x520_sgd_lr2e-2_wd1e-4_bs_16_epoch120.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: ade20k
13 | ROOT: 'data/'
14 | TEST_SET: 'list/ade20k/val.lst'
15 | TRAIN_SET: 'list/ade20k/train.lst'
16 | NUM_CLASSES: 150
17 | MODEL:
18 | NAME: seg_hrnet
19 | NUM_OUTPUTS: 1
20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | TRAIN:
76 | IMAGE_SIZE:
77 | - 520
78 | - 520
79 | BASE_SIZE: 520
80 | BATCH_SIZE_PER_GPU: 4
81 | SHUFFLE: true
82 | BEGIN_EPOCH: 0
83 | END_EPOCH: 120
84 | RESUME: true
85 | OPTIMIZER: sgd
86 | LR: 0.02
87 | WD: 0.0001
88 | MOMENTUM: 0.9
89 | NESTEROV: false
90 | FLIP: true
91 | MULTI_SCALE: true
92 | DOWNSAMPLERATE: 1
93 | IGNORE_LABEL: 255
94 | SCALE_FACTOR: 11
95 | TEST:
96 | IMAGE_SIZE:
97 | - 520
98 | - 520
99 | BASE_SIZE: 520
100 | BATCH_SIZE_PER_GPU: 1
101 | NUM_SAMPLES: 200
102 | FLIP_TEST: false
103 | MULTI_SCALE: false
104 |
--------------------------------------------------------------------------------
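In these configs `MULTI_SCALE: true` with `SCALE_FACTOR` (11 here, 16 in the OCR configs) governs random rescaling during training. As an assumption mirroring the usual recipe in `lib/datasets/base_dataset.py`, the scale is drawn around 1.0 in steps of 0.1:

```python
import random

def random_train_scale(scale_factor=11):
    # e.g. SCALE_FACTOR: 11 -> a scale drawn from {0.5, 0.6, ..., 1.6}
    return 0.5 + random.randint(0, scale_factor) / 10.0
```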
/experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: cityscapes
13 | ROOT: data/
14 | TEST_SET: 'list/cityscapes/val.lst'
15 | TRAIN_SET: 'list/cityscapes/train.lst'
16 | NUM_CLASSES: 19
17 | MODEL:
18 | NAME: seg_hrnet_ocr
19 | NUM_OUTPUTS: 2
20 | PRETRAINED: "pretrained_models/hrnetv2_w48_imagenet_pretrained.pth"
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | BALANCE_WEIGHTS: [0.4, 1]
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 1024
79 | - 512
80 | BASE_SIZE: 2048
81 | BATCH_SIZE_PER_GPU: 3
82 | SHUFFLE: true
83 | BEGIN_EPOCH: 0
84 | END_EPOCH: 484
85 | RESUME: true
86 | OPTIMIZER: sgd
87 | LR: 0.01
88 | WD: 0.0005
89 | MOMENTUM: 0.9
90 | NESTEROV: false
91 | FLIP: true
92 | MULTI_SCALE: true
93 | DOWNSAMPLERATE: 1
94 | IGNORE_LABEL: 255
95 | SCALE_FACTOR: 16
96 | TEST:
97 | IMAGE_SIZE:
98 | - 2048
99 | - 1024
100 | BASE_SIZE: 2048
101 | BATCH_SIZE_PER_GPU: 4
102 | FLIP_TEST: false
103 | MULTI_SCALE: false
104 |
--------------------------------------------------------------------------------
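In the OCR configs, `NUM_OUTPUTS: 2` means the network returns two logit maps (auxiliary head and OCR head), and `BALANCE_WEIGHTS: [0.4, 1]` weights their losses. In sketch form (illustrative):

```python
def balanced_loss(outputs, target, criterion, weights=(0.4, 1.0)):
    # outputs: [aux_logits, ocr_logits]; the total loss is the weighted sum
    return sum(w * criterion(out, target) for w, out in zip(weights, outputs))
```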
/experiments/cityscapes/seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3,4,5,6,7)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 8
9 | PRINT_FREQ: 100
10 |
11 | DATASET:
12 | DATASET: cityscapes
13 | ROOT: '../../../../dataset/original_cityscapes/'
14 | TEST_SET: 'val.lst'
15 | TRAIN_SET: 'train.lst'
16 | NUM_CLASSES: 19
17 | MODEL:
18 | NAME: seg_hrnet_ocr
19 | NUM_OUTPUTS: 2
20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | BALANCE_WEIGHTS: [0.4, 1]
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 1024
79 | - 512
80 | BASE_SIZE: 2048
81 | BATCH_SIZE_PER_GPU: 2
82 | SHUFFLE: true
83 | BEGIN_EPOCH: 0
84 | END_EPOCH: 484
85 | RESUME: true
86 | OPTIMIZER: sgd
87 | LR: 0.01
88 | WD: 0.0005
89 | MOMENTUM: 0.9
90 | NESTEROV: false
91 | FLIP: true
92 | MULTI_SCALE: true
93 | DOWNSAMPLERATE: 1
94 | IGNORE_LABEL: 255
95 | SCALE_FACTOR: 16
96 | TEST:
97 | IMAGE_SIZE:
98 | - 2048
99 | - 1024
100 | BASE_SIZE: 2048
101 | BATCH_SIZE_PER_GPU: 2
102 | FLIP_TEST: false
103 | MULTI_SCALE: false
104 |
--------------------------------------------------------------------------------
/experiments/cityscapes/seg_hrnet_ocr_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: cityscapes
13 | ROOT: data/
14 | TEST_SET: 'list/cityscapes/val.lst'
15 | TRAIN_SET: 'list/cityscapes/trainval.lst'
16 | NUM_CLASSES: 19
17 | MODEL:
18 | NAME: seg_hrnet_ocr
19 | NUM_OUTPUTS: 2
20 | PRETRAINED: "pretrained_models/hrnetv2_w48_imagenet_pretrained.pth"
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | BALANCE_WEIGHTS: [0.4, 1]
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 1024
79 | - 512
80 | BASE_SIZE: 2048
81 | BATCH_SIZE_PER_GPU: 3
82 | SHUFFLE: true
83 | BEGIN_EPOCH: 0
84 | END_EPOCH: 484
85 | RESUME: true
86 | OPTIMIZER: sgd
87 | LR: 0.01
88 | WD: 0.0005
89 | MOMENTUM: 0.9
90 | NESTEROV: false
91 | FLIP: true
92 | MULTI_SCALE: true
93 | DOWNSAMPLERATE: 1
94 | IGNORE_LABEL: 255
95 | SCALE_FACTOR: 16
96 | TEST:
97 | IMAGE_SIZE:
98 | - 2048
99 | - 1024
100 | BASE_SIZE: 2048
101 | BATCH_SIZE_PER_GPU: 4
102 | FLIP_TEST: false
103 | MULTI_SCALE: false
104 |
--------------------------------------------------------------------------------
/experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 100
10 |
11 | DATASET:
12 | DATASET: cityscapes
13 | ROOT: '../../../../dataset/original_cityscapes/'
14 | TEST_SET: 'val.lst'
15 | TRAIN_SET: 'train.lst'
16 | NUM_CLASSES: 19
17 | MODEL:
18 | NAME: seg_hrnet
19 | ALIGN_CORNERS: False
20 | PRETRAINED: '../../../../dataset/pretrained_models/hrnetv2_w48_imagenet_pretrained_top1_21.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | TRAIN:
76 | IMAGE_SIZE:
77 | - 1024
78 | - 512
79 | BASE_SIZE: 2048
80 | BATCH_SIZE_PER_GPU: 3
81 | SHUFFLE: true
82 | BEGIN_EPOCH: 0
83 | END_EPOCH: 484
84 | RESUME: true
85 | OPTIMIZER: sgd
86 | LR: 0.01
87 | WD: 0.0005
88 | MOMENTUM: 0.9
89 | NESTEROV: false
90 | FLIP: true
91 | MULTI_SCALE: true
92 | DOWNSAMPLERATE: 1
93 | IGNORE_LABEL: 255
94 | SCALE_FACTOR: 16
95 | TEST:
96 | IMAGE_SIZE:
97 | - 2048
98 | - 1024
99 | BASE_SIZE: 2048
100 | BATCH_SIZE_PER_GPU: 4
101 | FLIP_TEST: false
102 | MULTI_SCALE: false
103 |
--------------------------------------------------------------------------------
/experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484_paddle.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 100
10 |
11 | DATASET:
12 | DATASET: cityscapes
13 | ROOT: '../../../../dataset/original_cityscapes/'
14 | TEST_SET: 'val.lst'
15 | TRAIN_SET: 'train.lst'
16 | NUM_CLASSES: 19
17 | MODEL:
18 | NAME: seg_hrnet
19 | ALIGN_CORNERS: False
20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | TRAIN:
76 | IMAGE_SIZE:
77 | - 1024
78 | - 512
79 | BASE_SIZE: 2048
80 | BATCH_SIZE_PER_GPU: 3
81 | SHUFFLE: true
82 | BEGIN_EPOCH: 0
83 | END_EPOCH: 484
84 | RESUME: true
85 | OPTIMIZER: sgd
86 | LR: 0.01
87 | WD: 0.0005
88 | MOMENTUM: 0.9
89 | NESTEROV: false
90 | FLIP: true
91 | MULTI_SCALE: true
92 | DOWNSAMPLERATE: 1
93 | IGNORE_LABEL: 255
94 | SCALE_FACTOR: 16
95 | TEST:
96 | IMAGE_SIZE:
97 | - 2048
98 | - 1024
99 | BASE_SIZE: 2048
100 | BATCH_SIZE_PER_GPU: 4
101 | FLIP_TEST: false
102 | MULTI_SCALE: false
103 |
--------------------------------------------------------------------------------
/experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_16_epoch484_paddle.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 100
10 |
11 | DATASET:
12 | DATASET: cityscapes
13 | ROOT: '../../../../dataset/original_cityscapes/'
14 | TEST_SET: 'val.lst'
15 | TRAIN_SET: 'train.lst'
16 | NUM_CLASSES: 19
17 | MODEL:
18 | NAME: seg_hrnet
19 | ALIGN_CORNERS: False
20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | TRAIN:
76 | IMAGE_SIZE:
77 | - 1024
78 | - 512
79 | BASE_SIZE: 2048
80 | BATCH_SIZE_PER_GPU: 2
81 | SHUFFLE: true
82 | BEGIN_EPOCH: 0
83 | END_EPOCH: 484
84 | RESUME: true
85 | OPTIMIZER: sgd
86 | LR: 0.01
87 | WD: 0.0005
88 | MOMENTUM: 0.9
89 | NESTEROV: false
90 | FLIP: true
91 | MULTI_SCALE: true
92 | DOWNSAMPLERATE: 1
93 | IGNORE_LABEL: 255
94 | SCALE_FACTOR: 16
95 | TEST:
96 | IMAGE_SIZE:
97 | - 2048
98 | - 1024
99 | BASE_SIZE: 2048
100 | BATCH_SIZE_PER_GPU: 4
101 | FLIP_TEST: false
102 | MULTI_SCALE: false
--------------------------------------------------------------------------------
/experiments/cityscapes/seg_hrnet_w48_train_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 100
10 |
11 | DATASET:
12 | DATASET: cityscapes
13 | ROOT: 'data/'
14 | TEST_SET: 'list/cityscapes/val.lst'
15 | TRAIN_SET: 'list/cityscapes/train.lst'
16 | NUM_CLASSES: 19
17 | MODEL:
18 | NAME: seg_hrnet
19 | ALIGN_CORNERS: False
20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: true
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | TRAIN:
76 | IMAGE_SIZE:
77 | - 1024
78 | - 512
79 | BASE_SIZE: 2048
80 | BATCH_SIZE_PER_GPU: 3
81 | SHUFFLE: true
82 | BEGIN_EPOCH: 0
83 | END_EPOCH: 484
84 | RESUME: true
85 | OPTIMIZER: sgd
86 | LR: 0.01
87 | WD: 0.0005
88 | MOMENTUM: 0.9
89 | NESTEROV: false
90 | FLIP: true
91 | MULTI_SCALE: true
92 | DOWNSAMPLERATE: 1
93 | IGNORE_LABEL: 255
94 | SCALE_FACTOR: 16
95 | TEST:
96 | IMAGE_SIZE:
97 | - 2048
98 | - 1024
99 | BASE_SIZE: 2048
100 | BATCH_SIZE_PER_GPU: 4
101 | FLIP_TEST: false
102 | MULTI_SCALE: false
103 |
--------------------------------------------------------------------------------
/experiments/cityscapes/seg_hrnet_w48_trainval_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 100
10 |
11 | DATASET:
12 | DATASET: cityscapes
13 | ROOT: 'data/'
14 | TEST_SET: 'list/cityscapes/val.lst'
15 | TRAIN_SET: 'list/cityscapes/train.lst'
16 | EXTRA_TRAIN_SET: 'list/cityscapes/trainval.lst'
17 | NUM_CLASSES: 19
18 | MODEL:
19 | NAME: seg_hrnet
20 | ALIGN_CORNERS: False
21 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
22 | EXTRA:
23 | FINAL_CONV_KERNEL: 1
24 | STAGE1:
25 | NUM_MODULES: 1
26 | NUM_BRANCHES: 1
27 | BLOCK: BOTTLENECK
28 | NUM_BLOCKS:
29 | - 4
30 | NUM_CHANNELS:
31 | - 64
32 | FUSE_METHOD: SUM
33 | STAGE2:
34 | NUM_MODULES: 1
35 | NUM_BRANCHES: 2
36 | BLOCK: BASIC
37 | NUM_BLOCKS:
38 | - 4
39 | - 4
40 | NUM_CHANNELS:
41 | - 48
42 | - 96
43 | FUSE_METHOD: SUM
44 | STAGE3:
45 | NUM_MODULES: 4
46 | NUM_BRANCHES: 3
47 | BLOCK: BASIC
48 | NUM_BLOCKS:
49 | - 4
50 | - 4
51 | - 4
52 | NUM_CHANNELS:
53 | - 48
54 | - 96
55 | - 192
56 | FUSE_METHOD: SUM
57 | STAGE4:
58 | NUM_MODULES: 3
59 | NUM_BRANCHES: 4
60 | BLOCK: BASIC
61 | NUM_BLOCKS:
62 | - 4
63 | - 4
64 | - 4
65 | - 4
66 | NUM_CHANNELS:
67 | - 48
68 | - 96
69 | - 192
70 | - 384
71 | FUSE_METHOD: SUM
72 | LOSS:
73 | USE_OHEM: false
74 | OHEMTHRES: 0.9
75 | OHEMKEEP: 131072
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 1024
79 | - 512
80 | BASE_SIZE: 2048
81 | BATCH_SIZE_PER_GPU: 3
82 | SHUFFLE: true
83 | BEGIN_EPOCH: 0
84 | END_EPOCH: 484
85 | EXTRA_EPOCH: 484
86 | RESUME: true
87 | OPTIMIZER: sgd
88 | LR: 0.01
89 | EXTRA_LR: 0.001
90 | WD: 0.0005
91 | MOMENTUM: 0.9
92 | NESTEROV: false
93 | FLIP: true
94 | MULTI_SCALE: true
95 | DOWNSAMPLERATE: 1
96 | IGNORE_LABEL: 255
97 | SCALE_FACTOR: 16
98 | TEST:
99 | IMAGE_SIZE:
100 | - 2048
101 | - 1024
102 | BASE_SIZE: 2048
103 | BATCH_SIZE_PER_GPU: 4
104 | FLIP_TEST: false
105 | MULTI_SCALE: false
106 |
--------------------------------------------------------------------------------
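The `epoch484x2` configs run two phases: `END_EPOCH` epochs on `TRAIN_SET` at `LR`, then `EXTRA_EPOCH` more on `EXTRA_TRAIN_SET` at `EXTRA_LR`. A sketch of the implied learning-rate schedule (the poly exponent 0.9 is an assumption; see `lib/utils/utils.py` for the real decay):

```python
def lr_at(epoch, lr=0.01, extra_lr=0.001,
          end_epoch=484, extra_epoch=484, power=0.9):
    # phase 1: poly decay from LR; phase 2: poly decay from EXTRA_LR
    if epoch < end_epoch:
        base, progress = lr, epoch / end_epoch
    else:
        base, progress = extra_lr, (epoch - end_epoch) / extra_epoch
    return base * (1 - progress) ** power
```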
/experiments/cityscapes/seg_hrnet_w48_trainval_ohem_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484x2.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 100
10 |
11 | DATASET:
12 | DATASET: cityscapes
13 | ROOT: 'data/'
14 | TEST_SET: 'list/cityscapes/val.lst'
15 | TRAIN_SET: 'list/cityscapes/train.lst'
16 | EXTRA_TRAIN_SET: 'list/cityscapes/trainval.lst'
17 | NUM_CLASSES: 19
18 | MODEL:
19 | NAME: seg_hrnet
20 | ALIGN_CORNERS: False
21 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
22 | EXTRA:
23 | FINAL_CONV_KERNEL: 1
24 | STAGE1:
25 | NUM_MODULES: 1
26 | NUM_BRANCHES: 1
27 | BLOCK: BOTTLENECK
28 | NUM_BLOCKS:
29 | - 4
30 | NUM_CHANNELS:
31 | - 64
32 | FUSE_METHOD: SUM
33 | STAGE2:
34 | NUM_MODULES: 1
35 | NUM_BRANCHES: 2
36 | BLOCK: BASIC
37 | NUM_BLOCKS:
38 | - 4
39 | - 4
40 | NUM_CHANNELS:
41 | - 48
42 | - 96
43 | FUSE_METHOD: SUM
44 | STAGE3:
45 | NUM_MODULES: 4
46 | NUM_BRANCHES: 3
47 | BLOCK: BASIC
48 | NUM_BLOCKS:
49 | - 4
50 | - 4
51 | - 4
52 | NUM_CHANNELS:
53 | - 48
54 | - 96
55 | - 192
56 | FUSE_METHOD: SUM
57 | STAGE4:
58 | NUM_MODULES: 3
59 | NUM_BRANCHES: 4
60 | BLOCK: BASIC
61 | NUM_BLOCKS:
62 | - 4
63 | - 4
64 | - 4
65 | - 4
66 | NUM_CHANNELS:
67 | - 48
68 | - 96
69 | - 192
70 | - 384
71 | FUSE_METHOD: SUM
72 | LOSS:
73 | USE_OHEM: true
74 | OHEMTHRES: 0.9
75 | OHEMKEEP: 131072
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 1024
79 | - 512
80 | BASE_SIZE: 2048
81 | BATCH_SIZE_PER_GPU: 3
82 | SHUFFLE: true
83 | BEGIN_EPOCH: 0
84 | END_EPOCH: 484
85 | EXTRA_EPOCH: 484
86 | RESUME: true
87 | OPTIMIZER: sgd
88 | LR: 0.01
89 | EXTRA_LR: 0.001
90 | WD: 0.0005
91 | MOMENTUM: 0.9
92 | NESTEROV: false
93 | FLIP: true
94 | MULTI_SCALE: true
95 | DOWNSAMPLERATE: 1
96 | IGNORE_LABEL: 255
97 | SCALE_FACTOR: 16
98 | TEST:
99 | IMAGE_SIZE:
100 | - 2048
101 | - 1024
102 | BASE_SIZE: 2048
103 | BATCH_SIZE_PER_GPU: 4
104 | FLIP_TEST: false
105 | MULTI_SCALE: false
106 |
--------------------------------------------------------------------------------
/experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: cocostuff
13 | ROOT: 'data/'
14 | TEST_SET: 'list/cocostuff/val.lst'
15 | TRAIN_SET: 'list/cocostuff/train.lst'
16 | NUM_CLASSES: 171
17 | MODEL:
18 | NAME: seg_hrnet_ocr
19 | NUM_OUTPUTS: 2
20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 | NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: true
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | BALANCE_WEIGHTS: [0.4, 1]
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 520
79 | - 520
80 | BASE_SIZE: 520
81 | BATCH_SIZE_PER_GPU: 4
82 | SHUFFLE: true
83 | BEGIN_EPOCH: 0
84 | END_EPOCH: 110
85 | RESUME: true
86 | OPTIMIZER: sgd
87 | LR: 0.001
88 | WD: 0.0001
89 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr']
90 | NONBACKBONE_MULT: 10
91 | MOMENTUM: 0.9
92 | NESTEROV: false
93 | FLIP: true
94 | MULTI_SCALE: true
95 | DOWNSAMPLERATE: 1
96 | IGNORE_LABEL: 255
97 | SCALE_FACTOR: 16
98 | TEST:
99 | IMAGE_SIZE:
100 | - 520
101 | - 520
102 | BASE_SIZE: 520
103 | BATCH_SIZE_PER_GPU: 1
104 | NUM_SAMPLES: 200
105 | FLIP_TEST: false
106 | MULTI_SCALE: false
107 |
--------------------------------------------------------------------------------
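NONBACKBONE_KEYWORDS and NONBACKBONE_MULT split the model's parameters by name so that the randomly initialized OCR head ('cls', 'aux', 'ocr') trains at 10x the backbone learning rate. A minimal sketch of that grouping (tools/train.py builds the real parameter groups; split_param_groups is an illustrative helper, not repo code):

    def split_param_groups(model, keywords, base_lr, mult):
        # Parameters whose name contains any keyword join the head group.
        backbone, head = [], []
        for name, p in model.named_parameters():
            (head if any(k in name for k in keywords) else backbone).append(p)
        return [{'params': backbone, 'lr': base_lr},
                {'params': head, 'lr': base_lr * mult}]

    # With this config: torch.optim.SGD(split_param_groups(model, ['cls', 'aux', 'ocr'], 0.001, 10),
    #                                   momentum=0.9, weight_decay=0.0001)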
/experiments/cocostuff/seg_hrnet_ocr_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3,4,5,6,7)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 8
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: cocostuff
13 | ROOT: '../../../../dataset/coco_stuff_10k/'
14 | TEST_SET: 'val.lst'
15 | TRAIN_SET: 'train.lst'
16 | NUM_CLASSES: 171
17 | MODEL:
18 | NAME: seg_hrnet_ocr
19 | NUM_OUTPUTS: 2
20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 |       NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: true
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | BALANCE_WEIGHTS: [0.4, 1]
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 520
79 | - 520
80 | BASE_SIZE: 520
81 | BATCH_SIZE_PER_GPU: 2
82 | SHUFFLE: true
83 | BEGIN_EPOCH: 0
84 | END_EPOCH: 110
85 | RESUME: true
86 | OPTIMIZER: sgd
87 | LR: 0.001
88 | WD: 0.0001
89 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr']
90 | NONBACKBONE_MULT: 10
91 | MOMENTUM: 0.9
92 | NESTEROV: false
93 | FLIP: true
94 | MULTI_SCALE: true
95 | DOWNSAMPLERATE: 1
96 | IGNORE_LABEL: 255
97 | SCALE_FACTOR: 16
98 | TEST:
99 | IMAGE_SIZE:
100 | - 520
101 | - 520
102 | BASE_SIZE: 520
103 | BATCH_SIZE_PER_GPU: 1
104 | NUM_SAMPLES: 200
105 | FLIP_TEST: false
106 | MULTI_SCALE: false
107 |
--------------------------------------------------------------------------------
/experiments/cocostuff/seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: cocostuff
13 | ROOT: '../../../../dataset/coco_stuff_10k/'
14 | TEST_SET: 'val.lst'
15 | TRAIN_SET: 'train.lst'
16 | NUM_CLASSES: 171
17 | MODEL:
18 | NAME: seg_hrnet
19 | NUM_OUTPUTS: 1
20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 |       NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: true
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | TRAIN:
76 | IMAGE_SIZE:
77 | - 520
78 | - 520
79 | BASE_SIZE: 520
80 | BATCH_SIZE_PER_GPU: 4
81 | SHUFFLE: true
82 | BEGIN_EPOCH: 0
83 | END_EPOCH: 110
84 | RESUME: true
85 | OPTIMIZER: sgd
86 | LR: 0.001
87 | WD: 0.0001
88 | NONBACKBONE_KEYWORDS: ['last_layer']
89 | NONBACKBONE_MULT: 10
90 | MOMENTUM: 0.9
91 | NESTEROV: false
92 | FLIP: true
93 | MULTI_SCALE: true
94 | DOWNSAMPLERATE: 1
95 | IGNORE_LABEL: 255
96 | SCALE_FACTOR: 16
97 | TEST:
98 | IMAGE_SIZE:
99 | - 520
100 | - 520
101 | BASE_SIZE: 520
102 | BATCH_SIZE_PER_GPU: 1
103 | NUM_SAMPLES: 200
104 | FLIP_TEST: false
105 | MULTI_SCALE: false
106 |
--------------------------------------------------------------------------------
/experiments/cocostuff/seg_hrnet_w48_520x520_ohem_sgd_lr1e-3_wd1e-4_bs_16_epoch110_paddle.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3,4,5,6,7)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 8
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: cocostuff
13 | ROOT: '../../../../dataset/coco_stuff_10k/'
14 | TEST_SET: 'val.lst'
15 | TRAIN_SET: 'train.lst'
16 | NUM_CLASSES: 171
17 | MODEL:
18 | NAME: seg_hrnet
19 | NUM_OUTPUTS: 1
20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 |       NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: true
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | TRAIN:
76 | IMAGE_SIZE:
77 | - 520
78 | - 520
79 | BASE_SIZE: 520
80 | BATCH_SIZE_PER_GPU: 2
81 | SHUFFLE: true
82 | BEGIN_EPOCH: 0
83 | END_EPOCH: 110
84 | RESUME: true
85 | OPTIMIZER: sgd
86 | LR: 0.001
87 | WD: 0.0001
88 | NONBACKBONE_KEYWORDS: ['last_layer']
89 | NONBACKBONE_MULT: 10
90 | MOMENTUM: 0.9
91 | NESTEROV: false
92 | FLIP: true
93 | MULTI_SCALE: true
94 | DOWNSAMPLERATE: 1
95 | IGNORE_LABEL: 255
96 | SCALE_FACTOR: 16
97 | TEST:
98 | IMAGE_SIZE:
99 | - 520
100 | - 520
101 | BASE_SIZE: 520
102 | BATCH_SIZE_PER_GPU: 1
103 | NUM_SAMPLES: 200
104 | FLIP_TEST: false
105 | MULTI_SCALE: false
106 |
--------------------------------------------------------------------------------
/experiments/cocostuff/seg_hrnet_w48_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch110.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: cocostuff
13 | ROOT: 'data/'
14 | TEST_SET: 'list/cocostuff/val.lst'
15 | TRAIN_SET: 'list/cocostuff/train.lst'
16 | NUM_CLASSES: 171
17 | MODEL:
18 | NAME: seg_hrnet
19 | NUM_OUTPUTS: 1
20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 |       NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | TRAIN:
76 | IMAGE_SIZE:
77 | - 520
78 | - 520
79 | BASE_SIZE: 520
80 | BATCH_SIZE_PER_GPU: 4
81 | SHUFFLE: true
82 | BEGIN_EPOCH: 0
83 | END_EPOCH: 110
84 | RESUME: true
85 | OPTIMIZER: sgd
86 | LR: 0.001
87 | WD: 0.0001
88 | NONBACKBONE_KEYWORDS: ['last_layer']
89 | NONBACKBONE_MULT: 10
90 | MOMENTUM: 0.9
91 | NESTEROV: false
92 | FLIP: true
93 | MULTI_SCALE: true
94 | DOWNSAMPLERATE: 1
95 | IGNORE_LABEL: 255
96 | SCALE_FACTOR: 16
97 | TEST:
98 | IMAGE_SIZE:
99 | - 520
100 | - 520
101 | BASE_SIZE: 520
102 | BATCH_SIZE_PER_GPU: 1
103 | NUM_SAMPLES: 200
104 | FLIP_TEST: false
105 | MULTI_SCALE: false
106 |
--------------------------------------------------------------------------------
/experiments/lip/seg_hrnet_ocr_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: lip
13 | ROOT: 'data/'
14 | TEST_SET: 'list/lip/valList.txt'
15 | TRAIN_SET: 'list/lip/trainList.txt'
16 | NUM_CLASSES: 20
17 | MODEL:
18 | NAME: seg_hrnet_ocr
19 | NUM_OUTPUTS: 2
20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained_2.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 |       NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | BALANCE_WEIGHTS: [0.4, 1]
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 473
79 | - 473
80 | BASE_SIZE: 473
81 | BATCH_SIZE_PER_GPU: 10
82 | SHUFFLE: true
83 | BEGIN_EPOCH: 0
84 | END_EPOCH: 150
85 | RESUME: true
86 | OPTIMIZER: sgd
87 | LR: 0.007
88 | WD: 0.0005
89 | MOMENTUM: 0.9
90 | NESTEROV: false
91 | FLIP: true
92 | MULTI_SCALE: true
93 | DOWNSAMPLERATE: 1
94 | IGNORE_LABEL: 255
95 | SCALE_FACTOR: 11
96 | TEST:
97 | IMAGE_SIZE:
98 | - 473
99 | - 473
100 | BASE_SIZE: 473
101 | BATCH_SIZE_PER_GPU: 10
102 | NUM_SAMPLES: 2000
103 | FLIP_TEST: false
104 | MULTI_SCALE: false
105 |
--------------------------------------------------------------------------------
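SCALE_FACTOR drives the random-scale augmentation in BaseDataset.gen_sample (lib/datasets/base_dataset.py), which draws rand_scale = 0.5 + randint(0, SCALE_FACTOR) / 10. The LIP value of 11 therefore samples scales in [0.5, 1.6], while the 16 used by the Cityscapes and COCO-Stuff configs samples [0.5, 2.1]:

    import random

    def rand_scale(scale_factor):
        # mirrors BaseDataset.gen_sample in lib/datasets/base_dataset.py
        return 0.5 + random.randint(0, scale_factor) / 10.0

    print(max(rand_scale(11) for _ in range(10000)))  # 1.6
    print(max(rand_scale(16) for _ in range(10000)))  # 2.1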
/experiments/lip/seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 100
10 |
11 | DATASET:
12 | DATASET: lip
13 | ROOT: 'data/'
14 | TEST_SET: 'list/lip/valList.txt'
15 | TRAIN_SET: 'list/lip/trainList.txt'
16 | NUM_CLASSES: 20
17 | MODEL:
18 | NAME: seg_hrnet
19 | ALIGN_CORNERS: False
20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 |       NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | TRAIN:
76 | IMAGE_SIZE:
77 | - 473
78 | - 473
79 | BASE_SIZE: 473
80 | BATCH_SIZE_PER_GPU: 10
81 | SHUFFLE: true
82 | BEGIN_EPOCH: 0
83 | END_EPOCH: 150
84 | RESUME: true
85 | OPTIMIZER: sgd
86 | LR: 0.007
87 | WD: 0.0005
88 | MOMENTUM: 0.9
89 | NESTEROV: false
90 | FLIP: true
91 | MULTI_SCALE: true
92 | DOWNSAMPLERATE: 1
93 | IGNORE_LABEL: 255
94 | SCALE_FACTOR: 11
95 | TEST:
96 | IMAGE_SIZE:
97 | - 473
98 | - 473
99 | BASE_SIZE: 473
100 | BATCH_SIZE_PER_GPU: 16
101 | NUM_SAMPLES: 2000
102 | FLIP_TEST: false
103 | MULTI_SCALE: false
104 |
--------------------------------------------------------------------------------
/experiments/lip/seg_hrnet_w48_473x473_sgd_lr7e-3_wd5e-4_bs_40_epoch150_paddle.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 8
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: lip
13 | ROOT: '../../../../dataset/lip/'
14 | TEST_SET: 'val.lst'
15 | TRAIN_SET: 'train.lst'
16 | NUM_CLASSES: 20
17 | MODEL:
18 | NAME: seg_hrnet
19 | ALIGN_CORNERS: False
20 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 |       NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | TRAIN:
76 | IMAGE_SIZE:
77 | - 473
78 | - 473
79 | BASE_SIZE: 473
80 | BATCH_SIZE_PER_GPU: 10
81 | SHUFFLE: true
82 | BEGIN_EPOCH: 0
83 | END_EPOCH: 150
84 | RESUME: true
85 | OPTIMIZER: sgd
86 | LR: 0.007
87 | WD: 0.0005
88 | MOMENTUM: 0.9
89 | NESTEROV: false
90 | FLIP: true
91 | MULTI_SCALE: true
92 | DOWNSAMPLERATE: 1
93 | IGNORE_LABEL: 255
94 | SCALE_FACTOR: 11
95 | TEST:
96 | IMAGE_SIZE:
97 | - 473
98 | - 473
99 | BASE_SIZE: 473
100 | BATCH_SIZE_PER_GPU: 8
101 | NUM_SAMPLES: 2000
102 | FLIP_TEST: false
103 | MULTI_SCALE: false
104 |
--------------------------------------------------------------------------------
/experiments/pascal_ctx/seg_hrnet_ocr_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: pascal_ctx
13 | ROOT: 'data/'
14 | TEST_SET: 'val'
15 | TRAIN_SET: 'train'
16 | NUM_CLASSES: 59
17 | MODEL:
18 | NAME: seg_hrnet_ocr
19 | NUM_OUTPUTS: 2
20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 |       NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | BALANCE_WEIGHTS: [0.4, 1]
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 520
79 | - 520
80 | BASE_SIZE: 520
81 | BATCH_SIZE_PER_GPU: 4
82 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr']
83 | NONBACKBONE_MULT: 10
84 | SHUFFLE: true
85 | BEGIN_EPOCH: 0
86 | END_EPOCH: 200
87 | RESUME: true
88 | OPTIMIZER: sgd
89 | LR: 0.001
90 | WD: 0.0001
91 | MOMENTUM: 0.9
92 | NESTEROV: false
93 | FLIP: true
94 | MULTI_SCALE: true
95 | DOWNSAMPLERATE: 1
96 | IGNORE_LABEL: -1
97 | SCALE_FACTOR: 16
98 | TEST:
99 | IMAGE_SIZE:
100 | - 520
101 | - 520
102 | BASE_SIZE: 520
103 | BATCH_SIZE_PER_GPU: 16
104 | FLIP_TEST: false
105 | MULTI_SCALE: false
106 |
--------------------------------------------------------------------------------
/experiments/pascal_ctx/seg_hrnet_ocr_w48_cls60_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: pascal_ctx
13 | ROOT: 'data/'
14 | TEST_SET: 'val'
15 | TRAIN_SET: 'train'
16 | NUM_CLASSES: 60
17 | MODEL:
18 | NAME: seg_hrnet_ocr
19 | NUM_OUTPUTS: 2
20 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
21 | EXTRA:
22 | FINAL_CONV_KERNEL: 1
23 | STAGE1:
24 | NUM_MODULES: 1
25 |       NUM_BRANCHES: 1
26 | BLOCK: BOTTLENECK
27 | NUM_BLOCKS:
28 | - 4
29 | NUM_CHANNELS:
30 | - 64
31 | FUSE_METHOD: SUM
32 | STAGE2:
33 | NUM_MODULES: 1
34 | NUM_BRANCHES: 2
35 | BLOCK: BASIC
36 | NUM_BLOCKS:
37 | - 4
38 | - 4
39 | NUM_CHANNELS:
40 | - 48
41 | - 96
42 | FUSE_METHOD: SUM
43 | STAGE3:
44 | NUM_MODULES: 4
45 | NUM_BRANCHES: 3
46 | BLOCK: BASIC
47 | NUM_BLOCKS:
48 | - 4
49 | - 4
50 | - 4
51 | NUM_CHANNELS:
52 | - 48
53 | - 96
54 | - 192
55 | FUSE_METHOD: SUM
56 | STAGE4:
57 | NUM_MODULES: 3
58 | NUM_BRANCHES: 4
59 | BLOCK: BASIC
60 | NUM_BLOCKS:
61 | - 4
62 | - 4
63 | - 4
64 | - 4
65 | NUM_CHANNELS:
66 | - 48
67 | - 96
68 | - 192
69 | - 384
70 | FUSE_METHOD: SUM
71 | LOSS:
72 | USE_OHEM: false
73 | OHEMTHRES: 0.9
74 | OHEMKEEP: 131072
75 | BALANCE_WEIGHTS: [0.4, 1]
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 520
79 | - 520
80 | BASE_SIZE: 520
81 | BATCH_SIZE_PER_GPU: 4
82 | NONBACKBONE_KEYWORDS: ['cls', 'aux', 'ocr']
83 | NONBACKBONE_MULT: 10
84 | SHUFFLE: true
85 | BEGIN_EPOCH: 0
86 | END_EPOCH: 200
87 | RESUME: true
88 | OPTIMIZER: sgd
89 | LR: 0.001
90 | WD: 0.0001
91 | MOMENTUM: 0.9
92 | NESTEROV: false
93 | FLIP: true
94 | MULTI_SCALE: true
95 | DOWNSAMPLERATE: 1
96 | IGNORE_LABEL: -1
97 | SCALE_FACTOR: 16
98 | TEST:
99 | IMAGE_SIZE:
100 | - 520
101 | - 520
102 | BASE_SIZE: 520
103 | BATCH_SIZE_PER_GPU: 16
104 | FLIP_TEST: false
105 | MULTI_SCALE: false
106 |
--------------------------------------------------------------------------------
/experiments/pascal_ctx/seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: pascal_ctx
13 | ROOT: 'data/'
14 | TEST_SET: 'val'
15 | TRAIN_SET: 'train'
16 | NUM_CLASSES: 59
17 | MODEL:
18 | NAME: seg_hrnet
19 | ALIGN_CORNERS: False
20 | NUM_OUTPUTS: 1
21 | PRETRAINED: 'pretrained_models/hrnetv2_w48_imagenet_pretrained.pth'
22 | EXTRA:
23 | FINAL_CONV_KERNEL: 1
24 | STAGE1:
25 | NUM_MODULES: 1
26 |       NUM_BRANCHES: 1
27 | BLOCK: BOTTLENECK
28 | NUM_BLOCKS:
29 | - 4
30 | NUM_CHANNELS:
31 | - 64
32 | FUSE_METHOD: SUM
33 | STAGE2:
34 | NUM_MODULES: 1
35 | NUM_BRANCHES: 2
36 | BLOCK: BASIC
37 | NUM_BLOCKS:
38 | - 4
39 | - 4
40 | NUM_CHANNELS:
41 | - 48
42 | - 96
43 | FUSE_METHOD: SUM
44 | STAGE3:
45 | NUM_MODULES: 4
46 | NUM_BRANCHES: 3
47 | BLOCK: BASIC
48 | NUM_BLOCKS:
49 | - 4
50 | - 4
51 | - 4
52 | NUM_CHANNELS:
53 | - 48
54 | - 96
55 | - 192
56 | FUSE_METHOD: SUM
57 | STAGE4:
58 | NUM_MODULES: 3
59 | NUM_BRANCHES: 4
60 | BLOCK: BASIC
61 | NUM_BLOCKS:
62 | - 4
63 | - 4
64 | - 4
65 | - 4
66 | NUM_CHANNELS:
67 | - 48
68 | - 96
69 | - 192
70 | - 384
71 | FUSE_METHOD: SUM
72 | LOSS:
73 | USE_OHEM: false
74 | OHEMTHRES: 0.9
75 | OHEMKEEP: 131072
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 520
79 | - 520
80 | BASE_SIZE: 520
81 | BATCH_SIZE_PER_GPU: 4
82 | NONBACKBONE_KEYWORDS: ['last_layer']
83 | NONBACKBONE_MULT: 10
84 | SHUFFLE: true
85 | BEGIN_EPOCH: 0
86 | END_EPOCH: 200
87 | RESUME: true
88 | OPTIMIZER: sgd
89 | LR: 0.001
90 | WD: 0.0001
91 | MOMENTUM: 0.9
92 | NESTEROV: false
93 | FLIP: true
94 | MULTI_SCALE: true
95 | DOWNSAMPLERATE: 1
96 | IGNORE_LABEL: -1
97 | SCALE_FACTOR: 16
98 | TEST:
99 | IMAGE_SIZE:
100 | - 520
101 | - 520
102 | BASE_SIZE: 520
103 | BATCH_SIZE_PER_GPU: 16
104 | FLIP_TEST: false
105 | MULTI_SCALE: false
106 |
--------------------------------------------------------------------------------
/experiments/pascal_ctx/seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200_paddle.yaml:
--------------------------------------------------------------------------------
1 | CUDNN:
2 | BENCHMARK: true
3 | DETERMINISTIC: false
4 | ENABLED: true
5 | GPUS: (0,1,2,3)
6 | OUTPUT_DIR: 'output'
7 | LOG_DIR: 'log'
8 | WORKERS: 4
9 | PRINT_FREQ: 10
10 |
11 | DATASET:
12 | DATASET: pascal_ctx
13 | ROOT: '../../../../dataset/pascal_context/'
14 | TEST_SET: 'val.lst'
15 | TRAIN_SET: 'train.lst'
16 | NUM_CLASSES: 59
17 | MODEL:
18 | NAME: seg_hrnet
19 | ALIGN_CORNERS: False
20 | NUM_OUTPUTS: 1
21 | PRETRAINED: '../../../../dataset/pretrained_models/HRNet_W48_C_ssld_pretrained.pth'
22 | EXTRA:
23 | FINAL_CONV_KERNEL: 1
24 | STAGE1:
25 | NUM_MODULES: 1
26 |       NUM_BRANCHES: 1
27 | BLOCK: BOTTLENECK
28 | NUM_BLOCKS:
29 | - 4
30 | NUM_CHANNELS:
31 | - 64
32 | FUSE_METHOD: SUM
33 | STAGE2:
34 | NUM_MODULES: 1
35 | NUM_BRANCHES: 2
36 | BLOCK: BASIC
37 | NUM_BLOCKS:
38 | - 4
39 | - 4
40 | NUM_CHANNELS:
41 | - 48
42 | - 96
43 | FUSE_METHOD: SUM
44 | STAGE3:
45 | NUM_MODULES: 4
46 | NUM_BRANCHES: 3
47 | BLOCK: BASIC
48 | NUM_BLOCKS:
49 | - 4
50 | - 4
51 | - 4
52 | NUM_CHANNELS:
53 | - 48
54 | - 96
55 | - 192
56 | FUSE_METHOD: SUM
57 | STAGE4:
58 | NUM_MODULES: 3
59 | NUM_BRANCHES: 4
60 | BLOCK: BASIC
61 | NUM_BLOCKS:
62 | - 4
63 | - 4
64 | - 4
65 | - 4
66 | NUM_CHANNELS:
67 | - 48
68 | - 96
69 | - 192
70 | - 384
71 | FUSE_METHOD: SUM
72 | LOSS:
73 | USE_OHEM: false
74 | OHEMTHRES: 0.9
75 | OHEMKEEP: 131072
76 | TRAIN:
77 | IMAGE_SIZE:
78 | - 520
79 | - 520
80 | BASE_SIZE: 520
81 | BATCH_SIZE_PER_GPU: 4
82 | NONBACKBONE_KEYWORDS: ['last_layer']
83 | NONBACKBONE_MULT: 10
84 | SHUFFLE: true
85 | BEGIN_EPOCH: 0
86 | END_EPOCH: 200
87 | RESUME: true
88 | OPTIMIZER: sgd
89 | LR: 0.001
90 | WD: 0.0001
91 | MOMENTUM: 0.9
92 | NESTEROV: false
93 | FLIP: true
94 | MULTI_SCALE: true
95 | DOWNSAMPLERATE: 1
96 | IGNORE_LABEL: -1
97 | SCALE_FACTOR: 16
98 | TEST:
99 | IMAGE_SIZE:
100 | - 520
101 | - 520
102 | BASE_SIZE: 520
103 | BATCH_SIZE_PER_GPU: 16
104 | FLIP_TEST: false
105 | MULTI_SCALE: false
106 |
--------------------------------------------------------------------------------
/figures/OCR.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/OCR.PNG
--------------------------------------------------------------------------------
/figures/SegmentationTransformerOCR.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/SegmentationTransformerOCR.png
--------------------------------------------------------------------------------
/figures/SegmentationTransformerOCR1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/SegmentationTransformerOCR1.png
--------------------------------------------------------------------------------
/figures/SegmentationTransformerOCR2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/SegmentationTransformerOCR2.png
--------------------------------------------------------------------------------
/figures/seg-hrnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/figures/seg-hrnet.png
--------------------------------------------------------------------------------
/hubconf.py:
--------------------------------------------------------------------------------
1 | """File for accessing HRNet via PyTorch Hub https://pytorch.org/hub/
2 |
3 | Usage:
4 | import torch
5 | model = torch.hub.load('AlexeyAB/PyTorch_YOLOv4:u5_preview', 'yolov4_pacsp_s', pretrained=True, channels=3, classes=80)
6 | """
7 |
8 | dependencies = ['torch']
9 | import torch
10 | from lib.models.seg_hrnet import get_seg_model
11 |
12 |
13 | state_dict_url = 'https://github.com/huawei-noah/ghostnet/raw/master/pytorch/models/state_dict_93.98.pth'
14 |
15 |
16 | def hrnet_w48_cityscapes(pretrained=False, **kwargs):
17 | """ # This docstring shows up in hub.help()
18 | HRNetW48 model pretrained on Cityscapes
19 | pretrained (bool): kwargs, load pretrained weights into the model
20 | """
21 | model = ghostnet(num_classes=1000, width=1.0, dropout=0.2)
22 | if pretrained:
23 | state_dict = torch.hub.load_state_dict_from_url(state_dict_url, progress=True)
24 | model.load_state_dict(state_dict)
25 | return model
--------------------------------------------------------------------------------
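A quick local smoke test of the hub entrypoint, run from the repository root; source='local' makes torch.hub read hubconf.py from the given directory instead of GitHub (available in recent PyTorch releases):

    import torch

    model = torch.hub.load('.', 'hrnet_w48_cityscapes', source='local', pretrained=False)
    print(sum(p.numel() for p in model.parameters()) / 1e6, 'M parameters')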
/lib/config/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | from .default import _C as config
11 | from .default import update_config
12 | from .models import MODEL_EXTRAS
13 |
--------------------------------------------------------------------------------
/lib/config/default.py:
--------------------------------------------------------------------------------
1 |
2 | # ------------------------------------------------------------------------------
3 | # Copyright (c) Microsoft
4 | # Licensed under the MIT License.
5 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import os
13 |
14 | from yacs.config import CfgNode as CN
15 |
16 |
17 | _C = CN()
18 |
19 | _C.OUTPUT_DIR = ''
20 | _C.LOG_DIR = ''
21 | _C.GPUS = (0,)
22 | _C.WORKERS = 4
23 | _C.PRINT_FREQ = 20
24 | _C.AUTO_RESUME = False
25 | _C.PIN_MEMORY = True
26 | _C.RANK = 0
27 |
28 | # Cudnn related params
29 | _C.CUDNN = CN()
30 | _C.CUDNN.BENCHMARK = True
31 | _C.CUDNN.DETERMINISTIC = False
32 | _C.CUDNN.ENABLED = True
33 |
34 | # common params for NETWORK
35 | _C.MODEL = CN()
36 | _C.MODEL.NAME = 'seg_hrnet'
37 | _C.MODEL.PRETRAINED = ''
38 | _C.MODEL.ALIGN_CORNERS = True
39 | _C.MODEL.NUM_OUTPUTS = 1
40 | _C.MODEL.EXTRA = CN(new_allowed=True)
41 |
42 |
43 | _C.MODEL.OCR = CN()
44 | _C.MODEL.OCR.MID_CHANNELS = 512
45 | _C.MODEL.OCR.KEY_CHANNELS = 256
46 | _C.MODEL.OCR.DROPOUT = 0.05
47 | _C.MODEL.OCR.SCALE = 1
48 |
49 | _C.LOSS = CN()
50 | _C.LOSS.USE_OHEM = False
51 | _C.LOSS.OHEMTHRES = 0.9
52 | _C.LOSS.OHEMKEEP = 100000
53 | _C.LOSS.CLASS_BALANCE = False
54 | _C.LOSS.BALANCE_WEIGHTS = [1]
55 |
56 | # DATASET related params
57 | _C.DATASET = CN()
58 | _C.DATASET.ROOT = ''
59 | _C.DATASET.DATASET = 'cityscapes'
60 | _C.DATASET.NUM_CLASSES = 19
61 | _C.DATASET.TRAIN_SET = 'list/cityscapes/train.lst'
62 | _C.DATASET.EXTRA_TRAIN_SET = ''
63 | _C.DATASET.TEST_SET = 'list/cityscapes/val.lst'
64 |
65 | # training
66 | _C.TRAIN = CN()
67 |
68 | _C.TRAIN.FREEZE_LAYERS = ''
69 | _C.TRAIN.FREEZE_EPOCHS = -1
70 | _C.TRAIN.NONBACKBONE_KEYWORDS = []
71 | _C.TRAIN.NONBACKBONE_MULT = 10
72 |
73 | _C.TRAIN.IMAGE_SIZE = [1024, 512] # width * height
74 | _C.TRAIN.BASE_SIZE = 2048
75 | _C.TRAIN.DOWNSAMPLERATE = 1
76 | _C.TRAIN.FLIP = True
77 | _C.TRAIN.MULTI_SCALE = True
78 | _C.TRAIN.SCALE_FACTOR = 16
79 |
80 | _C.TRAIN.RANDOM_BRIGHTNESS = False
81 | _C.TRAIN.RANDOM_BRIGHTNESS_SHIFT_VALUE = 10
82 |
83 | _C.TRAIN.LR_FACTOR = 0.1
84 | _C.TRAIN.LR_STEP = [90, 110]
85 | _C.TRAIN.LR = 0.01
86 | _C.TRAIN.EXTRA_LR = 0.001
87 |
88 | _C.TRAIN.OPTIMIZER = 'sgd'
89 | _C.TRAIN.MOMENTUM = 0.9
90 | _C.TRAIN.WD = 0.0001
91 | _C.TRAIN.NESTEROV = False
92 | _C.TRAIN.IGNORE_LABEL = -1
93 |
94 | _C.TRAIN.BEGIN_EPOCH = 0
95 | _C.TRAIN.END_EPOCH = 484
96 | _C.TRAIN.EXTRA_EPOCH = 0
97 |
98 | _C.TRAIN.RESUME = False
99 |
100 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32
101 | _C.TRAIN.SHUFFLE = True
102 | # only using some training samples
103 | _C.TRAIN.NUM_SAMPLES = 0
104 |
105 | # testing
106 | _C.TEST = CN()
107 |
108 | _C.TEST.IMAGE_SIZE = [2048, 1024] # width * height
109 | _C.TEST.BASE_SIZE = 2048
110 |
111 | _C.TEST.BATCH_SIZE_PER_GPU = 32
112 | # only testing some samples
113 | _C.TEST.NUM_SAMPLES = 0
114 |
115 | _C.TEST.MODEL_FILE = ''
116 | _C.TEST.FLIP_TEST = False
117 | _C.TEST.MULTI_SCALE = False
118 | _C.TEST.SCALE_LIST = [1]
119 |
120 | _C.TEST.OUTPUT_INDEX = -1
121 |
122 | # debug
123 | _C.DEBUG = CN()
124 | _C.DEBUG.DEBUG = False
125 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False
126 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False
127 | _C.DEBUG.SAVE_HEATMAPS_GT = False
128 | _C.DEBUG.SAVE_HEATMAPS_PRED = False
129 |
130 |
131 | def update_config(cfg, args):
132 | cfg.defrost()
133 |
134 | cfg.merge_from_file(args.cfg)
135 | cfg.merge_from_list(args.opts)
136 |
137 | cfg.freeze()
138 |
139 |
140 | if __name__ == '__main__':
141 | import sys
142 | with open(sys.argv[1], 'w') as f:
143 | print(_C, file=f)
144 |
145 |
--------------------------------------------------------------------------------
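update_config expects an argparse-style namespace carrying a cfg path and an opts list, the same contract used by tools/train.py and tools/test.py; a minimal driver (the yaml path is one of the experiment files above):

    import argparse
    from lib.config import config, update_config

    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', required=True, help='experiment yaml to merge')
    parser.add_argument('opts', nargs=argparse.REMAINDER,
                        help='KEY VALUE overrides merged last')
    args = parser.parse_args([
        '--cfg', 'experiments/cityscapes/seg_hrnet_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml',
        'TRAIN.LR', '0.005'])
    update_config(config, args)
    print(config.TRAIN.LR)  # 0.005 -- opts override the yaml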
/lib/config/hrnet_config.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Created by Bin Xiao (Bin.Xiao@microsoft.com)
5 | # Modified by Ke Sun (sunk@mail.ustc.edu.cn), Rainbowsecret (yuyua@microsoft.com)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | from yacs.config import CfgNode as CN
13 |
14 |
15 | # configs for HRNet48
16 | HRNET_48 = CN()
17 | HRNET_48.FINAL_CONV_KERNEL = 1
18 |
19 | HRNET_48.STAGE1 = CN()
20 | HRNET_48.STAGE1.NUM_MODULES = 1
21 | HRNET_48.STAGE1.NUM_BRANCHES = 1
22 | HRNET_48.STAGE1.NUM_BLOCKS = [4]
23 | HRNET_48.STAGE1.NUM_CHANNELS = [64]
24 | HRNET_48.STAGE1.BLOCK = 'BOTTLENECK'
25 | HRNET_48.STAGE1.FUSE_METHOD = 'SUM'
26 |
27 | HRNET_48.STAGE2 = CN()
28 | HRNET_48.STAGE2.NUM_MODULES = 1
29 | HRNET_48.STAGE2.NUM_BRANCHES = 2
30 | HRNET_48.STAGE2.NUM_BLOCKS = [4, 4]
31 | HRNET_48.STAGE2.NUM_CHANNELS = [48, 96]
32 | HRNET_48.STAGE2.BLOCK = 'BASIC'
33 | HRNET_48.STAGE2.FUSE_METHOD = 'SUM'
34 |
35 | HRNET_48.STAGE3 = CN()
36 | HRNET_48.STAGE3.NUM_MODULES = 4
37 | HRNET_48.STAGE3.NUM_BRANCHES = 3
38 | HRNET_48.STAGE3.NUM_BLOCKS = [4, 4, 4]
39 | HRNET_48.STAGE3.NUM_CHANNELS = [48, 96, 192]
40 | HRNET_48.STAGE3.BLOCK = 'BASIC'
41 | HRNET_48.STAGE3.FUSE_METHOD = 'SUM'
42 |
43 | HRNET_48.STAGE4 = CN()
44 | HRNET_48.STAGE4.NUM_MODULES = 3
45 | HRNET_48.STAGE4.NUM_BRANCHES = 4
46 | HRNET_48.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
47 | HRNET_48.STAGE4.NUM_CHANNELS = [48, 96, 192, 384]
48 | HRNET_48.STAGE4.BLOCK = 'BASIC'
49 | HRNET_48.STAGE4.FUSE_METHOD = 'SUM'
50 |
51 |
52 | # configs for HRNet32
53 | HRNET_32 = CN()
54 | HRNET_32.FINAL_CONV_KERNEL = 1
55 |
56 | HRNET_32.STAGE1 = CN()
57 | HRNET_32.STAGE1.NUM_MODULES = 1
58 | HRNET_32.STAGE1.NUM_BRANCHES = 1
59 | HRNET_32.STAGE1.NUM_BLOCKS = [4]
60 | HRNET_32.STAGE1.NUM_CHANNELS = [64]
61 | HRNET_32.STAGE1.BLOCK = 'BOTTLENECK'
62 | HRNET_32.STAGE1.FUSE_METHOD = 'SUM'
63 |
64 | HRNET_32.STAGE2 = CN()
65 | HRNET_32.STAGE2.NUM_MODULES = 1
66 | HRNET_32.STAGE2.NUM_BRANCHES = 2
67 | HRNET_32.STAGE2.NUM_BLOCKS = [4, 4]
68 | HRNET_32.STAGE2.NUM_CHANNELS = [32, 64]
69 | HRNET_32.STAGE2.BLOCK = 'BASIC'
70 | HRNET_32.STAGE2.FUSE_METHOD = 'SUM'
71 |
72 | HRNET_32.STAGE3 = CN()
73 | HRNET_32.STAGE3.NUM_MODULES = 4
74 | HRNET_32.STAGE3.NUM_BRANCHES = 3
75 | HRNET_32.STAGE3.NUM_BLOCKS = [4, 4, 4]
76 | HRNET_32.STAGE3.NUM_CHANNELS = [32, 64, 128]
77 | HRNET_32.STAGE3.BLOCK = 'BASIC'
78 | HRNET_32.STAGE3.FUSE_METHOD = 'SUM'
79 |
80 | HRNET_32.STAGE4 = CN()
81 | HRNET_32.STAGE4.NUM_MODULES = 3
82 | HRNET_32.STAGE4.NUM_BRANCHES = 4
83 | HRNET_32.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
84 | HRNET_32.STAGE4.NUM_CHANNELS = [32, 64, 128, 256]
85 | HRNET_32.STAGE4.BLOCK = 'BASIC'
86 | HRNET_32.STAGE4.FUSE_METHOD = 'SUM'
87 |
88 |
89 | # configs for HRNet18
90 | HRNET_18 = CN()
91 | HRNET_18.FINAL_CONV_KERNEL = 1
92 |
93 | HRNET_18.STAGE1 = CN()
94 | HRNET_18.STAGE1.NUM_MODULES = 1
95 | HRNET_18.STAGE1.NUM_BRANCHES = 1
96 | HRNET_18.STAGE1.NUM_BLOCKS = [4]
97 | HRNET_18.STAGE1.NUM_CHANNELS = [64]
98 | HRNET_18.STAGE1.BLOCK = 'BOTTLENECK'
99 | HRNET_18.STAGE1.FUSE_METHOD = 'SUM'
100 |
101 | HRNET_18.STAGE2 = CN()
102 | HRNET_18.STAGE2.NUM_MODULES = 1
103 | HRNET_18.STAGE2.NUM_BRANCHES = 2
104 | HRNET_18.STAGE2.NUM_BLOCKS = [4, 4]
105 | HRNET_18.STAGE2.NUM_CHANNELS = [18, 36]
106 | HRNET_18.STAGE2.BLOCK = 'BASIC'
107 | HRNET_18.STAGE2.FUSE_METHOD = 'SUM'
108 |
109 | HRNET_18.STAGE3 = CN()
110 | HRNET_18.STAGE3.NUM_MODULES = 4
111 | HRNET_18.STAGE3.NUM_BRANCHES = 3
112 | HRNET_18.STAGE3.NUM_BLOCKS = [4, 4, 4]
113 | HRNET_18.STAGE3.NUM_CHANNELS = [18, 36, 72]
114 | HRNET_18.STAGE3.BLOCK = 'BASIC'
115 | HRNET_18.STAGE3.FUSE_METHOD = 'SUM'
116 |
117 | HRNET_18.STAGE4 = CN()
118 | HRNET_18.STAGE4.NUM_MODULES = 3
119 | HRNET_18.STAGE4.NUM_BRANCHES = 4
120 | HRNET_18.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
121 | HRNET_18.STAGE4.NUM_CHANNELS = [18, 36, 72, 144]
122 | HRNET_18.STAGE4.BLOCK = 'BASIC'
123 | HRNET_18.STAGE4.FUSE_METHOD = 'SUM'
124 |
125 |
126 | MODEL_CONFIGS = {
127 | 'hrnet18': HRNET_18,
128 | 'hrnet32': HRNET_32,
129 | 'hrnet48': HRNET_48,
130 | }
--------------------------------------------------------------------------------
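These stage widths also fix the input width of the segmentation head: seg_hrnet upsamples all branches to the highest resolution and concatenates them, so the head consumes the sum of the STAGE4 channels:

    from lib.config.hrnet_config import MODEL_CONFIGS

    for name, cfg in MODEL_CONFIGS.items():
        # width of the concatenated multi-resolution feature map
        print(name, sum(cfg.STAGE4.NUM_CHANNELS))
    # hrnet18 -> 270, hrnet32 -> 480, hrnet48 -> 720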
/lib/config/models.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | from yacs.config import CfgNode as CN
12 |
13 | # high_resolution_net related params for segmentation
14 | HIGH_RESOLUTION_NET = CN()
15 | HIGH_RESOLUTION_NET.PRETRAINED_LAYERS = ['*']
16 | HIGH_RESOLUTION_NET.STEM_INPLANES = 64
17 | HIGH_RESOLUTION_NET.FINAL_CONV_KERNEL = 1
18 | HIGH_RESOLUTION_NET.WITH_HEAD = True
19 |
20 | HIGH_RESOLUTION_NET.STAGE2 = CN()
21 | HIGH_RESOLUTION_NET.STAGE2.NUM_MODULES = 1
22 | HIGH_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2
23 | HIGH_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4]
24 | HIGH_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [32, 64]
25 | HIGH_RESOLUTION_NET.STAGE2.BLOCK = 'BASIC'
26 | HIGH_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM'
27 |
28 | HIGH_RESOLUTION_NET.STAGE3 = CN()
29 | HIGH_RESOLUTION_NET.STAGE3.NUM_MODULES = 1
30 | HIGH_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3
31 | HIGH_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4]
32 | HIGH_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [32, 64, 128]
33 | HIGH_RESOLUTION_NET.STAGE3.BLOCK = 'BASIC'
34 | HIGH_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM'
35 |
36 | HIGH_RESOLUTION_NET.STAGE4 = CN()
37 | HIGH_RESOLUTION_NET.STAGE4.NUM_MODULES = 1
38 | HIGH_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4
39 | HIGH_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
40 | HIGH_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256]
41 | HIGH_RESOLUTION_NET.STAGE4.BLOCK = 'BASIC'
42 | HIGH_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM'
43 |
44 | MODEL_EXTRAS = {
45 | 'seg_hrnet': HIGH_RESOLUTION_NET,
46 | }
47 |
--------------------------------------------------------------------------------
/lib/core/criterion.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | import torch
8 | import torch.nn as nn
9 | from torch.nn import functional as F
10 | import logging
11 | from config import config
12 |
13 |
14 | class CrossEntropy(nn.Module):
15 | def __init__(self, ignore_label=-1, weight=None):
16 | super(CrossEntropy, self).__init__()
17 | self.ignore_label = ignore_label
18 | self.criterion = nn.CrossEntropyLoss(
19 | weight=weight,
20 | ignore_index=ignore_label
21 | )
22 |
23 | def _forward(self, score, target):
24 | ph, pw = score.size(2), score.size(3)
25 | h, w = target.size(1), target.size(2)
26 | if ph != h or pw != w:
27 | score = F.interpolate(input=score, size=(
28 | h, w), mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS)
29 |
30 | loss = self.criterion(score, target)
31 |
32 | return loss
33 |
34 | def forward(self, score, target):
35 |
36 | if config.MODEL.NUM_OUTPUTS == 1:
37 | score = [score]
38 |
39 | weights = config.LOSS.BALANCE_WEIGHTS
40 | assert len(weights) == len(score)
41 |
42 | return sum([w * self._forward(x, target) for (w, x) in zip(weights, score)])
43 |
44 |
45 | class OhemCrossEntropy(nn.Module):
46 | def __init__(self, ignore_label=-1, thres=0.7,
47 | min_kept=100000, weight=None):
48 | super(OhemCrossEntropy, self).__init__()
49 | self.thresh = thres
50 | self.min_kept = max(1, min_kept)
51 | self.ignore_label = ignore_label
52 | self.criterion = nn.CrossEntropyLoss(
53 | weight=weight,
54 | ignore_index=ignore_label,
55 | reduction='none'
56 | )
57 |
58 | def _ce_forward(self, score, target):
59 | ph, pw = score.size(2), score.size(3)
60 | h, w = target.size(1), target.size(2)
61 | if ph != h or pw != w:
62 | score = F.interpolate(input=score, size=(
63 | h, w), mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS)
64 |
65 | loss = self.criterion(score, target)
66 |
67 | return loss
68 |
69 | def _ohem_forward(self, score, target, **kwargs):
70 | ph, pw = score.size(2), score.size(3)
71 | h, w = target.size(1), target.size(2)
72 | if ph != h or pw != w:
73 | score = F.interpolate(input=score, size=(
74 | h, w), mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS)
75 | pred = F.softmax(score, dim=1)
76 | pixel_losses = self.criterion(score, target).contiguous().view(-1)
77 | mask = target.contiguous().view(-1) != self.ignore_label
78 |
79 | tmp_target = target.clone()
80 | tmp_target[tmp_target == self.ignore_label] = 0
81 | pred = pred.gather(1, tmp_target.unsqueeze(1))
82 | pred, ind = pred.contiguous().view(-1,)[mask].contiguous().sort()
83 | min_value = pred[min(self.min_kept, pred.numel() - 1)]
84 | threshold = max(min_value, self.thresh)
85 |
86 | pixel_losses = pixel_losses[mask][ind]
87 | pixel_losses = pixel_losses[pred < threshold]
88 | return pixel_losses.mean()
89 |
90 | def forward(self, score, target):
91 |
92 | if config.MODEL.NUM_OUTPUTS == 1:
93 | score = [score]
94 |
95 | weights = config.LOSS.BALANCE_WEIGHTS
96 | assert len(weights) == len(score)
97 |
98 | functions = [self._ce_forward] * \
99 | (len(weights) - 1) + [self._ohem_forward]
100 | return sum([
101 | w * func(x, target)
102 | for (w, x, func) in zip(weights, score, functions)
103 | ])
104 |
--------------------------------------------------------------------------------
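A shape-level smoke test for OhemCrossEntropy, with lib/ placed on sys.path so the module's `from config import config` import resolves. Under the default config (NUM_OUTPUTS 1, BALANCE_WEIGHTS [1]) a single logit tensor goes straight through the OHEM branch; the threshold and min_kept values mirror the OHEM configs above:

    import sys
    sys.path.insert(0, 'lib')  # criterion.py imports `config` relative to lib/

    import torch
    from core.criterion import OhemCrossEntropy

    logits = torch.randn(2, 19, 64, 64, requires_grad=True)  # [N, C, h, w] raw scores
    target = torch.randint(0, 19, (2, 128, 128))             # [N, H, W]; scores get upsampled
    criterion = OhemCrossEntropy(ignore_label=255, thres=0.9, min_kept=131072)
    loss = criterion(logits, target)
    loss.backward()
    print(loss.item())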
/lib/core/function.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | import logging
8 | import os
9 | import time
10 |
11 | import numpy as np
12 | import numpy.ma as ma
13 | from tqdm import tqdm
14 |
15 | import torch
16 | import torch.nn as nn
17 | from torch.nn import functional as F
18 |
19 | from utils.utils import AverageMeter
20 | from utils.utils import get_confusion_matrix
21 | from utils.utils import adjust_learning_rate
22 |
23 | import utils.distributed as dist
24 |
25 |
26 | def reduce_tensor(inp):
27 | """
28 | Reduce the loss from all processes so that
29 |     the process with rank 0 has the averaged results.
30 | """
31 | world_size = dist.get_world_size()
32 | if world_size < 2:
33 | return inp
34 | with torch.no_grad():
35 | reduced_inp = inp
36 | torch.distributed.reduce(reduced_inp, dst=0)
37 | return reduced_inp / world_size
38 |
39 |
40 | def train(config, epoch, num_epoch, epoch_iters, base_lr,
41 | num_iters, trainloader, optimizer, model, writer_dict):
42 | # Training
43 | model.train()
44 |
45 | batch_time = AverageMeter()
46 | ave_loss = AverageMeter()
47 | tic = time.time()
48 | cur_iters = epoch*epoch_iters
49 | writer = writer_dict['writer']
50 | global_steps = writer_dict['train_global_steps']
51 |
52 | for i_iter, batch in enumerate(trainloader, 0):
53 | images, labels, _, _ = batch
54 | images = images.cuda()
55 | labels = labels.long().cuda()
56 |
57 | losses, _ = model(images, labels)
58 | loss = losses.mean()
59 |
60 | if dist.is_distributed():
61 | reduced_loss = reduce_tensor(loss)
62 | else:
63 | reduced_loss = loss
64 |
65 | model.zero_grad()
66 | loss.backward()
67 | optimizer.step()
68 |
69 | # measure elapsed time
70 | batch_time.update(time.time() - tic)
71 | tic = time.time()
72 |
73 | # update average loss
74 | ave_loss.update(reduced_loss.item())
75 |
76 | lr = adjust_learning_rate(optimizer,
77 | base_lr,
78 | num_iters,
79 | i_iter+cur_iters)
80 |
81 | if i_iter % config.PRINT_FREQ == 0 and dist.get_rank() == 0:
82 | msg = 'Epoch: [{}/{}] Iter:[{}/{}], Time: {:.2f}, ' \
83 | 'lr: {}, Loss: {:.6f}' .format(
84 | epoch, num_epoch, i_iter, epoch_iters,
85 | batch_time.average(), [x['lr'] for x in optimizer.param_groups], ave_loss.average())
86 | logging.info(msg)
87 |
88 | writer.add_scalar('train_loss', ave_loss.average(), global_steps)
89 | writer_dict['train_global_steps'] = global_steps + 1
90 |
91 | def validate(config, testloader, model, writer_dict):
92 | model.eval()
93 | ave_loss = AverageMeter()
94 | nums = config.MODEL.NUM_OUTPUTS
95 | confusion_matrix = np.zeros(
96 | (config.DATASET.NUM_CLASSES, config.DATASET.NUM_CLASSES, nums))
97 | with torch.no_grad():
98 | for idx, batch in enumerate(testloader):
99 | image, label, _, _ = batch
100 | size = label.size()
101 | image = image.cuda()
102 | label = label.long().cuda()
103 |
104 | losses, pred = model(image, label)
105 | if not isinstance(pred, (list, tuple)):
106 | pred = [pred]
107 | for i, x in enumerate(pred):
108 | x = F.interpolate(
109 | input=x, size=size[-2:],
110 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS
111 | )
112 |
113 | confusion_matrix[..., i] += get_confusion_matrix(
114 | label,
115 | x,
116 | size,
117 | config.DATASET.NUM_CLASSES,
118 | config.TRAIN.IGNORE_LABEL
119 | )
120 |
121 |             if idx % 10 == 0:
122 |                 logging.info('processing: %d batches' % idx)
123 |
124 | loss = losses.mean()
125 | if dist.is_distributed():
126 | reduced_loss = reduce_tensor(loss)
127 | else:
128 | reduced_loss = loss
129 | ave_loss.update(reduced_loss.item())
130 |
131 | if dist.is_distributed():
132 | confusion_matrix = torch.from_numpy(confusion_matrix).cuda()
133 | reduced_confusion_matrix = reduce_tensor(confusion_matrix)
134 | confusion_matrix = reduced_confusion_matrix.cpu().numpy()
135 |
136 | for i in range(nums):
137 | pos = confusion_matrix[..., i].sum(1)
138 | res = confusion_matrix[..., i].sum(0)
139 | tp = np.diag(confusion_matrix[..., i])
140 | IoU_array = (tp / np.maximum(1.0, pos + res - tp))
141 | mean_IoU = IoU_array.mean()
142 | if dist.get_rank() <= 0:
143 | logging.info('{} {} {}'.format(i, IoU_array, mean_IoU))
144 |
145 | writer = writer_dict['writer']
146 | global_steps = writer_dict['valid_global_steps']
147 | writer.add_scalar('valid_loss', ave_loss.average(), global_steps)
148 | writer.add_scalar('valid_mIoU', mean_IoU, global_steps)
149 | writer_dict['valid_global_steps'] = global_steps + 1
150 | return ave_loss.average(), mean_IoU, IoU_array
151 |
152 |
153 | def testval(config, test_dataset, testloader, model,
154 | sv_dir='', sv_pred=False):
155 | model.eval()
156 | confusion_matrix = np.zeros(
157 | (config.DATASET.NUM_CLASSES, config.DATASET.NUM_CLASSES))
158 | with torch.no_grad():
159 | for index, batch in enumerate(tqdm(testloader)):
160 | image, label, _, name, *border_padding = batch
161 | size = label.size()
162 | pred = test_dataset.multi_scale_inference(
163 | config,
164 | model,
165 | image,
166 | scales=config.TEST.SCALE_LIST,
167 | flip=config.TEST.FLIP_TEST)
168 |
169 | if len(border_padding) > 0:
170 | border_padding = border_padding[0]
171 | pred = pred[:, :, 0:pred.size(2) - border_padding[0], 0:pred.size(3) - border_padding[1]]
172 |
173 | if pred.size()[-2] != size[-2] or pred.size()[-1] != size[-1]:
174 | pred = F.interpolate(
175 | pred, size[-2:],
176 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS
177 | )
178 |
179 | confusion_matrix += get_confusion_matrix(
180 | label,
181 | pred,
182 | size,
183 | config.DATASET.NUM_CLASSES,
184 | config.TRAIN.IGNORE_LABEL)
185 |
186 | if sv_pred:
187 | sv_path = os.path.join(sv_dir, 'test_results')
188 | if not os.path.exists(sv_path):
189 | os.mkdir(sv_path)
190 | test_dataset.save_pred(pred, sv_path, name)
191 |
192 | if index % 100 == 0:
193 | logging.info('processing: %d images' % index)
194 | pos = confusion_matrix.sum(1)
195 | res = confusion_matrix.sum(0)
196 | tp = np.diag(confusion_matrix)
197 | IoU_array = (tp / np.maximum(1.0, pos + res - tp))
198 | mean_IoU = IoU_array.mean()
199 | logging.info('mIoU: %.4f' % (mean_IoU))
200 |
201 | pos = confusion_matrix.sum(1)
202 | res = confusion_matrix.sum(0)
203 | tp = np.diag(confusion_matrix)
204 | pixel_acc = tp.sum()/pos.sum()
205 | mean_acc = (tp/np.maximum(1.0, pos)).mean()
206 | IoU_array = (tp / np.maximum(1.0, pos + res - tp))
207 | mean_IoU = IoU_array.mean()
208 |
209 | return mean_IoU, IoU_array, pixel_acc, mean_acc
210 |
211 |
212 | def test(config, test_dataset, testloader, model,
213 | sv_dir='', sv_pred=True):
214 | model.eval()
215 | with torch.no_grad():
216 | for _, batch in enumerate(tqdm(testloader)):
217 | image, size, name = batch
218 | size = size[0]
219 | pred = test_dataset.multi_scale_inference(
220 | config,
221 | model,
222 | image,
223 | scales=config.TEST.SCALE_LIST,
224 | flip=config.TEST.FLIP_TEST)
225 |
226 | if pred.size()[-2] != size[0] or pred.size()[-1] != size[1]:
227 | pred = F.interpolate(
228 | pred, size[-2:],
229 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS
230 | )
231 |
232 | if sv_pred:
233 | sv_path = os.path.join(sv_dir, 'test_results')
234 | if not os.path.exists(sv_path):
235 | os.mkdir(sv_path)
236 | test_dataset.save_pred(pred, sv_path, name)
237 |
--------------------------------------------------------------------------------
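validate() and testval() reduce everything to a confusion matrix and compute per-class IoU as tp / (pos + res - tp), where pos and res are the row and column sums; the same arithmetic on a toy 3-class matrix:

    import numpy as np

    # rows = ground truth, cols = prediction
    cm = np.array([[50., 2., 3.],
                   [4., 40., 1.],
                   [5., 0., 45.]])
    pos = cm.sum(1)   # ground-truth pixels per class
    res = cm.sum(0)   # predicted pixels per class
    tp = np.diag(cm)  # correctly classified pixels
    IoU_array = tp / np.maximum(1.0, pos + res - tp)
    print(IoU_array, IoU_array.mean())  # per-class IoU and mIoU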
/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | from .cityscapes import Cityscapes as cityscapes
12 | from .lip import LIP as lip
13 | from .pascal_ctx import PASCALContext as pascal_ctx
14 | from .ade20k import ADE20K as ade20k
15 | from .cocostuff import COCOStuff as cocostuff
--------------------------------------------------------------------------------
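The lowercase aliases exist so the training and test scripts can resolve a dataset class from the DATASET.DATASET string; an equivalent lookup by attribute (assuming the repo's dependencies are installed):

    import sys
    sys.path.insert(0, 'lib')  # so `import datasets` finds lib/datasets

    import datasets

    dataset_name = 'cityscapes'  # e.g. config.DATASET.DATASET
    dataset_cls = getattr(datasets, dataset_name)
    print(dataset_cls)  # <class 'datasets.cityscapes.Cityscapes'>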
/lib/datasets/ade20k.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | import os
8 |
9 | import cv2
10 | import numpy as np
11 |
12 | import torch
13 | from torch.nn import functional as F
14 | from PIL import Image
15 |
16 | from .base_dataset import BaseDataset
17 |
18 |
19 | class ADE20K(BaseDataset):
20 | def __init__(self,
21 | root,
22 | list_path,
23 | num_samples=None,
24 | num_classes=150,
25 | multi_scale=True,
26 | flip=True,
27 | ignore_label=-1,
28 | base_size=520,
29 | crop_size=(520, 520),
30 | downsample_rate=1,
31 | scale_factor=11,
32 | mean=[0.485, 0.456, 0.406],
33 | std=[0.229, 0.224, 0.225]):
34 |
35 | super(ADE20K, self).__init__(ignore_label, base_size,
36 | crop_size, downsample_rate, scale_factor, mean, std)
37 |
38 | self.root = root
39 | self.num_classes = num_classes
40 | self.list_path = list_path
41 | self.class_weights = None
42 |
43 | self.multi_scale = multi_scale
44 | self.flip = flip
45 | self.img_list = [line.strip().split() for line in open(root+list_path)]
46 |
47 | self.files = self.read_files()
48 | if num_samples:
49 | self.files = self.files[:num_samples]
50 |
51 | def read_files(self):
52 | files = []
53 | for item in self.img_list:
54 | image_path, label_path = item
55 | name = os.path.splitext(os.path.basename(label_path))[0]
56 | sample = {
57 | 'img': image_path,
58 | 'label': label_path,
59 | 'name': name
60 | }
61 | files.append(sample)
62 | return files
63 |
64 | def resize_image(self, image, label, size):
65 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR)
66 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST)
67 | return image, label
68 |
69 | def __getitem__(self, index):
70 | item = self.files[index]
71 | name = item["name"]
72 | # image_path = os.path.join(self.root, 'ade20k', item['img'])
73 | # label_path = os.path.join(self.root, 'ade20k', item['label'])
74 | image_path = os.path.join(self.root, item['img'])
75 | label_path = os.path.join(self.root, item['label'])
76 | image = cv2.imread(
77 | image_path,
78 | cv2.IMREAD_COLOR
79 | )
80 | label = np.array(
81 | Image.open(label_path).convert('P')
82 | )
83 | label = self.reduce_zero_label(label)
84 | size = label.shape
85 |
86 | if 'testval' in self.list_path:
87 | image = self.resize_short_length(
88 | image,
89 | short_length=self.base_size,
90 | fit_stride=8
91 | )
92 | image = self.input_transform(image)
93 | image = image.transpose((2, 0, 1))
94 |
95 | return image.copy(), label.copy(), np.array(size), name
96 |
97 | if 'val' in self.list_path:
98 | image, label = self.resize_short_length(
99 | image,
100 | label=label,
101 | short_length=self.base_size,
102 | fit_stride=8
103 | )
104 | image, label = self.rand_crop(image, label)
105 | image = self.input_transform(image)
106 | image = image.transpose((2, 0, 1))
107 |
108 | return image.copy(), label.copy(), np.array(size), name
109 |
110 | image, label = self.resize_short_length(image, label, short_length=self.base_size)
111 | image, label = self.gen_sample(image, label, self.multi_scale, self.flip)
112 |
113 | return image.copy(), label.copy(), np.array(size), name
--------------------------------------------------------------------------------
/lib/datasets/base_dataset.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | import os
8 |
9 | import cv2
10 | import numpy as np
11 | import random
12 |
13 | import torch
14 | from torch.nn import functional as F
15 | from torch.utils import data
16 |
17 | from config import config
18 |
19 |
20 | class BaseDataset(data.Dataset):
21 | def __init__(self,
22 | ignore_label=-1,
23 | base_size=2048,
24 | crop_size=(512, 1024),
25 | downsample_rate=1,
26 | scale_factor=16,
27 | mean=[0.485, 0.456, 0.406],
28 | std=[0.229, 0.224, 0.225]):
29 |
30 | self.base_size = base_size
31 | self.crop_size = crop_size
32 | self.ignore_label = ignore_label
33 |
34 | self.mean = mean
35 | self.std = std
36 | self.scale_factor = scale_factor
37 | self.downsample_rate = 1./downsample_rate
38 |
39 | self.files = []
40 |
41 | def __len__(self):
42 | return len(self.files)
43 |
44 | def input_transform(self, image):
45 | image = image.astype(np.float32)[:, :, ::-1]
46 | image = image / 255.0
47 | image -= self.mean
48 | image /= self.std
49 | return image
50 |
51 | def label_transform(self, label):
52 | return np.array(label).astype('int32')
53 |
54 | def pad_image(self, image, h, w, size, padvalue):
55 | pad_image = image.copy()
56 | pad_h = max(size[0] - h, 0)
57 | pad_w = max(size[1] - w, 0)
58 | if pad_h > 0 or pad_w > 0:
59 | pad_image = cv2.copyMakeBorder(image, 0, pad_h, 0,
60 | pad_w, cv2.BORDER_CONSTANT,
61 | value=padvalue)
62 |
63 | return pad_image
64 |
65 | def rand_crop(self, image, label):
66 | h, w = image.shape[:-1]
67 | image = self.pad_image(image, h, w, self.crop_size,
68 | (0.0, 0.0, 0.0))
69 | label = self.pad_image(label, h, w, self.crop_size,
70 | (self.ignore_label,))
71 |
72 | new_h, new_w = label.shape
73 | x = random.randint(0, new_w - self.crop_size[1])
74 | y = random.randint(0, new_h - self.crop_size[0])
75 | image = image[y:y+self.crop_size[0], x:x+self.crop_size[1]]
76 | label = label[y:y+self.crop_size[0], x:x+self.crop_size[1]]
77 |
78 | return image, label
79 |
80 | def multi_scale_aug(self, image, label=None,
81 | rand_scale=1, rand_crop=True):
82 |         long_size = int(self.base_size * rand_scale + 0.5)  # np.int was removed in NumPy 1.24; int() is equivalent here
83 |         h, w = image.shape[:2]
84 |         if h > w:
85 |             new_h = long_size
86 |             new_w = int(w * long_size / h + 0.5)
87 |         else:
88 |             new_w = long_size
89 |             new_h = int(h * long_size / w + 0.5)
90 |
91 | image = cv2.resize(image, (new_w, new_h),
92 | interpolation=cv2.INTER_LINEAR)
93 | if label is not None:
94 | label = cv2.resize(label, (new_w, new_h),
95 | interpolation=cv2.INTER_NEAREST)
96 | else:
97 | return image
98 |
99 | if rand_crop:
100 | image, label = self.rand_crop(image, label)
101 |
102 | return image, label
103 |
104 | def resize_short_length(self, image, label=None, short_length=None, fit_stride=None, return_padding=False):
105 | h, w = image.shape[:2]
106 | if h < w:
107 | new_h = short_length
108 |             new_w = int(w * short_length / h + 0.5)  # int() instead of the removed np.int alias
109 |         else:
110 |             new_w = short_length
111 |             new_h = int(h * short_length / w + 0.5)
112 | image = cv2.resize(image, (new_w, new_h),
113 | interpolation=cv2.INTER_LINEAR)
114 | pad_w, pad_h = 0, 0
115 | if fit_stride is not None:
116 | pad_w = 0 if (new_w % fit_stride == 0) else fit_stride - (new_w % fit_stride)
117 | pad_h = 0 if (new_h % fit_stride == 0) else fit_stride - (new_h % fit_stride)
118 | image = cv2.copyMakeBorder(
119 | image, 0, pad_h, 0, pad_w,
120 | cv2.BORDER_CONSTANT, value=tuple(x * 255 for x in self.mean[::-1])
121 | )
122 |
123 | if label is not None:
124 | label = cv2.resize(
125 | label, (new_w, new_h),
126 | interpolation=cv2.INTER_NEAREST)
127 | if pad_h > 0 or pad_w > 0:
128 | label = cv2.copyMakeBorder(
129 | label, 0, pad_h, 0, pad_w,
130 | cv2.BORDER_CONSTANT, value=self.ignore_label
131 | )
132 | if return_padding:
133 | return image, label, (pad_h, pad_w)
134 | else:
135 | return image, label
136 | else:
137 | if return_padding:
138 | return image, (pad_h, pad_w)
139 | else:
140 | return image
141 |
142 | def random_brightness(self, img):
143 | if not config.TRAIN.RANDOM_BRIGHTNESS:
144 | return img
145 | if random.random() < 0.5:
146 | return img
147 | self.shift_value = config.TRAIN.RANDOM_BRIGHTNESS_SHIFT_VALUE
148 | img = img.astype(np.float32)
149 | shift = random.randint(-self.shift_value, self.shift_value)
150 | img[:, :, :] += shift
151 | img = np.around(img)
152 | img = np.clip(img, 0, 255).astype(np.uint8)
153 | return img
154 |
155 | def gen_sample(self, image, label,
156 | multi_scale=True, is_flip=True):
157 | if multi_scale:
158 | rand_scale = 0.5 + random.randint(0, self.scale_factor) / 10.0
159 | image, label = self.multi_scale_aug(image, label,
160 | rand_scale=rand_scale)
161 |
162 | image = self.random_brightness(image)
163 | image = self.input_transform(image)
164 | label = self.label_transform(label)
165 |
166 | image = image.transpose((2, 0, 1))
167 |
168 | if is_flip:
169 | flip = np.random.choice(2) * 2 - 1
170 | image = image[:, :, ::flip]
171 | label = label[:, ::flip]
172 |
173 | if self.downsample_rate != 1:
174 | label = cv2.resize(
175 | label,
176 | None,
177 | fx=self.downsample_rate,
178 | fy=self.downsample_rate,
179 | interpolation=cv2.INTER_NEAREST
180 | )
181 |
182 | return image, label
183 |
184 | def reduce_zero_label(self, labelmap):
185 | labelmap = np.array(labelmap)
186 | encoded_labelmap = labelmap - 1
187 |
188 | return encoded_labelmap
189 |
190 | def inference(self, config, model, image, flip=False):
191 | size = image.size()
192 | pred = model(image)
193 |
194 | if config.MODEL.NUM_OUTPUTS > 1:
195 | pred = pred[config.TEST.OUTPUT_INDEX]
196 |
197 | pred = F.interpolate(
198 | input=pred, size=size[-2:],
199 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS
200 | )
201 |
202 | if flip:
203 | flip_img = image.numpy()[:, :, :, ::-1]
204 | flip_output = model(torch.from_numpy(flip_img.copy()))
205 |
206 | if config.MODEL.NUM_OUTPUTS > 1:
207 | flip_output = flip_output[config.TEST.OUTPUT_INDEX]
208 |
209 | flip_output = F.interpolate(
210 | input=flip_output, size=size[-2:],
211 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS
212 | )
213 |
214 | flip_pred = flip_output.cpu().numpy().copy()
215 | flip_pred = torch.from_numpy(
216 | flip_pred[:, :, :, ::-1].copy()).cuda()
217 | pred += flip_pred
218 | pred = pred * 0.5
219 | return pred.exp()
220 |
221 | def multi_scale_inference(self, config, model, image, scales=[1], flip=False):
222 | batch, _, ori_height, ori_width = image.size()
223 | assert batch == 1, "only supporting batchsize 1."
224 | image = image.numpy()[0].transpose((1, 2, 0)).copy()
225 |         stride_h = int(self.crop_size[0] * 2.0 / 3.0)
226 |         stride_w = int(self.crop_size[1] * 2.0 / 3.0)
227 | final_pred = torch.zeros([1, self.num_classes,
228 | ori_height, ori_width]).cuda()
229 | padvalue = -1.0 * np.array(self.mean) / np.array(self.std)
230 | for scale in scales:
231 | new_img = self.multi_scale_aug(image=image,
232 | rand_scale=scale,
233 | rand_crop=False)
234 | height, width = new_img.shape[:-1]
235 |
236 | if max(height, width) <= np.min(self.crop_size):
237 | new_img = self.pad_image(new_img, height, width,
238 | self.crop_size, padvalue)
239 | new_img = new_img.transpose((2, 0, 1))
240 | new_img = np.expand_dims(new_img, axis=0)
241 | new_img = torch.from_numpy(new_img)
242 | preds = self.inference(config, model, new_img, flip)
243 | preds = preds[:, :, 0:height, 0:width]
244 | else:
245 | if height < self.crop_size[0] or width < self.crop_size[1]:
246 | new_img = self.pad_image(new_img, height, width,
247 | self.crop_size, padvalue)
248 | new_h, new_w = new_img.shape[:-1]
249 |                 rows = int(np.ceil(1.0 * (new_h -
250 |                                           self.crop_size[0]) / stride_h)) + 1
251 |                 cols = int(np.ceil(1.0 * (new_w -
252 |                                           self.crop_size[1]) / stride_w)) + 1
253 | preds = torch.zeros([1, self.num_classes,
254 | new_h, new_w]).cuda()
255 | count = torch.zeros([1, 1, new_h, new_w]).cuda()
256 |
257 | for r in range(rows):
258 | for c in range(cols):
259 | h0 = r * stride_h
260 | w0 = c * stride_w
261 | h1 = min(h0 + self.crop_size[0], new_h)
262 | w1 = min(w0 + self.crop_size[1], new_w)
263 | crop_img = new_img[h0:h1, w0:w1, :]
264 | if h1 == new_h or w1 == new_w:
265 | crop_img = self.pad_image(crop_img,
266 | h1-h0,
267 | w1-w0,
268 | self.crop_size,
269 | padvalue)
270 | crop_img = crop_img.transpose((2, 0, 1))
271 | crop_img = np.expand_dims(crop_img, axis=0)
272 | crop_img = torch.from_numpy(crop_img)
273 | pred = self.inference(config, model, crop_img, flip)
274 | preds[:, :, h0:h1, w0:w1] += pred[:, :, 0:h1-h0, 0:w1-w0]
275 | count[:, :, h0:h1, w0:w1] += 1
276 | preds = preds / count
277 | preds = preds[:, :, :height, :width]
278 |
279 | preds = F.interpolate(
280 | preds, (ori_height, ori_width),
281 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS
282 | )
283 | final_pred += preds
284 | return final_pred
285 |
--------------------------------------------------------------------------------
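A minimal usage sketch of the transforms above (an editorial example, not a repo file; it assumes `lib/` is on `sys.path`, which `tools/_init_paths.py` arranges, and uses a trivial subclass since `BaseDataset` defines no samples of its own):

    import numpy as np
    from datasets.base_dataset import BaseDataset

    class ToyDataset(BaseDataset):
        pass

    ds = ToyDataset(ignore_label=255, base_size=2048, crop_size=(512, 1024))
    image = np.random.randint(0, 256, (768, 1536, 3), dtype=np.uint8)
    label = np.random.randint(0, 19, (768, 1536), dtype=np.uint8)

    x = ds.input_transform(image)               # BGR -> RGB, [0, 1], ImageNet mean/std
    img_c, lbl_c = ds.rand_crop(image, label)   # pad up to crop_size, then random crop
    assert img_c.shape[:2] == (512, 1024) and lbl_c.shape == (512, 1024)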
/lib/datasets/cityscapes.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | import os
8 |
9 | import cv2
10 | import numpy as np
11 | from PIL import Image
12 |
13 | import torch
14 | from torch.nn import functional as F
15 |
16 | from .base_dataset import BaseDataset
17 |
18 | class Cityscapes(BaseDataset):
19 | def __init__(self,
20 | root,
21 | list_path,
22 | num_samples=None,
23 | num_classes=19,
24 | multi_scale=True,
25 | flip=True,
26 | ignore_label=-1,
27 | base_size=2048,
28 | crop_size=(512, 1024),
29 | downsample_rate=1,
30 | scale_factor=16,
31 | mean=[0.485, 0.456, 0.406],
32 | std=[0.229, 0.224, 0.225]):
33 |
34 | super(Cityscapes, self).__init__(ignore_label, base_size,
35 | crop_size, downsample_rate, scale_factor, mean, std,)
36 |
37 | self.root = root
38 | self.list_path = list_path
39 | self.num_classes = num_classes
40 |
41 | self.multi_scale = multi_scale
42 | self.flip = flip
43 |
44 | self.img_list = [line.strip().split() for line in open(root+list_path)]
45 |
46 | self.files = self.read_files()
47 | if num_samples:
48 | self.files = self.files[:num_samples]
49 |
50 | self.label_mapping = {-1: ignore_label, 0: ignore_label,
51 | 1: ignore_label, 2: ignore_label,
52 | 3: ignore_label, 4: ignore_label,
53 | 5: ignore_label, 6: ignore_label,
54 | 7: 0, 8: 1, 9: ignore_label,
55 | 10: ignore_label, 11: 2, 12: 3,
56 | 13: 4, 14: ignore_label, 15: ignore_label,
57 | 16: ignore_label, 17: 5, 18: ignore_label,
58 | 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11,
59 | 25: 12, 26: 13, 27: 14, 28: 15,
60 | 29: ignore_label, 30: ignore_label,
61 | 31: 16, 32: 17, 33: 18}
62 | self.class_weights = torch.FloatTensor([0.8373, 0.918, 0.866, 1.0345,
63 | 1.0166, 0.9969, 0.9754, 1.0489,
64 | 0.8786, 1.0023, 0.9539, 0.9843,
65 | 1.1116, 0.9037, 1.0865, 1.0955,
66 | 1.0865, 1.1529, 1.0507]).cuda()
67 |
68 | def read_files(self):
69 | files = []
70 | if 'test' in self.list_path:
71 | for item in self.img_list:
72 | image_path = item
73 | name = os.path.splitext(os.path.basename(image_path[0]))[0]
74 | files.append({
75 | "img": image_path[0],
76 | "name": name,
77 | })
78 | else:
79 | for item in self.img_list:
80 | image_path, label_path = item
81 | name = os.path.splitext(os.path.basename(label_path))[0]
82 | files.append({
83 | "img": image_path,
84 | "label": label_path,
85 | "name": name,
86 | "weight": 1
87 | })
88 | return files
89 |
90 | def convert_label(self, label, inverse=False):
91 | temp = label.copy()
92 | if inverse:
93 | for v, k in self.label_mapping.items():
94 | label[temp == k] = v
95 | else:
96 | for k, v in self.label_mapping.items():
97 | label[temp == k] = v
98 | return label
99 |
100 | def __getitem__(self, index):
101 | item = self.files[index]
102 | name = item["name"]
103 | # image = cv2.imread(os.path.join(self.root,'cityscapes',item["img"]),
104 | # cv2.IMREAD_COLOR)
105 | image = cv2.imread(os.path.join(self.root, item["img"]),
106 | cv2.IMREAD_COLOR)
107 | size = image.shape
108 |
109 | if 'test' in self.list_path:
110 | image = self.input_transform(image)
111 | image = image.transpose((2, 0, 1))
112 |
113 | return image.copy(), np.array(size), name
114 |
115 | # label = cv2.imread(os.path.join(self.root,'cityscapes',item["label"]),
116 | # cv2.IMREAD_GRAYSCALE)
117 | label = cv2.imread(os.path.join(self.root, item["label"]),
118 | cv2.IMREAD_GRAYSCALE)
119 | label = self.convert_label(label)
120 |
121 | image, label = self.gen_sample(image, label,
122 | self.multi_scale, self.flip)
123 |
124 | return image.copy(), label.copy(), np.array(size), name
125 |
126 | def multi_scale_inference(self, config, model, image, scales=[1], flip=False):
127 | batch, _, ori_height, ori_width = image.size()
128 | assert batch == 1, "only supporting batchsize 1."
129 | image = image.numpy()[0].transpose((1,2,0)).copy()
130 |         stride_h = int(self.crop_size[0] * 1.0)
131 |         stride_w = int(self.crop_size[1] * 1.0)
132 | final_pred = torch.zeros([1, self.num_classes,
133 | ori_height,ori_width]).cuda()
134 | for scale in scales:
135 | new_img = self.multi_scale_aug(image=image,
136 | rand_scale=scale,
137 | rand_crop=False)
138 | height, width = new_img.shape[:-1]
139 |
140 | if scale <= 1.0:
141 | new_img = new_img.transpose((2, 0, 1))
142 | new_img = np.expand_dims(new_img, axis=0)
143 | new_img = torch.from_numpy(new_img)
144 | preds = self.inference(config, model, new_img, flip)
145 | preds = preds[:, :, 0:height, 0:width]
146 | else:
147 | new_h, new_w = new_img.shape[:-1]
148 |                 rows = int(np.ceil(1.0 * (new_h -
149 |                                           self.crop_size[0]) / stride_h)) + 1
150 |                 cols = int(np.ceil(1.0 * (new_w -
151 |                                           self.crop_size[1]) / stride_w)) + 1
152 | preds = torch.zeros([1, self.num_classes,
153 | new_h,new_w]).cuda()
154 | count = torch.zeros([1,1, new_h, new_w]).cuda()
155 |
156 | for r in range(rows):
157 | for c in range(cols):
158 | h0 = r * stride_h
159 | w0 = c * stride_w
160 | h1 = min(h0 + self.crop_size[0], new_h)
161 | w1 = min(w0 + self.crop_size[1], new_w)
162 | h0 = max(int(h1 - self.crop_size[0]), 0)
163 | w0 = max(int(w1 - self.crop_size[1]), 0)
164 | crop_img = new_img[h0:h1, w0:w1, :]
165 | crop_img = crop_img.transpose((2, 0, 1))
166 | crop_img = np.expand_dims(crop_img, axis=0)
167 | crop_img = torch.from_numpy(crop_img)
168 | pred = self.inference(config, model, crop_img, flip)
169 | preds[:,:,h0:h1,w0:w1] += pred[:,:, 0:h1-h0, 0:w1-w0]
170 | count[:,:,h0:h1,w0:w1] += 1
171 | preds = preds / count
172 | preds = preds[:,:,:height,:width]
173 |
174 | preds = F.interpolate(
175 | preds, (ori_height, ori_width),
176 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS
177 | )
178 | final_pred += preds
179 | return final_pred
180 |
181 | def get_palette(self, n):
182 | palette = [0] * (n * 3)
183 | for j in range(0, n):
184 | lab = j
185 | palette[j * 3 + 0] = 0
186 | palette[j * 3 + 1] = 0
187 | palette[j * 3 + 2] = 0
188 | i = 0
189 | while lab:
190 | palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i))
191 | palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i))
192 | palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i))
193 | i += 1
194 | lab >>= 3
195 | return palette
196 |
197 | def save_pred(self, preds, sv_path, name):
198 | palette = self.get_palette(256)
199 | preds = np.asarray(np.argmax(preds.cpu(), axis=1), dtype=np.uint8)
200 | for i in range(preds.shape[0]):
201 | pred = self.convert_label(preds[i], inverse=True)
202 | save_img = Image.fromarray(pred)
203 | save_img.putpalette(palette)
204 | save_img.save(os.path.join(sv_path, name[i]+'.png'))
205 |
206 |
207 |
208 |
--------------------------------------------------------------------------------
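The `label_mapping` table above collapses the 34 raw Cityscapes ids onto the 19 train ids and sends everything else to `ignore_label`. A standalone sketch of the same remapping loop (a truncated stand-in table, with `ignore_label=255` assumed for illustration):

    import numpy as np

    ignore_label = 255
    mapping = {7: 0, 8: 1, 11: 2, 26: 13, 33: 18}   # stand-in for label_mapping

    raw = np.array([[7, 8, 0],
                    [26, 33, 5]], dtype=np.uint8)
    temp = raw.copy()                # snapshot, so already-remapped cells aren't re-matched
    out = raw.copy()
    for k, v in mapping.items():
        out[temp == k] = v
    # out == [[0, 1, 0], [13, 18, 5]]; the full table maps the remaining raw ids
    # to ignore_label, and save_pred runs the same loop with inverse=True to
    # recover raw ids before writing the palette PNGs.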
/lib/datasets/cocostuff.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | import os
8 |
9 | import cv2
10 | import numpy as np
11 |
12 | import torch
13 | from torch.nn import functional as F
14 | from PIL import Image
15 |
16 | from .base_dataset import BaseDataset
17 |
18 |
19 | class COCOStuff(BaseDataset):
20 | def __init__(self,
21 | root,
22 | list_path,
23 | num_samples=None,
24 | num_classes=171,
25 | multi_scale=True,
26 | flip=True,
27 | ignore_label=-1,
28 | base_size=520,
29 | crop_size=(520, 520),
30 | downsample_rate=1,
31 | scale_factor=11,
32 | mean=[0.485, 0.456, 0.406],
33 | std=[0.229, 0.224, 0.225]):
34 |
35 | super(COCOStuff, self).__init__(ignore_label, base_size,
36 | crop_size, downsample_rate, scale_factor, mean, std)
37 |
38 | self.root = root
39 | self.num_classes = num_classes
40 | self.list_path = list_path
41 | self.class_weights = None
42 |
43 | self.multi_scale = multi_scale
44 | self.flip = flip
45 | self.crop_size = crop_size
46 | self.img_list = [line.strip().split() for line in open(root+list_path)]
47 |
48 | self.files = self.read_files()
49 | if num_samples:
50 | self.files = self.files[:num_samples]
51 | self.mapping = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20,
52 | 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39,
53 | 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
54 | 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77,
55 | 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96,
56 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
57 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128,
58 | 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
59 | 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
60 | 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176,
61 | 177, 178, 179, 180, 181, 182]
62 |
63 | def read_files(self):
64 | files = []
65 | for item in self.img_list:
66 | image_path, label_path = item
67 | name = os.path.splitext(os.path.basename(label_path))[0]
68 | sample = {
69 | 'img': image_path,
70 | 'label': label_path,
71 | 'name': name
72 | }
73 | files.append(sample)
74 | return files
75 |
76 | def encode_label(self, labelmap):
77 | ret = np.ones_like(labelmap) * 255
78 | for idx, label in enumerate(self.mapping):
79 | ret[labelmap == label] = idx
80 |
81 | return ret
82 |
83 | def resize_image(self, image, label, size):
84 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR)
85 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST)
86 | return image, label
87 |
88 | def __getitem__(self, index):
89 | item = self.files[index]
90 | name = item["name"]
91 | image_path = os.path.join(self.root, item['img'])
92 | label_path = os.path.join(self.root, item['label'])
93 | image = cv2.imread(
94 | image_path,
95 | cv2.IMREAD_COLOR
96 | )
97 | label = np.array(
98 | Image.open(label_path).convert('P')
99 | )
100 | label = self.encode_label(label)
101 | label = self.reduce_zero_label(label)
102 | size = label.shape
103 |
104 | if 'testval' in self.list_path:
105 | image, border_padding = self.resize_short_length(
106 | image,
107 | short_length=self.base_size,
108 | fit_stride=8,
109 | return_padding=True
110 | )
111 | image = self.input_transform(image)
112 | image = image.transpose((2, 0, 1))
113 |
114 | return image.copy(), label.copy(), np.array(size), name, border_padding
115 |
116 | if 'val' in self.list_path:
117 | image, label = self.resize_short_length(
118 | image,
119 | label=label,
120 | short_length=self.base_size,
121 | fit_stride=8
122 | )
123 | image, label = self.rand_crop(image, label)
124 | image = self.input_transform(image)
125 | image = image.transpose((2, 0, 1))
126 |
127 | return image.copy(), label.copy(), np.array(size), name
128 |
129 | image, label = self.resize_short_length(image, label, short_length=self.base_size)
130 | image, label = self.gen_sample(image, label, self.multi_scale, self.flip)
131 |
132 | return image.copy(), label.copy(), np.array(size), name
--------------------------------------------------------------------------------
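A sketch of the two-step label encoding used above: `encode_label` maps the raw COCO-Stuff ids onto a dense `0..170` range via `self.mapping`, and `reduce_zero_label` then shifts everything down by one, so the former class 0 wraps around to 255 and is treated as ignore (truncated stand-in mapping, dummy values):

    import numpy as np

    mapping = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13]   # stand-in for self.mapping
    raw = np.array([0, 2, 13], dtype=np.uint8)

    dense = np.ones_like(raw) * 255
    for idx, label in enumerate(mapping):
        dense[raw == label] = idx       # dense == [0, 2, 12]

    encoded = dense - 1                 # uint8 wraps: 0 -> 255
    # encoded == [255, 1, 11]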
/lib/datasets/lip.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | import os
8 |
9 | import cv2
10 | import numpy as np
11 |
12 | import torch
13 | from torch.nn import functional as F
14 | from PIL import Image
15 |
16 | from .base_dataset import BaseDataset
17 |
18 |
19 | class LIP(BaseDataset):
20 | def __init__(self,
21 | root,
22 | list_path,
23 | num_samples=None,
24 | num_classes=20,
25 | multi_scale=True,
26 | flip=True,
27 | ignore_label=-1,
28 | base_size=473,
29 | crop_size=(473, 473),
30 | downsample_rate=1,
31 | scale_factor=11,
32 | mean=[0.485, 0.456, 0.406],
33 | std=[0.229, 0.224, 0.225]):
34 |
35 | super(LIP, self).__init__(ignore_label, base_size,
36 | crop_size, downsample_rate, scale_factor, mean, std)
37 |
38 | self.root = root
39 | self.num_classes = num_classes
40 | self.list_path = list_path
41 | self.class_weights = None
42 |
43 | self.multi_scale = multi_scale
44 | self.flip = flip
45 | self.img_list = [line.strip().split() for line in open(root+list_path)]
46 |
47 | self.files = self.read_files()
48 | if num_samples:
49 | self.files = self.files[:num_samples]
50 |
51 | def read_files(self):
52 | files = []
53 | for item in self.img_list:
54 | if 'train' in self.list_path:
55 | image_path, label_path, _ = item
56 | name = os.path.splitext(os.path.basename(label_path))[0]
57 | sample = {"img": image_path,
58 | "label": label_path,
59 | "name": name, }
60 | elif 'val' in self.list_path:
61 | image_path, label_path = item
62 | name = os.path.splitext(os.path.basename(label_path))[0]
63 | sample = {"img": image_path,
64 | "label": label_path,
65 | "name": name, }
66 | else:
67 | raise NotImplementedError('Unknown subset.')
68 | files.append(sample)
69 | return files
70 |
71 | def resize_image(self, image, label, size):
72 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR)
73 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST)
74 | return image, label
75 |
76 | def __getitem__(self, index):
77 | item = self.files[index]
78 | name = item["name"]
79 | image_path = os.path.join(self.root, item['img'])
80 | label_path = os.path.join(self.root, item['label'])
81 | image = cv2.imread(
82 | image_path,
83 | cv2.IMREAD_COLOR
84 | )
85 | label = np.array(
86 | Image.open(label_path).convert('P')
87 | )
88 |
89 | size = label.shape
90 | if 'testval' in self.list_path:
91 | image = cv2.resize(image, self.crop_size,
92 | interpolation=cv2.INTER_LINEAR)
93 | image = self.input_transform(image)
94 | image = image.transpose((2, 0, 1))
95 |
96 | return image.copy(), label.copy(), np.array(size), name
97 |
98 | if self.flip:
99 | flip = np.random.choice(2) * 2 - 1
100 | image = image[:, ::flip, :]
101 | label = label[:, ::flip]
102 |
103 | if flip == -1:
104 | right_idx = [15, 17, 19]
105 | left_idx = [14, 16, 18]
106 | for i in range(0, 3):
107 | right_pos = np.where(label == right_idx[i])
108 | left_pos = np.where(label == left_idx[i])
109 | label[right_pos[0], right_pos[1]] = left_idx[i]
110 | label[left_pos[0], left_pos[1]] = right_idx[i]
111 |
112 | image, label = self.resize_image(image, label, self.crop_size)
113 | image, label = self.gen_sample(image, label,
114 | self.multi_scale, False)
115 |
116 | return image.copy(), label.copy(), np.array(size), name
117 |
118 | def inference(self, config, model, image, flip):
119 | size = image.size()
120 | pred = model(image)
121 | if config.MODEL.NUM_OUTPUTS > 1:
122 | pred = pred[config.TEST.OUTPUT_INDEX]
123 |
124 | pred = F.interpolate(
125 | input=pred, size=size[-2:],
126 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS
127 | )
128 |
129 | if flip:
130 | flip_img = image.numpy()[:, :, :, ::-1]
131 | flip_output = model(torch.from_numpy(flip_img.copy()))
132 |
133 | if config.MODEL.NUM_OUTPUTS > 1:
134 | flip_output = flip_output[config.TEST.OUTPUT_INDEX]
135 |
136 | flip_output = F.interpolate(
137 | input=flip_output, size=size[-2:],
138 | mode='bilinear', align_corners=config.MODEL.ALIGN_CORNERS
139 | )
140 |
141 | flip_output = flip_output.cpu()
142 | flip_pred = flip_output.cpu().numpy().copy()
143 | flip_pred[:, 14, :, :] = flip_output[:, 15, :, :]
144 | flip_pred[:, 15, :, :] = flip_output[:, 14, :, :]
145 | flip_pred[:, 16, :, :] = flip_output[:, 17, :, :]
146 | flip_pred[:, 17, :, :] = flip_output[:, 16, :, :]
147 | flip_pred[:, 18, :, :] = flip_output[:, 19, :, :]
148 | flip_pred[:, 19, :, :] = flip_output[:, 18, :, :]
149 | flip_pred = torch.from_numpy(
150 | flip_pred[:, :, :, ::-1].copy()).cuda()
151 | pred += flip_pred
152 | pred = pred * 0.5
153 | return pred.exp()
154 |
--------------------------------------------------------------------------------
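A standalone sketch of the left/right part swap performed under horizontal flips above: mirroring the image turns, e.g., left-arm pixels into right-arm pixels, so the paired LIP ids (14/15, 16/17, 18/19) must be exchanged in the label map as well:

    import numpy as np

    label = np.array([[14, 15, 0],
                      [16, 19, 17]])
    flipped = label[:, ::-1].copy()
    for left, right in [(14, 15), (16, 17), (18, 19)]:
        l, r = flipped == left, flipped == right   # compute both masks first
        flipped[l], flipped[r] = right, left
    # every paired id is exchanged in the mirrored map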
/lib/datasets/pascal_ctx.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # Referring to the implementation in
6 | # https://github.com/zhanghang1989/PyTorch-Encoding
7 | # ------------------------------------------------------------------------------
8 |
9 | import os
10 |
11 | import cv2
12 | import numpy as np
13 |
14 | import torch
15 | from torch.nn import functional as F
16 | from PIL import Image
17 |
18 | from .base_dataset import BaseDataset
19 |
20 | class PASCALContext(BaseDataset):
21 | def __init__(self,
22 | root,
23 | list_path,
24 | num_samples=None,
25 | num_classes=59,
26 | multi_scale=True,
27 | flip=True,
28 | ignore_label=-1,
29 | base_size=520,
30 | crop_size=(480, 480),
31 | downsample_rate=1,
32 | scale_factor=16,
33 | mean=[0.485, 0.456, 0.406],
34 | std=[0.229, 0.224, 0.225]):
35 |
36 | super(PASCALContext, self).__init__(ignore_label, base_size,
37 | crop_size, downsample_rate, scale_factor, mean, std)
38 |
39 | self.root = root
40 | self.num_classes = num_classes
41 | self.list_path = list_path
42 | self.class_weights = None
43 |
44 | self.multi_scale = multi_scale
45 | self.flip = flip
46 | self.crop_size = crop_size
47 | self.img_list = [line.strip().split() for line in open(root+list_path)]
48 |
49 | self.files = self.read_files()
50 | if num_samples:
51 | self.files = self.files[:num_samples]
52 |
53 | def read_files(self):
54 | files = []
55 | for item in self.img_list:
56 | image_path, label_path = item
57 | name = os.path.splitext(os.path.basename(label_path))[0]
58 | sample = {
59 | 'img': image_path,
60 | 'label': label_path,
61 | 'name': name
62 | }
63 | files.append(sample)
64 | return files
65 |
66 | def resize_image(self, image, label, size):
67 | image = cv2.resize(image, size, interpolation=cv2.INTER_LINEAR)
68 | label = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST)
69 | return image, label
70 |
71 | def __getitem__(self, index):
72 | item = self.files[index]
73 | name = item["name"]
74 | image_path = os.path.join(self.root, item['img'])
75 | label_path = os.path.join(self.root, item['label'])
76 | image = cv2.imread(
77 | image_path,
78 | cv2.IMREAD_COLOR
79 | )
80 | label = np.array(
81 | Image.open(label_path).convert('P')
82 | )
83 | if self.num_classes == 59:
84 | label = self.reduce_zero_label(label)
85 | size = label.shape
86 |
87 | if 'testval' in self.list_path:
88 | image, border_padding = self.resize_short_length(
89 | image,
90 | short_length=self.base_size,
91 | fit_stride=8,
92 | return_padding=True
93 | )
94 | image = self.input_transform(image)
95 | image = image.transpose((2, 0, 1))
96 |
97 | return image.copy(), label.copy(), np.array(size), name, border_padding
98 |
99 | if 'val' in self.list_path:
100 | image, label = self.resize_short_length(
101 | image,
102 | label=label,
103 | short_length=self.base_size,
104 | fit_stride=8
105 | )
106 | image, label = self.rand_crop(image, label)
107 | image = self.input_transform(image)
108 | image = image.transpose((2, 0, 1))
109 |
110 | return image.copy(), label.copy(), np.array(size), name
111 |
112 | image, label = self.resize_short_length(image, label, short_length=self.base_size)
113 | image, label = self.gen_sample(image, label, self.multi_scale, self.flip)
114 |
115 | return image.copy(), label.copy(), np.array(size), name
--------------------------------------------------------------------------------
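How these dataset classes are consumed end to end; a minimal sketch that bypasses the config system used by `tools/train.py` (the `root` and `list_path` values are assumptions about the local data layout, and `lib/` is assumed to be on `sys.path`):

    import torch
    from datasets.pascal_ctx import PASCALContext

    dataset = PASCALContext(root='data/', list_path='list/pascal_ctx/train.lst',
                            num_classes=59, multi_scale=True, flip=True,
                            base_size=520, crop_size=(480, 480))
    loader = torch.utils.data.DataLoader(dataset, batch_size=4,
                                         shuffle=True, drop_last=True)
    image, label, size, name = next(iter(loader))
    # image: float32 NCHW tensor; label: int32 NHW tensor after gen_sample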
/lib/models/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import models.seg_hrnet
12 | import models.seg_hrnet_ocr
--------------------------------------------------------------------------------
/lib/models/bn_helper.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import functools
3 |
4 | if torch.__version__.startswith('0'):
5 | from .sync_bn.inplace_abn.bn import InPlaceABNSync
6 | BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
7 | BatchNorm2d_class = InPlaceABNSync
8 | relu_inplace = False
9 | else:
10 | BatchNorm2d_class = BatchNorm2d = torch.nn.SyncBatchNorm
11 | relu_inplace = True
--------------------------------------------------------------------------------
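What the helper above resolves to on PyTorch >= 1.0: model code writes `BatchNorm2d(...)` and receives `torch.nn.SyncBatchNorm`, which synchronizes batch statistics across processes under distributed training and behaves like ordinary batch norm otherwise. A quick sketch (single process, eval mode, so no process group is needed; assumes `lib/` on `sys.path`):

    import torch
    from models.bn_helper import BatchNorm2d, relu_inplace

    bn = BatchNorm2d(64).eval()              # torch.nn.SyncBatchNorm on PyTorch 1.x+
    act = torch.nn.ReLU(inplace=relu_inplace)
    y = act(bn(torch.randn(2, 64, 8, 8)))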
/lib/models/sync_bn/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | BSD 3-Clause License
3 |
4 | Copyright (c) 2017, mapillary
5 | All rights reserved.
6 |
7 | Redistribution and use in source and binary forms, with or without
8 | modification, are permitted provided that the following conditions are met:
9 |
10 | * Redistributions of source code must retain the above copyright notice, this
11 | list of conditions and the following disclaimer.
12 |
13 | * Redistributions in binary form must reproduce the above copyright notice,
14 | this list of conditions and the following disclaimer in the documentation
15 | and/or other materials provided with the distribution.
16 |
17 | * Neither the name of the copyright holder nor the names of its
18 | contributors may be used to endorse or promote products derived from
19 | this software without specific prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
--------------------------------------------------------------------------------
/lib/models/sync_bn/__init__.py:
--------------------------------------------------------------------------------
1 | from .inplace_abn import bn
--------------------------------------------------------------------------------
/lib/models/sync_bn/inplace_abn/__init__.py:
--------------------------------------------------------------------------------
1 | from .bn import ABN, InPlaceABN, InPlaceABNSync
2 | from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
3 |
--------------------------------------------------------------------------------
/lib/models/sync_bn/inplace_abn/bn.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as functional
5 |
6 | try:
7 | from queue import Queue
8 | except ImportError:
9 | from Queue import Queue
10 |
11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
12 | sys.path.append(BASE_DIR)
13 | sys.path.append(os.path.join(BASE_DIR, '../src'))
14 | from functions import *
15 |
16 |
17 | class ABN(nn.Module):
18 | """Activated Batch Normalization
19 |
20 | This gathers a `BatchNorm2d` and an activation function in a single module
21 | """
22 |
23 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
24 | """Creates an Activated Batch Normalization module
25 |
26 | Parameters
27 | ----------
28 | num_features : int
29 | Number of feature channels in the input and output.
30 | eps : float
31 | Small constant to prevent numerical issues.
32 | momentum : float
33 |             Momentum factor applied when computing the running statistics.
34 |         affine : bool
35 |             If `True` apply learned scale and shift transformation after normalization.
36 |         activation : str
37 |             Name of the activation function, one of: `leaky_relu`, `elu` or `none`.
38 | slope : float
39 | Negative slope for the `leaky_relu` activation.
40 | """
41 | super(ABN, self).__init__()
42 | self.num_features = num_features
43 | self.affine = affine
44 | self.eps = eps
45 | self.momentum = momentum
46 | self.activation = activation
47 | self.slope = slope
48 | if self.affine:
49 | self.weight = nn.Parameter(torch.ones(num_features))
50 | self.bias = nn.Parameter(torch.zeros(num_features))
51 | else:
52 | self.register_parameter('weight', None)
53 | self.register_parameter('bias', None)
54 | self.register_buffer('running_mean', torch.zeros(num_features))
55 | self.register_buffer('running_var', torch.ones(num_features))
56 | self.reset_parameters()
57 |
58 | def reset_parameters(self):
59 | nn.init.constant_(self.running_mean, 0)
60 | nn.init.constant_(self.running_var, 1)
61 | if self.affine:
62 | nn.init.constant_(self.weight, 1)
63 | nn.init.constant_(self.bias, 0)
64 |
65 | def forward(self, x):
66 | x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias,
67 | self.training, self.momentum, self.eps)
68 |
69 | if self.activation == ACT_RELU:
70 | return functional.relu(x, inplace=True)
71 | elif self.activation == ACT_LEAKY_RELU:
72 | return functional.leaky_relu(x, negative_slope=self.slope, inplace=True)
73 | elif self.activation == ACT_ELU:
74 | return functional.elu(x, inplace=True)
75 | else:
76 | return x
77 |
78 | def __repr__(self):
79 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
80 | ' affine={affine}, activation={activation}'
81 | if self.activation == "leaky_relu":
82 | rep += ', slope={slope})'
83 | else:
84 | rep += ')'
85 | return rep.format(name=self.__class__.__name__, **self.__dict__)
86 |
87 |
88 | class InPlaceABN(ABN):
89 | """InPlace Activated Batch Normalization"""
90 |
91 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
92 | """Creates an InPlace Activated Batch Normalization module
93 |
94 | Parameters
95 | ----------
96 | num_features : int
97 | Number of feature channels in the input and output.
98 | eps : float
99 | Small constant to prevent numerical issues.
100 | momentum : float
101 |             Momentum factor applied when computing the running statistics.
102 |         affine : bool
103 |             If `True` apply learned scale and shift transformation after normalization.
104 |         activation : str
105 |             Name of the activation function, one of: `leaky_relu`, `elu` or `none`.
106 | slope : float
107 | Negative slope for the `leaky_relu` activation.
108 | """
109 | super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope)
110 |
111 | def forward(self, x):
112 | return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var,
113 | self.training, self.momentum, self.eps, self.activation, self.slope)
114 |
115 |
116 | class InPlaceABNSync(ABN):
117 | """InPlace Activated Batch Normalization with cross-GPU synchronization
118 |
119 | This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DataParallel`.
120 | """
121 |
122 | def __init__(self, num_features, devices=None, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu",
123 | slope=0.01):
124 | """Creates a synchronized, InPlace Activated Batch Normalization module
125 |
126 | Parameters
127 | ----------
128 | num_features : int
129 | Number of feature channels in the input and output.
130 | devices : list of int or None
131 | IDs of the GPUs that will run the replicas of this module.
132 | eps : float
133 | Small constant to prevent numerical issues.
134 | momentum : float
135 |             Momentum factor applied when computing the running statistics.
136 |         affine : bool
137 |             If `True` apply learned scale and shift transformation after normalization.
138 |         activation : str
139 |             Name of the activation function, one of: `leaky_relu`, `elu` or `none`.
140 | slope : float
141 | Negative slope for the `leaky_relu` activation.
142 | """
143 | super(InPlaceABNSync, self).__init__(num_features, eps, momentum, affine, activation, slope)
144 | self.devices = devices if devices else list(range(torch.cuda.device_count()))
145 |
146 | # Initialize queues
147 | self.worker_ids = self.devices[1:]
148 | self.master_queue = Queue(len(self.worker_ids))
149 | self.worker_queues = [Queue(1) for _ in self.worker_ids]
150 |
151 | def forward(self, x):
152 | if x.get_device() == self.devices[0]:
153 | # Master mode
154 | extra = {
155 | "is_master": True,
156 | "master_queue": self.master_queue,
157 | "worker_queues": self.worker_queues,
158 | "worker_ids": self.worker_ids
159 | }
160 | else:
161 | # Worker mode
162 | extra = {
163 | "is_master": False,
164 | "master_queue": self.master_queue,
165 | "worker_queue": self.worker_queues[self.worker_ids.index(x.get_device())]
166 | }
167 |
168 | return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var,
169 | extra, self.training, self.momentum, self.eps, self.activation, self.slope)
170 |
171 | def __repr__(self):
172 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
173 | ' affine={affine}, devices={devices}, activation={activation}'
174 | if self.activation == "leaky_relu":
175 | rep += ', slope={slope})'
176 | else:
177 | rep += ')'
178 | return rep.format(name=self.__class__.__name__, **self.__dict__)
179 |
--------------------------------------------------------------------------------
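`ABN.forward` above is ordinary batch normalization followed by the chosen activation, fused into one module. Spelled out with stock PyTorch ops (written standalone because importing `bn.py` JIT-compiles the CUDA extension through `functions.py`; values are dummies):

    import torch
    import torch.nn.functional as F

    x = torch.randn(2, 8, 4, 4)
    running_mean, running_var = torch.zeros(8), torch.ones(8)
    weight, bias = torch.ones(8), torch.zeros(8)

    # what eval-mode ABN computes with activation='leaky_relu', slope=0.01:
    y = F.batch_norm(x, running_mean, running_var, weight, bias,
                     training=False, momentum=0.1, eps=1e-5)
    y = F.leaky_relu(y, negative_slope=0.01, inplace=True)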
/lib/models/sync_bn/inplace_abn/functions.py:
--------------------------------------------------------------------------------
1 | from os import path
2 |
3 | import torch.autograd as autograd
4 | import torch.cuda.comm as comm
5 | from torch.autograd.function import once_differentiable
6 | from torch.utils.cpp_extension import load
7 |
8 | _src_path = path.join(path.dirname(path.abspath(__file__)), "src")
9 | _backend = load(name="inplace_abn",
10 | extra_cflags=["-O3"],
11 | sources=[path.join(_src_path, f) for f in [
12 | "inplace_abn.cpp",
13 | "inplace_abn_cpu.cpp",
14 | "inplace_abn_cuda.cu"
15 | ]],
16 | extra_cuda_cflags=["--expt-extended-lambda"])
17 |
18 | # Activation names
19 | ACT_RELU = "relu"
20 | ACT_LEAKY_RELU = "leaky_relu"
21 | ACT_ELU = "elu"
22 | ACT_NONE = "none"
23 |
24 |
25 | def _check(fn, *args, **kwargs):
26 | success = fn(*args, **kwargs)
27 | if not success:
28 | raise RuntimeError("CUDA Error encountered in {}".format(fn))
29 |
30 |
31 | def _broadcast_shape(x):
32 | out_size = []
33 | for i, s in enumerate(x.size()):
34 | if i != 1:
35 | out_size.append(1)
36 | else:
37 | out_size.append(s)
38 | return out_size
39 |
40 |
41 | def _reduce(x):
42 | if len(x.size()) == 2:
43 | return x.sum(dim=0)
44 | else:
45 | n, c = x.size()[0:2]
46 | return x.contiguous().view((n, c, -1)).sum(2).sum(0)
47 |
48 |
49 | def _count_samples(x):
50 | count = 1
51 | for i, s in enumerate(x.size()):
52 | if i != 1:
53 | count *= s
54 | return count
55 |
56 |
57 | def _act_forward(ctx, x):
58 | if ctx.activation == ACT_LEAKY_RELU:
59 | _backend.leaky_relu_forward(x, ctx.slope)
60 | elif ctx.activation == ACT_ELU:
61 | _backend.elu_forward(x)
62 | elif ctx.activation == ACT_NONE:
63 | pass
64 |
65 |
66 | def _act_backward(ctx, x, dx):
67 | if ctx.activation == ACT_LEAKY_RELU:
68 | _backend.leaky_relu_backward(x, dx, ctx.slope)
69 | elif ctx.activation == ACT_ELU:
70 | _backend.elu_backward(x, dx)
71 | elif ctx.activation == ACT_NONE:
72 | pass
73 |
74 |
75 | class InPlaceABN(autograd.Function):
76 | @staticmethod
77 | def forward(ctx, x, weight, bias, running_mean, running_var,
78 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
79 | # Save context
80 | ctx.training = training
81 | ctx.momentum = momentum
82 | ctx.eps = eps
83 | ctx.activation = activation
84 | ctx.slope = slope
85 | ctx.affine = weight is not None and bias is not None
86 |
87 | # Prepare inputs
88 | count = _count_samples(x)
89 | x = x.contiguous()
90 | weight = weight.contiguous() if ctx.affine else x.new_empty(0)
91 | bias = bias.contiguous() if ctx.affine else x.new_empty(0)
92 |
93 | if ctx.training:
94 | mean, var = _backend.mean_var(x)
95 |
96 | # Update running stats
97 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
98 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))
99 |
100 | # Mark in-place modified tensors
101 | ctx.mark_dirty(x, running_mean, running_var)
102 | else:
103 | mean, var = running_mean.contiguous(), running_var.contiguous()
104 | ctx.mark_dirty(x)
105 |
106 | # BN forward + activation
107 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
108 | _act_forward(ctx, x)
109 |
110 | # Output
111 | ctx.var = var
112 | ctx.save_for_backward(x, var, weight, bias)
113 | return x
114 |
115 | @staticmethod
116 | @once_differentiable
117 | def backward(ctx, dz):
118 | z, var, weight, bias = ctx.saved_tensors
119 | dz = dz.contiguous()
120 |
121 | # Undo activation
122 | _act_backward(ctx, z, dz)
123 |
124 | if ctx.training:
125 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
126 | else:
127 | # TODO: implement simplified CUDA backward for inference mode
128 | edz = dz.new_zeros(dz.size(1))
129 | eydz = dz.new_zeros(dz.size(1))
130 |
131 | dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
132 | dweight = dweight if ctx.affine else None
133 | dbias = dbias if ctx.affine else None
134 |
135 | return dx, dweight, dbias, None, None, None, None, None, None, None
136 |
137 |
138 | class InPlaceABNSync(autograd.Function):
139 | @classmethod
140 | def forward(cls, ctx, x, weight, bias, running_mean, running_var,
141 | extra, training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
142 | # Save context
143 | cls._parse_extra(ctx, extra)
144 | ctx.training = training
145 | ctx.momentum = momentum
146 | ctx.eps = eps
147 | ctx.activation = activation
148 | ctx.slope = slope
149 | ctx.affine = weight is not None and bias is not None
150 |
151 | # Prepare inputs
152 | count = _count_samples(x) * (ctx.master_queue.maxsize + 1)
153 | x = x.contiguous()
154 | weight = weight.contiguous() if ctx.affine else x.new_empty(0)
155 | bias = bias.contiguous() if ctx.affine else x.new_empty(0)
156 |
157 | if ctx.training:
158 | mean, var = _backend.mean_var(x)
159 |
160 | if ctx.is_master:
161 | means, vars = [mean.unsqueeze(0)], [var.unsqueeze(0)]
162 | for _ in range(ctx.master_queue.maxsize):
163 | mean_w, var_w = ctx.master_queue.get()
164 | ctx.master_queue.task_done()
165 | means.append(mean_w.unsqueeze(0))
166 | vars.append(var_w.unsqueeze(0))
167 |
168 | means = comm.gather(means)
169 | vars = comm.gather(vars)
170 |
171 | mean = means.mean(0)
172 | var = (vars + (mean - means) ** 2).mean(0)
173 |
174 | tensors = comm.broadcast_coalesced((mean, var), [mean.get_device()] + ctx.worker_ids)
175 | for ts, queue in zip(tensors[1:], ctx.worker_queues):
176 | queue.put(ts)
177 | else:
178 | ctx.master_queue.put((mean, var))
179 | mean, var = ctx.worker_queue.get()
180 | ctx.worker_queue.task_done()
181 |
182 | # Update running stats
183 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
184 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))
185 |
186 | # Mark in-place modified tensors
187 | ctx.mark_dirty(x, running_mean, running_var)
188 | else:
189 | mean, var = running_mean.contiguous(), running_var.contiguous()
190 | ctx.mark_dirty(x)
191 |
192 | # BN forward + activation
193 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
194 | _act_forward(ctx, x)
195 |
196 | # Output
197 | ctx.var = var
198 | ctx.save_for_backward(x, var, weight, bias)
199 | return x
200 |
201 | @staticmethod
202 | @once_differentiable
203 | def backward(ctx, dz):
204 | z, var, weight, bias = ctx.saved_tensors
205 | dz = dz.contiguous()
206 |
207 | # Undo activation
208 | _act_backward(ctx, z, dz)
209 |
210 | if ctx.training:
211 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
212 |
213 | if ctx.is_master:
214 | edzs, eydzs = [edz], [eydz]
215 | for _ in range(len(ctx.worker_queues)):
216 | edz_w, eydz_w = ctx.master_queue.get()
217 | ctx.master_queue.task_done()
218 | edzs.append(edz_w)
219 | eydzs.append(eydz_w)
220 |
221 | edz = comm.reduce_add(edzs) / (ctx.master_queue.maxsize + 1)
222 | eydz = comm.reduce_add(eydzs) / (ctx.master_queue.maxsize + 1)
223 |
224 | tensors = comm.broadcast_coalesced((edz, eydz), [edz.get_device()] + ctx.worker_ids)
225 | for ts, queue in zip(tensors[1:], ctx.worker_queues):
226 | queue.put(ts)
227 | else:
228 | ctx.master_queue.put((edz, eydz))
229 | edz, eydz = ctx.worker_queue.get()
230 | ctx.worker_queue.task_done()
231 | else:
232 | edz = dz.new_zeros(dz.size(1))
233 | eydz = dz.new_zeros(dz.size(1))
234 |
235 | dx, dweight, dbias = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
236 | dweight = dweight if ctx.affine else None
237 | dbias = dbias if ctx.affine else None
238 |
239 | return dx, dweight, dbias, None, None, None, None, None, None, None, None
240 |
241 | @staticmethod
242 | def _parse_extra(ctx, extra):
243 | ctx.is_master = extra["is_master"]
244 | if ctx.is_master:
245 | ctx.master_queue = extra["master_queue"]
246 | ctx.worker_queues = extra["worker_queues"]
247 | ctx.worker_ids = extra["worker_ids"]
248 | else:
249 | ctx.master_queue = extra["master_queue"]
250 | ctx.worker_queue = extra["worker_queue"]
251 |
252 |
253 | inplace_abn = InPlaceABN.apply
254 | inplace_abn_sync = InPlaceABNSync.apply
255 |
256 | __all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"]
257 |
--------------------------------------------------------------------------------
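The master/worker exchange in `InPlaceABNSync.forward` reduces per-GPU statistics with `mean = means.mean(0)` and `var = (vars + (mean - means) ** 2).mean(0)`. With equal per-replica sample counts this is exact: the global variance equals the mean of the per-replica (biased) variances plus the variance of the per-replica means. A standalone check:

    import torch

    chunks = [torch.randn(4, 3) for _ in range(3)]        # one chunk per "GPU"
    means = torch.stack([c.mean(0) for c in chunks])
    vars_ = torch.stack([c.var(0, unbiased=False) for c in chunks])

    mean = means.mean(0)
    var = (vars_ + (mean - means) ** 2).mean(0)

    full = torch.cat(chunks, 0)
    assert torch.allclose(mean, full.mean(0), atol=1e-6)
    assert torch.allclose(var, full.var(0, unbiased=False), atol=1e-6)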
/lib/models/sync_bn/inplace_abn/src/common.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <ATen/ATen.h>
4 |
5 | /*
6 | * General settings
7 | */
8 | const int WARP_SIZE = 32;
9 | const int MAX_BLOCK_SIZE = 512;
10 |
11 | template<typename T>
12 | struct Pair {
13 | T v1, v2;
14 | __device__ Pair() {}
15 | __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {}
16 | __device__ Pair(T v) : v1(v), v2(v) {}
17 | __device__ Pair(int v) : v1(v), v2(v) {}
18 | __device__ Pair &operator+=(const Pair &a) {
19 | v1 += a.v1;
20 | v2 += a.v2;
21 | return *this;
22 | }
23 | };
24 |
25 | /*
26 | * Utility functions
27 | */
28 | template<typename T>
29 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize,
30 | unsigned int mask = 0xffffffff) {
31 | #if CUDART_VERSION >= 9000
32 | return __shfl_xor_sync(mask, value, laneMask, width);
33 | #else
34 | return __shfl_xor(value, laneMask, width);
35 | #endif
36 | }
37 |
38 | __device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); }
39 |
40 | static int getNumThreads(int nElem) {
41 | int threadSizes[5] = {32, 64, 128, 256, MAX_BLOCK_SIZE};
42 | for (int i = 0; i != 5; ++i) {
43 | if (nElem <= threadSizes[i]) {
44 | return threadSizes[i];
45 | }
46 | }
47 | return MAX_BLOCK_SIZE;
48 | }
49 |
50 | template<typename T>
51 | static __device__ __forceinline__ T warpSum(T val) {
52 | #if __CUDA_ARCH__ >= 300
53 | for (int i = 0; i < getMSB(WARP_SIZE); ++i) {
54 | val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE);
55 | }
56 | #else
57 | __shared__ T values[MAX_BLOCK_SIZE];
58 | values[threadIdx.x] = val;
59 | __threadfence_block();
60 | const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE;
61 | for (int i = 1; i < WARP_SIZE; i++) {
62 | val += values[base + ((i + threadIdx.x) % WARP_SIZE)];
63 | }
64 | #endif
65 | return val;
66 | }
67 |
68 | template<typename T>
69 | static __device__ __forceinline__ Pair<T> warpSum(Pair<T> value) {
70 | value.v1 = warpSum(value.v1);
71 | value.v2 = warpSum(value.v2);
72 | return value;
73 | }
74 |
75 | template<typename T, typename Op>
76 | __device__ T reduce(Op op, int plane, int N, int C, int S) {
77 | T sum = (T)0;
78 | for (int batch = 0; batch < N; ++batch) {
79 | for (int x = threadIdx.x; x < S; x += blockDim.x) {
80 | sum += op(batch, plane, x);
81 | }
82 | }
83 |
84 | // sum over NumThreads within a warp
85 | sum = warpSum(sum);
86 |
87 | // 'transpose', and reduce within warp again
88 | __shared__ T shared[32];
89 | __syncthreads();
90 | if (threadIdx.x % WARP_SIZE == 0) {
91 | shared[threadIdx.x / WARP_SIZE] = sum;
92 | }
93 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) {
94 | // zero out the other entries in shared
95 | shared[threadIdx.x] = (T)0;
96 | }
97 | __syncthreads();
98 | if (threadIdx.x / WARP_SIZE == 0) {
99 | sum = warpSum(shared[threadIdx.x]);
100 | if (threadIdx.x == 0) {
101 | shared[0] = sum;
102 | }
103 | }
104 | __syncthreads();
105 |
106 | // Everyone picks it up, should be broadcast into the whole gradInput
107 | return shared[0];
108 | }
--------------------------------------------------------------------------------
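A pure-Python sketch of the butterfly reduction that `warpSum` implements with `WARP_SHFL_XOR`: XOR-ing lane ids with strides 1, 2, 4, ... makes every lane accumulate the full warp sum in log2(32) = 5 steps:

    WARP_SIZE = 32
    vals = list(range(WARP_SIZE))          # one value per lane

    stride = 1
    while stride < WARP_SIZE:
        # each lane adds the value held by the lane whose id differs by `stride`
        vals = [vals[i] + vals[i ^ stride] for i in range(WARP_SIZE)]
        stride <<= 1

    assert all(v == sum(range(WARP_SIZE)) for v in vals)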
/lib/models/sync_bn/inplace_abn/src/inplace_abn.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 |
3 | #include <vector>
4 |
5 | #include "inplace_abn.h"
6 |
7 | std::vector<at::Tensor> mean_var(at::Tensor x) {
8 | if (x.is_cuda()) {
9 | return mean_var_cuda(x);
10 | } else {
11 | return mean_var_cpu(x);
12 | }
13 | }
14 |
15 | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
16 | bool affine, float eps) {
17 | if (x.is_cuda()) {
18 | return forward_cuda(x, mean, var, weight, bias, affine, eps);
19 | } else {
20 | return forward_cpu(x, mean, var, weight, bias, affine, eps);
21 | }
22 | }
23 |
24 | std::vector<at::Tensor> edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
25 | bool affine, float eps) {
26 | if (z.is_cuda()) {
27 | return edz_eydz_cuda(z, dz, weight, bias, affine, eps);
28 | } else {
29 | return edz_eydz_cpu(z, dz, weight, bias, affine, eps);
30 | }
31 | }
32 |
33 | std::vector<at::Tensor> backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
34 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
35 | if (z.is_cuda()) {
36 | return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps);
37 | } else {
38 | return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps);
39 | }
40 | }
41 |
42 | void leaky_relu_forward(at::Tensor z, float slope) {
43 | at::leaky_relu_(z, slope);
44 | }
45 |
46 | void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) {
47 | if (z.is_cuda()) {
48 | return leaky_relu_backward_cuda(z, dz, slope);
49 | } else {
50 | return leaky_relu_backward_cpu(z, dz, slope);
51 | }
52 | }
53 |
54 | void elu_forward(at::Tensor z) {
55 | at::elu_(z);
56 | }
57 |
58 | void elu_backward(at::Tensor z, at::Tensor dz) {
59 | if (z.is_cuda()) {
60 | return elu_backward_cuda(z, dz);
61 | } else {
62 | return elu_backward_cpu(z, dz);
63 | }
64 | }
65 |
66 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
67 | m.def("mean_var", &mean_var, "Mean and variance computation");
68 | m.def("forward", &forward, "In-place forward computation");
69 | m.def("edz_eydz", &edz_eydz, "First part of backward computation");
70 | m.def("backward", &backward, "Second part of backward computation");
71 | m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation");
72 | m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion");
73 | m.def("elu_forward", &elu_forward, "Elu forward computation");
74 | m.def("elu_backward", &elu_backward, "Elu backward computation and inversion");
75 | }
--------------------------------------------------------------------------------
/lib/models/sync_bn/inplace_abn/src/inplace_abn.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <ATen/ATen.h>
4 |
5 | #include <vector>
6 |
7 | std::vector<at::Tensor> mean_var_cpu(at::Tensor x);
8 | std::vector<at::Tensor> mean_var_cuda(at::Tensor x);
9 |
10 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
11 | bool affine, float eps);
12 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
13 | bool affine, float eps);
14 |
15 | std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
16 |                                      bool affine, float eps);
17 | std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
18 |                                       bool affine, float eps);
19 |
20 | std::vector<at::Tensor> backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
21 |                                      at::Tensor edz, at::Tensor eydz, bool affine, float eps);
22 | std::vector<at::Tensor> backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
23 |                                       at::Tensor edz, at::Tensor eydz, bool affine, float eps);
24 |
25 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope);
26 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope);
27 |
28 | void elu_backward_cpu(at::Tensor z, at::Tensor dz);
29 | void elu_backward_cuda(at::Tensor z, at::Tensor dz);
--------------------------------------------------------------------------------
/lib/models/sync_bn/inplace_abn/src/inplace_abn_cpu.cpp:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 |
3 | #include <vector>
4 |
5 | #include "inplace_abn.h"
6 |
7 | at::Tensor reduce_sum(at::Tensor x) {
8 | if (x.ndimension() == 2) {
9 | return x.sum(0);
10 | } else {
11 | auto x_view = x.view({x.size(0), x.size(1), -1});
12 | return x_view.sum(-1).sum(0);
13 | }
14 | }
15 |
16 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) {
17 | if (x.ndimension() == 2) {
18 | return v;
19 | } else {
20 |     std::vector<int64_t> broadcast_size = {1, -1};
21 | for (int64_t i = 2; i < x.ndimension(); ++i)
22 | broadcast_size.push_back(1);
23 |
24 | return v.view(broadcast_size);
25 | }
26 | }
27 |
28 | int64_t count(at::Tensor x) {
29 | int64_t count = x.size(0);
30 | for (int64_t i = 2; i < x.ndimension(); ++i)
31 | count *= x.size(i);
32 |
33 | return count;
34 | }
35 |
36 | at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) {
37 | if (affine) {
38 | return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z);
39 | } else {
40 | return z;
41 | }
42 | }
43 |
44 | std::vector<at::Tensor> mean_var_cpu(at::Tensor x) {
45 | auto num = count(x);
46 | auto mean = reduce_sum(x) / num;
47 | auto diff = x - broadcast_to(mean, x);
48 | auto var = reduce_sum(diff.pow(2)) / num;
49 |
50 | return {mean, var};
51 | }
52 |
53 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
54 | bool affine, float eps) {
55 | auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var);
56 | auto mul = at::rsqrt(var + eps) * gamma;
57 |
58 | x.sub_(broadcast_to(mean, x));
59 | x.mul_(broadcast_to(mul, x));
60 | if (affine) x.add_(broadcast_to(bias, x));
61 |
62 | return x;
63 | }
64 |
65 | std::vector<at::Tensor> edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
66 | bool affine, float eps) {
67 | auto edz = reduce_sum(dz);
68 | auto y = invert_affine(z, weight, bias, affine, eps);
69 | auto eydz = reduce_sum(y * dz);
70 |
71 | return {edz, eydz};
72 | }
73 |
74 | std::vector<at::Tensor> backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
75 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
76 | auto y = invert_affine(z, weight, bias, affine, eps);
77 | auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps);
78 |
79 | auto num = count(z);
80 | auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz);
81 |
82 | auto dweight = at::empty(z.type(), {0});
83 | auto dbias = at::empty(z.type(), {0});
84 | if (affine) {
85 | dweight = eydz * at::sign(weight);
86 | dbias = edz;
87 | }
88 |
89 | return {dx, dweight, dbias};
90 | }
91 |
92 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) {
93 | AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] {
94 | int64_t count = z.numel();
95 |         auto *_z = z.data<scalar_t>();
96 |         auto *_dz = dz.data<scalar_t>();
97 |
98 | for (int64_t i = 0; i < count; ++i) {
99 | if (_z[i] < 0) {
100 | _z[i] *= 1 / slope;
101 | _dz[i] *= slope;
102 | }
103 | }
104 | }));
105 | }
106 |
107 | void elu_backward_cpu(at::Tensor z, at::Tensor dz) {
108 | AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] {
109 | int64_t count = z.numel();
110 |         auto *_z = z.data<scalar_t>();
111 |         auto *_dz = dz.data<scalar_t>();
112 |
113 | for (int64_t i = 0; i < count; ++i) {
114 | if (_z[i] < 0) {
115 |         _dz[i] *= (_z[i] + 1.f);  // scale the gradient with the original output first
116 |         _z[i] = log1p(_z[i]);     // then invert the activation in place
117 | }
118 | }
119 | }));
120 | }
--------------------------------------------------------------------------------
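A NumPy sketch of the inversion trick in `leaky_relu_backward_cpu` above: because the forward pass ran in place, only the activation output is stored; the backward pass recovers the pre-activation values (dividing the negative part by the slope) while applying the chain rule to the incoming gradient:

    import numpy as np

    slope = 0.01
    x = np.array([-2.0, -0.5, 1.0])
    z = np.where(x < 0, x * slope, x)      # in-place leaky_relu forward output
    dz = np.ones_like(z)

    neg = z < 0                            # z < 0 exactly where x < 0
    dz[neg] *= slope                       # d leaky_relu / dx on the negative side
    z[neg] *= 1.0 / slope                  # invert the activation: recover x
    assert np.allclose(z, x)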
/lib/models/sync_bn/inplace_abn/src/inplace_abn_cuda.cu:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 |
3 | #include <thrust/device_ptr.h>
4 | #include <thrust/transform.h>
5 |
6 | #include <vector>
7 |
8 | #include "common.h"
9 | #include "inplace_abn.h"
10 |
11 | // Checks
12 | #ifndef AT_CHECK
13 | #define AT_CHECK AT_ASSERT
14 | #endif
15 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
16 | #define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x " must be contiguous")
17 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
18 |
19 | // Utilities
20 | void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) {
21 | num = x.size(0);
22 | chn = x.size(1);
23 | sp = 1;
24 | for (int64_t i = 2; i < x.ndimension(); ++i)
25 | sp *= x.size(i);
26 | }
27 |
28 | // Operations for reduce
29 | template <typename T>
30 | struct SumOp {
31 | __device__ SumOp(const T *t, int c, int s)
32 | : tensor(t), chn(c), sp(s) {}
33 | __device__ __forceinline__ T operator()(int batch, int plane, int n) {
34 | return tensor[(batch * chn + plane) * sp + n];
35 | }
36 | const T *tensor;
37 | const int chn;
38 | const int sp;
39 | };
40 |
41 | template <typename T>
42 | struct VarOp {
43 | __device__ VarOp(T m, const T *t, int c, int s)
44 | : mean(m), tensor(t), chn(c), sp(s) {}
45 | __device__ __forceinline__ T operator()(int batch, int plane, int n) {
46 | T val = tensor[(batch * chn + plane) * sp + n];
47 | return (val - mean) * (val - mean);
48 | }
49 | const T mean;
50 | const T *tensor;
51 | const int chn;
52 | const int sp;
53 | };
54 |
55 | template <typename T>
56 | struct GradOp {
57 | __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s)
58 | : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
59 | __device__ __forceinline__ Pair<T> operator()(int batch, int plane, int n) {
60 | T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight;
61 | T _dz = dz[(batch * chn + plane) * sp + n];
62 | return Pair<T>(_dz, _y * _dz);
63 | }
64 | const T weight;
65 | const T bias;
66 | const T *z;
67 | const T *dz;
68 | const int chn;
69 | const int sp;
70 | };
71 |
72 | /***********
73 | * mean_var
74 | ***********/
75 |
76 | template <typename T>
77 | __global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) {
78 | int plane = blockIdx.x;
79 | T norm = T(1) / T(num * sp);
80 |
81 | T _mean = reduce<T, SumOp<T>>(SumOp<T>(x, chn, sp), plane, num, chn, sp) * norm;
82 | __syncthreads();
83 | T _var = reduce<T, VarOp<T>>(VarOp<T>(_mean, x, chn, sp), plane, num, chn, sp) * norm;
84 |
85 | if (threadIdx.x == 0) {
86 | mean[plane] = _mean;
87 | var[plane] = _var;
88 | }
89 | }
90 |
91 | std::vector<at::Tensor> mean_var_cuda(at::Tensor x) {
92 | CHECK_INPUT(x);
93 |
94 | // Extract dimensions
95 | int64_t num, chn, sp;
96 | get_dims(x, num, chn, sp);
97 |
98 | // Prepare output tensors
99 | auto mean = at::empty(x.type(), {chn});
100 | auto var = at::empty(x.type(), {chn});
101 |
102 | // Run kernel
103 | dim3 blocks(chn);
104 | dim3 threads(getNumThreads(sp));
105 | AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] {
106 | mean_var_kernel<scalar_t><<<blocks, threads>>>(
107 | x.data<scalar_t>(),
108 | mean.data<scalar_t>(),
109 | var.data<scalar_t>(),
110 | num, chn, sp);
111 | }));
112 |
113 | return {mean, var};
114 | }
115 |
116 | /**********
117 | * forward
118 | **********/
119 |
120 | template <typename T>
121 | __global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias,
122 | bool affine, float eps, int num, int chn, int sp) {
123 | int plane = blockIdx.x;
124 |
125 | T _mean = mean[plane];
126 | T _var = var[plane];
127 | T _weight = affine ? abs(weight[plane]) + eps : T(1);
128 | T _bias = affine ? bias[plane] : T(0);
129 |
130 | T mul = rsqrt(_var + eps) * _weight;
131 |
132 | for (int batch = 0; batch < num; ++batch) {
133 | for (int n = threadIdx.x; n < sp; n += blockDim.x) {
134 | T _x = x[(batch * chn + plane) * sp + n];
135 | T _y = (_x - _mean) * mul + _bias;
136 |
137 | x[(batch * chn + plane) * sp + n] = _y;
138 | }
139 | }
140 | }
141 |
142 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
143 | bool affine, float eps) {
144 | CHECK_INPUT(x);
145 | CHECK_INPUT(mean);
146 | CHECK_INPUT(var);
147 | CHECK_INPUT(weight);
148 | CHECK_INPUT(bias);
149 |
150 | // Extract dimensions
151 | int64_t num, chn, sp;
152 | get_dims(x, num, chn, sp);
153 |
154 | // Run kernel
155 | dim3 blocks(chn);
156 | dim3 threads(getNumThreads(sp));
157 | AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] {
158 | forward_kernel<scalar_t><<<blocks, threads>>>(
159 | x.data<scalar_t>(),
160 | mean.data<scalar_t>(),
161 | var.data<scalar_t>(),
162 | weight.data<scalar_t>(),
163 | bias.data<scalar_t>(),
164 | affine, eps, num, chn, sp);
165 | }));
166 |
167 | return x;
168 | }
169 |
170 | /***********
171 | * edz_eydz
172 | ***********/
173 |
174 | template <typename T>
175 | __global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias,
176 | T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) {
177 | int plane = blockIdx.x;
178 |
179 | T _weight = affine ? abs(weight[plane]) + eps : 1.f;
180 | T _bias = affine ? bias[plane] : 0.f;
181 |
182 | Pair<T> res = reduce<Pair<T>, GradOp<T>>(GradOp<T>(_weight, _bias, z, dz, chn, sp), plane, num, chn, sp);
183 | __syncthreads();
184 |
185 | if (threadIdx.x == 0) {
186 | edz[plane] = res.v1;
187 | eydz[plane] = res.v2;
188 | }
189 | }
190 |
191 | std::vector<at::Tensor> edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
192 | bool affine, float eps) {
193 | CHECK_INPUT(z);
194 | CHECK_INPUT(dz);
195 | CHECK_INPUT(weight);
196 | CHECK_INPUT(bias);
197 |
198 | // Extract dimensions
199 | int64_t num, chn, sp;
200 | get_dims(z, num, chn, sp);
201 |
202 | auto edz = at::empty(z.type(), {chn});
203 | auto eydz = at::empty(z.type(), {chn});
204 |
205 | // Run kernel
206 | dim3 blocks(chn);
207 | dim3 threads(getNumThreads(sp));
208 | AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] {
209 | edz_eydz_kernel<scalar_t><<<blocks, threads>>>(
210 | z.data<scalar_t>(),
211 | dz.data<scalar_t>(),
212 | weight.data<scalar_t>(),
213 | bias.data<scalar_t>(),
214 | edz.data<scalar_t>(),
215 | eydz.data<scalar_t>(),
216 | affine, eps, num, chn, sp);
217 | }));
218 |
219 | return {edz, eydz};
220 | }
221 |
222 | /***********
223 | * backward
224 | ***********/
225 |
226 | template <typename T>
227 | __global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz,
228 | const T *eydz, T *dx, T *dweight, T *dbias,
229 | bool affine, float eps, int num, int chn, int sp) {
230 | int plane = blockIdx.x;
231 |
232 | T _weight = affine ? abs(weight[plane]) + eps : 1.f;
233 | T _bias = affine ? bias[plane] : 0.f;
234 | T _var = var[plane];
235 | T _edz = edz[plane];
236 | T _eydz = eydz[plane];
237 |
238 | T _mul = _weight * rsqrt(_var + eps);
239 | T count = T(num * sp);
240 |
241 | for (int batch = 0; batch < num; ++batch) {
242 | for (int n = threadIdx.x; n < sp; n += blockDim.x) {
243 | T _dz = dz[(batch * chn + plane) * sp + n];
244 | T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight;
245 |
246 | dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul;
247 | }
248 | }
249 |
250 | if (threadIdx.x == 0) {
251 | if (affine) {
252 | dweight[plane] = weight[plane] > 0 ? _eydz : -_eydz;
253 | dbias[plane] = _edz;
254 | }
255 | }
256 | }
257 |
258 | std::vector<at::Tensor> backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
259 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
260 | CHECK_INPUT(z);
261 | CHECK_INPUT(dz);
262 | CHECK_INPUT(var);
263 | CHECK_INPUT(weight);
264 | CHECK_INPUT(bias);
265 | CHECK_INPUT(edz);
266 | CHECK_INPUT(eydz);
267 |
268 | // Extract dimensions
269 | int64_t num, chn, sp;
270 | get_dims(z, num, chn, sp);
271 |
272 | auto dx = at::zeros_like(z);
273 | auto dweight = at::zeros_like(weight);
274 | auto dbias = at::zeros_like(bias);
275 |
276 | // Run kernel
277 | dim3 blocks(chn);
278 | dim3 threads(getNumThreads(sp));
279 | AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] {
280 | backward_kernel<scalar_t><<<blocks, threads>>>(
281 | z.data<scalar_t>(),
282 | dz.data<scalar_t>(),
283 | var.data<scalar_t>(),
284 | weight.data<scalar_t>(),
285 | bias.data<scalar_t>(),
286 | edz.data<scalar_t>(),
287 | eydz.data<scalar_t>(),
288 | dx.data<scalar_t>(),
289 | dweight.data<scalar_t>(),
290 | dbias.data<scalar_t>(),
291 | affine, eps, num, chn, sp);
292 | }));
293 |
294 | return {dx, dweight, dbias};
295 | }
296 |
297 | /**************
298 | * activations
299 | **************/
300 |
301 | template <typename T>
302 | inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
303 | // Create thrust pointers
304 | thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
305 | thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
306 |
307 | thrust::transform_if(th_dz, th_dz + count, th_z, th_dz,
308 | [slope] __device__ (const T& dz) { return dz * slope; },
309 | [] __device__ (const T& z) { return z < 0; });
310 | thrust::transform_if(th_z, th_z + count, th_z,
311 | [slope] __device__ (const T& z) { return z / slope; },
312 | [] __device__ (const T& z) { return z < 0; });
313 | }
314 |
315 | void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {
316 | CHECK_INPUT(z);
317 | CHECK_INPUT(dz);
318 |
319 | int64_t count = z.numel();
320 |
321 | AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
322 | leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
323 | }));
324 | }
325 |
326 | template <typename T>
327 | inline void elu_backward_impl(T *z, T *dz, int64_t count) {
328 | // Create thrust pointers
329 | thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
330 | thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
331 |
332 | thrust::transform_if(th_dz, th_dz + count, th_z, th_z, th_dz,
333 | [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
334 | [] __device__ (const T& z) { return z < 0; });
335 | thrust::transform_if(th_z, th_z + count, th_z,
336 | [] __device__ (const T& z) { return log1p(z); },
337 | [] __device__ (const T& z) { return z < 0; });
338 | }
339 |
340 | void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
341 | CHECK_INPUT(z);
342 | CHECK_INPUT(dz);
343 |
344 | int64_t count = z.numel();
345 |
346 | AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cuda", ([&] {
347 | elu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), count);
348 | }));
349 | }
350 |
--------------------------------------------------------------------------------
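
The activation backward kernels above follow the same in-place pattern: only the output z and the gradient dz are kept, and both are rewritten in place. A plain PyTorch sketch of the leaky-ReLU case (slope and sizes are assumed values):

import torch
import torch.nn.functional as F

slope = 0.01
x = torch.randn(1000)
z = F.leaky_relu(x, slope)
dz = torch.randn(1000)

neg = z < 0                              # the input was negative iff the output is
dz = torch.where(neg, dz * slope, dz)    # d(leaky_relu)/dx = slope where x < 0
x_rec = torch.where(neg, z / slope, z)   # invert the activation, as the kernel does
assert torch.allclose(x_rec, x, atol=1e-5)
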
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HRNet/HRNet-Semantic-Segmentation/0bbb2880446ddff2d78f8dd7e8c4c610151d5a51/lib/utils/__init__.py
--------------------------------------------------------------------------------
/lib/utils/distributed.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Jingyi Xie (hsfzxjy@gmail.com)
5 | # ------------------------------------------------------------------------------
6 |
7 | import torch
8 | import torch.distributed as torch_dist
9 |
10 | def is_distributed():
11 | return torch_dist.is_initialized()
12 |
13 | def get_world_size():
14 | if not torch_dist.is_initialized():
15 | return 1
16 | return torch_dist.get_world_size()
17 |
18 | def get_rank():
19 | if not torch_dist.is_initialized():
20 | return 0
21 | return torch_dist.get_rank()
--------------------------------------------------------------------------------
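
These helpers fall back to single-process defaults when no process group has been initialized, so the same call sites work under both launch modes. A small usage sketch (assuming lib/ is on sys.path, as tools/_init_paths.py arranges):

from utils.distributed import is_distributed, get_rank, get_world_size

# In a plain, non-distributed run:
assert not is_distributed()
assert get_rank() == 0 and get_world_size() == 1
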
/lib/utils/modelsummary.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
5 | # Modified by Ke Sun (sunk@mail.ustc.edu.cn)
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import os
13 | import logging
14 | from collections import namedtuple
15 |
16 | import torch
17 | import torch.nn as nn
18 |
19 | def get_model_summary(model, *input_tensors, item_length=26, verbose=False):
20 | """
21 | :param model: network to summarize
22 | :param input_tensors: example inputs forwarded through the model
23 | :param item_length: column width used for the printed table
24 | :return: summary string; per-layer rows are included only when verbose=True
25 | """
26 |
27 | summary = []
28 |
29 | ModuleDetails = namedtuple(
30 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"])
31 | hooks = []
32 | layer_instances = {}
33 |
34 | def add_hooks(module):
35 |
36 | def hook(module, input, output):
37 | class_name = str(module.__class__.__name__)
38 |
39 | instance_index = 1
40 | if class_name not in layer_instances:
41 | layer_instances[class_name] = instance_index
42 | else:
43 | instance_index = layer_instances[class_name] + 1
44 | layer_instances[class_name] = instance_index
45 |
46 | layer_name = class_name + "_" + str(instance_index)
47 |
48 | params = 0
49 |
50 | if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \
51 | class_name.find("Linear") != -1:
52 | for param_ in module.parameters():
53 | params += param_.view(-1).size(0)
54 |
55 | flops = "Not Available"
56 | if class_name.find("Conv") != -1 and hasattr(module, "weight"):
57 | flops = (
58 | torch.prod(
59 | torch.LongTensor(list(module.weight.data.size()))) *
60 | torch.prod(
61 | torch.LongTensor(list(output.size())[2:]))).item()
62 | elif isinstance(module, nn.Linear):
63 | flops = (torch.prod(torch.LongTensor(list(output.size()))) \
64 | * input[0].size(1)).item()
65 |
66 | if isinstance(input[0], list):
67 | input = input[0]
68 | if isinstance(output, list):
69 | output = output[0]
70 |
71 | summary.append(
72 | ModuleDetails(
73 | name=layer_name,
74 | input_size=list(input[0].size()),
75 | output_size=list(output.size()),
76 | num_parameters=params,
77 | multiply_adds=flops)
78 | )
79 |
80 | if not isinstance(module, nn.ModuleList) \
81 | and not isinstance(module, nn.Sequential) \
82 | and module != model:
83 | hooks.append(module.register_forward_hook(hook))
84 |
85 | model.eval()
86 | model.apply(add_hooks)
87 |
88 | space_len = item_length
89 |
90 | model(*input_tensors)
91 | for hook in hooks:
92 | hook.remove()
93 |
94 | details = ''
95 | if verbose:
96 | details = "Model Summary" + \
97 | os.linesep + \
98 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format(
99 | ' ' * (space_len - len("Name")),
100 | ' ' * (space_len - len("Input Size")),
101 | ' ' * (space_len - len("Output Size")),
102 | ' ' * (space_len - len("Parameters")),
103 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \
104 | + os.linesep + '-' * space_len * 5 + os.linesep
105 |
106 | params_sum = 0
107 | flops_sum = 0
108 | for layer in summary:
109 | params_sum += layer.num_parameters
110 | if layer.multiply_adds != "Not Available":
111 | flops_sum += layer.multiply_adds
112 | if verbose:
113 | details += "{}{}{}{}{}{}{}{}{}{}".format(
114 | layer.name,
115 | ' ' * (space_len - len(layer.name)),
116 | layer.input_size,
117 | ' ' * (space_len - len(str(layer.input_size))),
118 | layer.output_size,
119 | ' ' * (space_len - len(str(layer.output_size))),
120 | layer.num_parameters,
121 | ' ' * (space_len - len(str(layer.num_parameters))),
122 | layer.multiply_adds,
123 | ' ' * (space_len - len(str(layer.multiply_adds)))) \
124 | + os.linesep + '-' * space_len * 5 + os.linesep
125 |
126 | details += os.linesep \
127 | + "Total Parameters: {:,}".format(params_sum) \
128 | + os.linesep + '-' * space_len * 5 + os.linesep
129 | details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \
130 | + os.linesep + '-' * space_len * 5 + os.linesep
131 | details += "Number of Layers" + os.linesep
132 | for layer in layer_instances:
133 | details += "{} : {} layers ".format(layer, layer_instances[layer])
134 |
135 | return details
--------------------------------------------------------------------------------
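
A hypothetical call to get_model_summary on a toy network; any module containing Conv/BatchNorm/Linear layers is counted, and the import assumes lib/ is on sys.path:

import torch
import torch.nn as nn
from utils.modelsummary import get_model_summary

net = nn.Sequential(
    nn.Conv2d(3, 8, kernel_size=3, padding=1),
    nn.BatchNorm2d(8),
    nn.ReLU(),
)
print(get_model_summary(net, torch.rand(1, 3, 32, 32), verbose=True))
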
/lib/utils/utils.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import os
12 | import logging
13 | import time
14 | from pathlib import Path
15 |
16 | import numpy as np
17 |
18 | import torch
19 | import torch.nn as nn
20 |
21 | class FullModel(nn.Module):
22 | """
23 | Compute the loss inside the forward pass so that it is
24 | distributed across GPUs, reducing memory cost on the main GPU.
25 | You can check the following discussion:
26 | https://discuss.pytorch.org/t/dataparallel-imbalanced-memory-usage/22551/21
27 | """
28 | def __init__(self, model, loss):
29 | super(FullModel, self).__init__()
30 | self.model = model
31 | self.loss = loss
32 |
33 | def forward(self, inputs, labels, *args, **kwargs):
34 | outputs = self.model(inputs, *args, **kwargs)
35 | loss = self.loss(outputs, labels)
36 | return torch.unsqueeze(loss,0), outputs
37 |
38 | class AverageMeter(object):
39 | """Computes and stores the average and current value"""
40 |
41 | def __init__(self):
42 | self.initialized = False
43 | self.val = None
44 | self.avg = None
45 | self.sum = None
46 | self.count = None
47 |
48 | def initialize(self, val, weight):
49 | self.val = val
50 | self.avg = val
51 | self.sum = val * weight
52 | self.count = weight
53 | self.initialized = True
54 |
55 | def update(self, val, weight=1):
56 | if not self.initialized:
57 | self.initialize(val, weight)
58 | else:
59 | self.add(val, weight)
60 |
61 | def add(self, val, weight):
62 | self.val = val
63 | self.sum += val * weight
64 | self.count += weight
65 | self.avg = self.sum / self.count
66 |
67 | def value(self):
68 | return self.val
69 |
70 | def average(self):
71 | return self.avg
72 |
73 | def create_logger(cfg, cfg_name, phase='train'):
74 | root_output_dir = Path(cfg.OUTPUT_DIR)
75 | # set up logger
76 | if not root_output_dir.exists():
77 | print('=> creating {}'.format(root_output_dir))
78 | root_output_dir.mkdir()
79 |
80 | dataset = cfg.DATASET.DATASET
81 | model = cfg.MODEL.NAME
82 | cfg_name = os.path.basename(cfg_name).split('.')[0]
83 |
84 | final_output_dir = root_output_dir / dataset / cfg_name
85 |
86 | print('=> creating {}'.format(final_output_dir))
87 | final_output_dir.mkdir(parents=True, exist_ok=True)
88 |
89 | time_str = time.strftime('%Y-%m-%d-%H-%M')
90 | log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase)
91 | final_log_file = final_output_dir / log_file
92 | head = '%(asctime)-15s %(message)s'
93 | logging.basicConfig(filename=str(final_log_file),
94 | format=head)
95 | logger = logging.getLogger()
96 | logger.setLevel(logging.INFO)
97 | console = logging.StreamHandler()
98 | logging.getLogger('').addHandler(console)
99 |
100 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \
101 | (cfg_name + '_' + time_str)
102 | print('=> creating {}'.format(tensorboard_log_dir))
103 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True)
104 |
105 | return logger, str(final_output_dir), str(tensorboard_log_dir)
106 |
107 | def get_confusion_matrix(label, pred, size, num_class, ignore=-1):
108 | """
109 | Calculate the confusion matrix from the given label and pred.
110 | """
111 | output = pred.cpu().numpy().transpose(0, 2, 3, 1)
112 | seg_pred = np.asarray(np.argmax(output, axis=3), dtype=np.uint8)
113 | seg_gt = np.asarray(
114 | label.cpu().numpy()[:, :size[-2], :size[-1]], dtype=np.int64)
115 |
116 | ignore_index = seg_gt != ignore
117 | seg_gt = seg_gt[ignore_index]
118 | seg_pred = seg_pred[ignore_index]
119 |
120 | index = (seg_gt * num_class + seg_pred).astype('int32')
121 | label_count = np.bincount(index)
122 | confusion_matrix = np.zeros((num_class, num_class))
123 |
124 | for i_label in range(num_class):
125 | for i_pred in range(num_class):
126 | cur_index = i_label * num_class + i_pred
127 | if cur_index < len(label_count):
128 | confusion_matrix[i_label,
129 | i_pred] = label_count[cur_index]
130 | return confusion_matrix
131 |
132 | def adjust_learning_rate(optimizer, base_lr, max_iters,
133 | cur_iters, power=0.9, nbb_mult=10):
134 | lr = base_lr*((1-float(cur_iters)/max_iters)**(power))
135 | optimizer.param_groups[0]['lr'] = lr
136 | if len(optimizer.param_groups) == 2:
137 | optimizer.param_groups[1]['lr'] = lr * nbb_mult
138 | return lr
--------------------------------------------------------------------------------
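
A worked example of the poly schedule implemented by adjust_learning_rate; base_lr, max_iters, and the sampled iterations are assumed values, and the import again presumes lib/ is on sys.path:

import torch
from utils.utils import adjust_learning_rate

opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.01)
for cur in (0, 5000, 9000):
    lr = adjust_learning_rate(opt, base_lr=0.01, max_iters=10000, cur_iters=cur)
    print(cur, round(lr, 6))   # 0.01, then 0.01*(0.5**0.9) ~ 0.005359, then 0.01*(0.1**0.9) ~ 0.001259
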
/requirements.txt:
--------------------------------------------------------------------------------
1 | EasyDict==1.7
2 | shapely
3 | Cython
4 | scipy
5 | pandas
6 | pyyaml
7 | json_tricks
8 | scikit-image
9 | yacs>=0.1.5
10 | tensorboardX>=1.6
11 | tqdm
12 | ninja
13 |
14 |
--------------------------------------------------------------------------------
/run_dist.sh:
--------------------------------------------------------------------------------
1 | PYTHON="/opt/conda/bin/python"
2 | GPU_NUM=$1
3 | CONFIG=$2
4 |
5 | $PYTHON -m pip install -r requirements.txt
6 |
7 | $PYTHON -m torch.distributed.launch \
8 | --nproc_per_node=$GPU_NUM \
9 | tools/train.py \
10 | --cfg experiments/$CONFIG.yaml \
11 | 2>&1 | tee local_log.txt
12 |
--------------------------------------------------------------------------------
/run_local.sh:
--------------------------------------------------------------------------------
1 | PYTHON="/data/anaconda/envs/pytorch1.7.1/bin/python"
2 | GPU_NUM=4
3 | CONFIG="seg_hrnet_w48_cls59_520x520_sgd_lr1e-3_wd1e-4_bs_16_epoch200_paddle"
4 |
5 | $PYTHON -m pip install -r requirements.txt
6 |
7 | $PYTHON -m torch.distributed.launch \
8 | --nproc_per_node=$GPU_NUM \
9 | tools/train.py \
10 | --cfg experiments/pascal_ctx/$CONFIG.yaml \
11 | 2>&1 | tee local_log.txt
12 |
--------------------------------------------------------------------------------
/tools/_init_paths.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import os.path as osp
12 | import sys
13 |
14 |
15 | def add_path(path):
16 | if path not in sys.path:
17 | sys.path.insert(0, path)
18 |
19 | this_dir = osp.dirname(__file__)
20 |
21 | lib_path = osp.join(this_dir, '..', 'lib')
22 | add_path(lib_path)
23 |
--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | import argparse
8 | import os
9 | import pprint
10 | import shutil
11 | import sys
12 |
13 | import logging
14 | import time
15 | import timeit
16 | from pathlib import Path
17 |
18 | import numpy as np
19 |
20 | import torch
21 | import torch.nn as nn
22 | import torch.backends.cudnn as cudnn
23 |
24 | import _init_paths
25 | import models
26 | import datasets
27 | from config import config
28 | from config import update_config
29 | from core.function import testval, test
30 | from utils.modelsummary import get_model_summary
31 | from utils.utils import create_logger, FullModel
32 |
33 | def parse_args():
34 | parser = argparse.ArgumentParser(description='Test segmentation network')
35 |
36 | parser.add_argument('--cfg',
37 | help='experiment configure file name',
38 | required=True,
39 | type=str)
40 | parser.add_argument('opts',
41 | help="Modify config options using the command-line",
42 | default=None,
43 | nargs=argparse.REMAINDER)
44 |
45 | args = parser.parse_args()
46 | update_config(config, args)
47 |
48 | return args
49 |
50 | def main():
51 | args = parse_args()
52 |
53 | logger, final_output_dir, _ = create_logger(
54 | config, args.cfg, 'test')
55 |
56 | logger.info(pprint.pformat(args))
57 | logger.info(pprint.pformat(config))
58 |
59 | # cudnn related setting
60 | cudnn.benchmark = config.CUDNN.BENCHMARK
61 | cudnn.deterministic = config.CUDNN.DETERMINISTIC
62 | cudnn.enabled = config.CUDNN.ENABLED
63 |
64 | # build model
65 | if torch.__version__.startswith('1'):
66 | module = eval('models.'+config.MODEL.NAME)
67 | module.BatchNorm2d_class = module.BatchNorm2d = torch.nn.BatchNorm2d
68 | model = eval('models.'+config.MODEL.NAME +
69 | '.get_seg_model')(config)
70 |
71 | dump_input = torch.rand(
72 | (1, 3, config.TRAIN.IMAGE_SIZE[1], config.TRAIN.IMAGE_SIZE[0])
73 | )
74 | logger.info(get_model_summary(model.cuda(), dump_input.cuda()))
75 |
76 | if config.TEST.MODEL_FILE:
77 | model_state_file = config.TEST.MODEL_FILE
78 | else:
79 | model_state_file = os.path.join(final_output_dir, 'final_state.pth')
80 | logger.info('=> loading model from {}'.format(model_state_file))
81 |
82 | pretrained_dict = torch.load(model_state_file)
83 | if 'state_dict' in pretrained_dict:
84 | pretrained_dict = pretrained_dict['state_dict']
85 | model_dict = model.state_dict()
86 | pretrained_dict = {k[6:]: v for k, v in pretrained_dict.items()
87 | if k[6:] in model_dict.keys()}
88 | for k, _ in pretrained_dict.items():
89 | logger.info(
90 | '=> loading {} from pretrained model'.format(k))
91 | model_dict.update(pretrained_dict)
92 | model.load_state_dict(model_dict)
93 |
94 | gpus = list(config.GPUS)
95 | model = nn.DataParallel(model, device_ids=gpus).cuda()
96 |
97 | # prepare data
98 | test_size = (config.TEST.IMAGE_SIZE[1], config.TEST.IMAGE_SIZE[0])
99 | test_dataset = eval('datasets.'+config.DATASET.DATASET)(
100 | root=config.DATASET.ROOT,
101 | list_path=config.DATASET.TEST_SET,
102 | num_samples=None,
103 | num_classes=config.DATASET.NUM_CLASSES,
104 | multi_scale=False,
105 | flip=False,
106 | ignore_label=config.TRAIN.IGNORE_LABEL,
107 | base_size=config.TEST.BASE_SIZE,
108 | crop_size=test_size,
109 | downsample_rate=1)
110 |
111 | testloader = torch.utils.data.DataLoader(
112 | test_dataset,
113 | batch_size=1,
114 | shuffle=False,
115 | num_workers=config.WORKERS,
116 | pin_memory=True)
117 |
118 | start = timeit.default_timer()
119 | if 'val' in config.DATASET.TEST_SET:
120 | mean_IoU, IoU_array, pixel_acc, mean_acc = testval(config,
121 | test_dataset,
122 | testloader,
123 | model)
124 |
125 | msg = 'MeanIU: {: 4.4f}, Pixel_Acc: {: 4.4f}, \
126 | Mean_Acc: {: 4.4f}, Class IoU: '.format(mean_IoU,
127 | pixel_acc, mean_acc)
128 | logging.info(msg)
129 | logging.info(IoU_array)
130 | elif 'test' in config.DATASET.TEST_SET:
131 | test(config,
132 | test_dataset,
133 | testloader,
134 | model,
135 | sv_dir=final_output_dir)
136 |
137 | end = timeit.default_timer()
138 | logger.info('Mins: %d' % int((end-start)/60))
139 | logger.info('Done')
140 |
141 |
142 | if __name__ == '__main__':
143 | main()
144 |
--------------------------------------------------------------------------------
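
The k[6:] slice in test.py works because checkpoints saved through FullModel prefix network weights with 'model.' (and criterion state with 'loss.'). A tiny sketch with made-up keys:

pretrained_dict = {'model.conv1.weight': 0, 'loss.weight': 1}
model_dict = {'conv1.weight': None}
kept = {k[6:]: v for k, v in pretrained_dict.items() if k[6:] in model_dict}
assert list(kept) == ['conv1.weight']   # 'loss.weight'[6:] == 'eight', so it is filtered out
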
/tools/train.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Ke Sun (sunk@mail.ustc.edu.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | import argparse
8 | import os
9 | import pprint
10 | import shutil
11 | import sys
12 |
13 | import logging
14 | import time
15 | import timeit
16 | from pathlib import Path
17 |
18 | import numpy as np
19 |
20 | import torch
21 | import torch.nn as nn
22 | import torch.backends.cudnn as cudnn
23 | import torch.optim
24 | from tensorboardX import SummaryWriter
25 |
26 | import _init_paths
27 | import models
28 | import datasets
29 | from config import config
30 | from config import update_config
31 | from core.criterion import CrossEntropy, OhemCrossEntropy
32 | from core.function import train, validate
33 | from utils.modelsummary import get_model_summary
34 | from utils.utils import create_logger, FullModel
35 |
36 | def parse_args():
37 | parser = argparse.ArgumentParser(description='Train segmentation network')
38 |
39 | parser.add_argument('--cfg',
40 | help='experiment configure file name',
41 | required=True,
42 | type=str)
43 | parser.add_argument('--seed', type=int, default=304)
44 | parser.add_argument("--local_rank", type=int, default=-1)
45 | parser.add_argument('opts',
46 | help="Modify config options using the command-line",
47 | default=None,
48 | nargs=argparse.REMAINDER)
49 |
50 | args = parser.parse_args()
51 | update_config(config, args)
52 |
53 | return args
54 |
55 | def get_sampler(dataset):
56 | from utils.distributed import is_distributed
57 | if is_distributed():
58 | from torch.utils.data.distributed import DistributedSampler
59 | return DistributedSampler(dataset)
60 | else:
61 | return None
62 |
63 | def main():
64 | args = parse_args()
65 |
66 | if args.seed > 0:
67 | import random
68 | print('Seeding with', args.seed)
69 | random.seed(args.seed)
70 | torch.manual_seed(args.seed)
71 |
72 | logger, final_output_dir, tb_log_dir = create_logger(
73 | config, args.cfg, 'train')
74 |
75 | logger.info(pprint.pformat(args))
76 | logger.info(config)
77 |
78 | writer_dict = {
79 | 'writer': SummaryWriter(tb_log_dir),
80 | 'train_global_steps': 0,
81 | 'valid_global_steps': 0,
82 | }
83 |
84 | # cudnn related setting
85 | cudnn.benchmark = config.CUDNN.BENCHMARK
86 | cudnn.deterministic = config.CUDNN.DETERMINISTIC
87 | cudnn.enabled = config.CUDNN.ENABLED
88 | gpus = list(config.GPUS)
89 | distributed = args.local_rank >= 0
90 | if distributed:
91 | device = torch.device('cuda:{}'.format(args.local_rank))
92 | torch.cuda.set_device(device)
93 | torch.distributed.init_process_group(
94 | backend="nccl", init_method="env://",
95 | )
96 |
97 | # build model
98 | model = eval('models.'+config.MODEL.NAME +
99 | '.get_seg_model')(config)
100 |
101 | # dump_input = torch.rand(
102 | # (1, 3, config.TRAIN.IMAGE_SIZE[1], config.TRAIN.IMAGE_SIZE[0])
103 | # )
104 | # logger.info(get_model_summary(model.cuda(), dump_input.cuda()))
105 |
106 | # copy model file
107 | if distributed and args.local_rank == 0:
108 | this_dir = os.path.dirname(__file__)
109 | models_dst_dir = os.path.join(final_output_dir, 'models')
110 | # if os.path.exists(models_dst_dir):
111 | # shutil.rmtree(models_dst_dir)
112 | # shutil.copytree(os.path.join(this_dir, '../lib/models'), models_dst_dir)
113 |
114 | if distributed:
115 | batch_size = config.TRAIN.BATCH_SIZE_PER_GPU
116 | else:
117 | batch_size = config.TRAIN.BATCH_SIZE_PER_GPU * len(gpus)
118 |
119 | # prepare data
120 | crop_size = (config.TRAIN.IMAGE_SIZE[1], config.TRAIN.IMAGE_SIZE[0])
121 | train_dataset = eval('datasets.'+config.DATASET.DATASET)(
122 | root=config.DATASET.ROOT,
123 | list_path=config.DATASET.TRAIN_SET,
124 | num_samples=None,
125 | num_classes=config.DATASET.NUM_CLASSES,
126 | multi_scale=config.TRAIN.MULTI_SCALE,
127 | flip=config.TRAIN.FLIP,
128 | ignore_label=config.TRAIN.IGNORE_LABEL,
129 | base_size=config.TRAIN.BASE_SIZE,
130 | crop_size=crop_size,
131 | downsample_rate=config.TRAIN.DOWNSAMPLERATE,
132 | scale_factor=config.TRAIN.SCALE_FACTOR)
133 |
134 | train_sampler = get_sampler(train_dataset)
135 | trainloader = torch.utils.data.DataLoader(
136 | train_dataset,
137 | batch_size=batch_size,
138 | shuffle=config.TRAIN.SHUFFLE and train_sampler is None,
139 | num_workers=config.WORKERS,
140 | pin_memory=True,
141 | drop_last=True,
142 | sampler=train_sampler)
143 |
144 | extra_epoch_iters = 0
145 | if config.DATASET.EXTRA_TRAIN_SET:
146 | extra_train_dataset = eval('datasets.'+config.DATASET.DATASET)(
147 | root=config.DATASET.ROOT,
148 | list_path=config.DATASET.EXTRA_TRAIN_SET,
149 | num_samples=None,
150 | num_classes=config.DATASET.NUM_CLASSES,
151 | multi_scale=config.TRAIN.MULTI_SCALE,
152 | flip=config.TRAIN.FLIP,
153 | ignore_label=config.TRAIN.IGNORE_LABEL,
154 | base_size=config.TRAIN.BASE_SIZE,
155 | crop_size=crop_size,
156 | downsample_rate=config.TRAIN.DOWNSAMPLERATE,
157 | scale_factor=config.TRAIN.SCALE_FACTOR)
158 | extra_train_sampler = get_sampler(extra_train_dataset)
159 | extra_trainloader = torch.utils.data.DataLoader(
160 | extra_train_dataset,
161 | batch_size=batch_size,
162 | shuffle=config.TRAIN.SHUFFLE and extra_train_sampler is None,
163 | num_workers=config.WORKERS,
164 | pin_memory=True,
165 | drop_last=True,
166 | sampler=extra_train_sampler)
167 | extra_epoch_iters = int(len(extra_train_dataset) /
168 | config.TRAIN.BATCH_SIZE_PER_GPU / len(gpus))
169 |
170 |
171 | test_size = (config.TEST.IMAGE_SIZE[1], config.TEST.IMAGE_SIZE[0])
172 | test_dataset = eval('datasets.'+config.DATASET.DATASET)(
173 | root=config.DATASET.ROOT,
174 | list_path=config.DATASET.TEST_SET,
175 | num_samples=config.TEST.NUM_SAMPLES,
176 | num_classes=config.DATASET.NUM_CLASSES,
177 | multi_scale=False,
178 | flip=False,
179 | ignore_label=config.TRAIN.IGNORE_LABEL,
180 | base_size=config.TEST.BASE_SIZE,
181 | crop_size=test_size,
182 | downsample_rate=1)
183 |
184 | test_sampler = get_sampler(test_dataset)
185 | testloader = torch.utils.data.DataLoader(
186 | test_dataset,
187 | batch_size=batch_size,
188 | shuffle=False,
189 | num_workers=config.WORKERS,
190 | pin_memory=True,
191 | sampler=test_sampler)
192 |
193 | # criterion
194 | if config.LOSS.USE_OHEM:
195 | criterion = OhemCrossEntropy(ignore_label=config.TRAIN.IGNORE_LABEL,
196 | thres=config.LOSS.OHEMTHRES,
197 | min_kept=config.LOSS.OHEMKEEP,
198 | weight=train_dataset.class_weights)
199 | else:
200 | criterion = CrossEntropy(ignore_label=config.TRAIN.IGNORE_LABEL,
201 | weight=train_dataset.class_weights)
202 |
203 | model = FullModel(model, criterion)
204 | if distributed:
205 | model = model.to(device)
206 | model = torch.nn.parallel.DistributedDataParallel(
207 | model,
208 | find_unused_parameters=True,
209 | device_ids=[args.local_rank],
210 | output_device=args.local_rank
211 | )
212 | else:
213 | model = nn.DataParallel(model, device_ids=gpus).cuda()
214 |
215 |
216 | # optimizer
217 | if config.TRAIN.OPTIMIZER == 'sgd':
218 |
219 | params_dict = dict(model.named_parameters())
220 | if config.TRAIN.NONBACKBONE_KEYWORDS:
221 | bb_lr = []
222 | nbb_lr = []
223 | nbb_keys = set()
224 | for k, param in params_dict.items():
225 | if any(part in k for part in config.TRAIN.NONBACKBONE_KEYWORDS):
226 | nbb_lr.append(param)
227 | nbb_keys.add(k)
228 | else:
229 | bb_lr.append(param)
230 | print(nbb_keys)
231 | params = [{'params': bb_lr, 'lr': config.TRAIN.LR}, {'params': nbb_lr, 'lr': config.TRAIN.LR * config.TRAIN.NONBACKBONE_MULT}]
232 | else:
233 | params = [{'params': list(params_dict.values()), 'lr': config.TRAIN.LR}]
234 |
235 | optimizer = torch.optim.SGD(params,
236 | lr=config.TRAIN.LR,
237 | momentum=config.TRAIN.MOMENTUM,
238 | weight_decay=config.TRAIN.WD,
239 | nesterov=config.TRAIN.NESTEROV,
240 | )
241 | else:
242 | raise ValueError('Only Support SGD optimizer')
243 |
244 | epoch_iters = int(len(train_dataset) /
245 | config.TRAIN.BATCH_SIZE_PER_GPU / len(gpus))
246 |
247 | best_mIoU = 0
248 | last_epoch = 0
249 | if config.TRAIN.RESUME:
250 | model_state_file = os.path.join(final_output_dir,
251 | 'checkpoint.pth.tar')
252 | if os.path.isfile(model_state_file):
253 | checkpoint = torch.load(model_state_file, map_location={'cuda:0': 'cpu'})
254 | best_mIoU = checkpoint['best_mIoU']
255 | last_epoch = checkpoint['epoch']
256 | dct = checkpoint['state_dict']
257 |
258 | model.module.model.load_state_dict({k.replace('model.', ''): v for k, v in checkpoint['state_dict'].items() if k.startswith('model.')})
259 | optimizer.load_state_dict(checkpoint['optimizer'])
260 | logger.info("=> loaded checkpoint (epoch {})"
261 | .format(checkpoint['epoch']))
262 | if distributed:
263 | torch.distributed.barrier()
264 |
265 | start = timeit.default_timer()
266 | end_epoch = config.TRAIN.END_EPOCH + config.TRAIN.EXTRA_EPOCH
267 | num_iters = config.TRAIN.END_EPOCH * epoch_iters
268 | extra_iters = config.TRAIN.EXTRA_EPOCH * extra_epoch_iters
269 |
270 | for epoch in range(last_epoch, end_epoch):
271 |
272 | current_trainloader = extra_trainloader if epoch >= config.TRAIN.END_EPOCH else trainloader
273 | if current_trainloader.sampler is not None and hasattr(current_trainloader.sampler, 'set_epoch'):
274 | current_trainloader.sampler.set_epoch(epoch)
275 |
276 | # valid_loss, mean_IoU, IoU_array = validate(config,
277 | # testloader, model, writer_dict)
278 |
279 | if epoch >= config.TRAIN.END_EPOCH:
280 | train(config, epoch-config.TRAIN.END_EPOCH,
281 | config.TRAIN.EXTRA_EPOCH, extra_epoch_iters,
282 | config.TRAIN.EXTRA_LR, extra_iters,
283 | extra_trainloader, optimizer, model, writer_dict)
284 | else:
285 | train(config, epoch, config.TRAIN.END_EPOCH,
286 | epoch_iters, config.TRAIN.LR, num_iters,
287 | trainloader, optimizer, model, writer_dict)
288 |
289 | valid_loss, mean_IoU, IoU_array = validate(config,
290 | testloader, model, writer_dict)
291 |
292 | if args.local_rank <= 0:
293 | logger.info('=> saving checkpoint to {}'.format(
294 | os.path.join(final_output_dir, 'checkpoint.pth.tar')))
295 | torch.save({
296 | 'epoch': epoch+1,
297 | 'best_mIoU': best_mIoU,
298 | 'state_dict': model.module.state_dict(),
299 | 'optimizer': optimizer.state_dict(),
300 | }, os.path.join(final_output_dir,'checkpoint.pth.tar'))
301 | if mean_IoU > best_mIoU:
302 | best_mIoU = mean_IoU
303 | torch.save(model.module.state_dict(),
304 | os.path.join(final_output_dir, 'best.pth'))
305 | msg = 'Loss: {:.3f}, MeanIU: {: 4.4f}, Best_mIoU: {: 4.4f}'.format(
306 | valid_loss, mean_IoU, best_mIoU)
307 | logging.info(msg)
308 | logging.info(IoU_array)
309 |
310 | if args.local_rank <= 0:
311 |
312 | torch.save(model.module.state_dict(),
313 | os.path.join(final_output_dir, 'final_state.pth'))
314 |
315 | writer_dict['writer'].close()
316 | end = timeit.default_timer()
317 | logger.info('Hours: %d' % int((end-start)/3600))
318 | logger.info('Done')
319 |
320 |
321 | if __name__ == '__main__':
322 | main()
323 |
--------------------------------------------------------------------------------
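
Why train.py calls sampler.set_epoch(epoch) each epoch: DistributedSampler seeds its shuffle with the epoch counter, so omitting the call would replay the same permutation every epoch on every rank. A runnable sketch that fixes num_replicas/rank so no process group is needed (the dataset and values are assumptions):

import torch
from torch.utils.data import TensorDataset
from torch.utils.data.distributed import DistributedSampler

ds = TensorDataset(torch.arange(8))
sampler = DistributedSampler(ds, num_replicas=2, rank=0, shuffle=True)

sampler.set_epoch(0); first = list(sampler)
sampler.set_epoch(1); second = list(sampler)
print(first, second)   # this rank's 4-index shard, reshuffled between epochs
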