├── LICENSE ├── README.md ├── ShelfNet18_realtime ├── ShelfBlock.py ├── __pycache__ │ ├── ShelfBlock.cpython-37.pyc │ ├── cityscapes.cpython-37.pyc │ ├── evaluate.cpython-37.pyc │ ├── logger.cpython-37.pyc │ ├── loss.cpython-37.pyc │ ├── optimizer.cpython-37.pyc │ ├── refinement_lightweight.cpython-37.pyc │ ├── resnet.cpython-37.pyc │ ├── shelfnet.cpython-37.pyc │ └── transform.cpython-37.pyc ├── cityscapes.py ├── cityscapes_info.json ├── evaluate.py ├── logger.py ├── loss.py ├── modules │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── bn.cpython-37.pyc │ │ ├── dense.cpython-37.pyc │ │ ├── functions.cpython-37.pyc │ │ ├── misc.cpython-37.pyc │ │ └── residual.cpython-37.pyc │ ├── bn.py │ ├── deeplab.py │ ├── dense.py │ ├── functions.py │ ├── misc.py │ ├── residual.py │ └── src │ │ ├── checks.h │ │ ├── inplace_abn.cpp │ │ ├── inplace_abn.h │ │ ├── inplace_abn_cpu.cpp │ │ ├── inplace_abn_cuda.cu │ │ ├── inplace_abn_cuda_half.cu │ │ └── utils │ │ ├── checks.h │ │ ├── common.h │ │ └── cuda.cuh ├── optimizer.py ├── refinement_lightweight.py ├── resnet.py ├── shelfnet.py ├── test_LWRF_speed.py ├── test_speed.py ├── train.py └── transform.py ├── ShelfNet34_non_realtime ├── ShelfBlock.py ├── __pycache__ │ ├── ShelfBlock.cpython-37.pyc │ ├── cityscapes.cpython-37.pyc │ ├── evaluate.cpython-37.pyc │ ├── logger.cpython-37.pyc │ ├── loss.cpython-37.pyc │ ├── optimizer.cpython-37.pyc │ ├── resnet.cpython-37.pyc │ ├── shelfnet.cpython-37.pyc │ └── transform.cpython-37.pyc ├── cityscapes.py ├── cityscapes_info.json ├── evaluate.py ├── logger.py ├── loss.py ├── modules │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── bn.cpython-37.pyc │ │ ├── dense.cpython-37.pyc │ │ ├── functions.cpython-37.pyc │ │ ├── misc.cpython-37.pyc │ │ └── residual.cpython-37.pyc │ ├── bn.py │ ├── deeplab.py │ ├── dense.py │ ├── functions.py │ ├── misc.py │ ├── residual.py │ └── src │ │ ├── checks.h │ │ ├── inplace_abn.cpp │ │ ├── inplace_abn.h │ │ ├── 
inplace_abn_cpu.cpp │ │ ├── inplace_abn_cuda.cu │ │ ├── inplace_abn_cuda_half.cu │ │ └── utils │ │ ├── checks.h │ │ ├── common.h │ │ └── cuda.cuh ├── optimizer.py ├── resnet.py ├── shelfnet.py ├── train.py └── transform.py └── figures ├── images.png └── results_shelfnet.png /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Juntang Zhuang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ShelfNet-lightweight for paper ([ShelfNet for fast semantic segmentation](https://arxiv.org/abs/1811.11254)) 2 | * This repo contains implementation of ShelfNet-lightweight models for real-time models on Cityscapes.
3 | * For real-time tasks, we achieved 74.8% mIoU on the Cityscapes dataset, with a speed of 59.2 FPS (61.7 FPS for BiSeNet at 74.7% on a GTX 1080Ti GPU).
4 | * For non real-time tasks, we achieved 79.0% mIoU on the Cityscapes test set with a ResNet34 backbone, surpassing other models (PSPNet and BiSeNet) that use larger ResNet50 or ResNet101 backbones.
5 | * For the non-lightweight ShelfNet implementation, refer to another [ShelfNet repo](https://github.com/juntang-zhuang/ShelfNet).
6 | * This branch contains the results of the Cityscapes experiments; for results on PASCAL, see branch ```pascal``` 7 | 8 | This repo is based on two implementations [Implementation 1](https://github.com/ycszen/TorchSeg) and [Implementation 2](https://github.com/CoinCheung/BiSeNet). Training takes about 24 hours on 2 GTX 1080Ti GPUs.
9 | 10 | ## Results 11 | ![Images](figures/images.png)
12 | ![Cityscapes results](figures/results_shelfnet.png)
13 | 14 | ## Link to results on Cityscapes test set 15 | ShelfNet18-lw real-time: [https://www.cityscapes-dataset.com/anonymous-results/?id=b2cc8f49fc3267c73e6bb686425016cb152c8bc34fc09ac207c81749f329dc8d](https://www.cityscapes-dataset.com/anonymous-results/?id=b2cc8f49fc3267c73e6bb686425016cb152c8bc34fc09ac207c81749f329dc8d)
16 | ShelfNet34-lw non real-time: [https://www.cityscapes-dataset.com/anonymous-results/?id=c0a7c8a4b64a880a715632c6a28b116d239096b63b5d14f5042c8b3280a7169d](https://www.cityscapes-dataset.com/anonymous-results/?id=c0a7c8a4b64a880a715632c6a28b116d239096b63b5d14f5042c8b3280a7169d) 17 | 18 | ## Data Preparation 19 | Download fine labelled dataset from Cityscapes server, and decompress into ```./data``` folder.
20 | You might need to modify data path [here](https://github.com/NoName-sketch/anonymous/blob/master/ShelfNet18_realtime/train.py/#L58) and [here](https://github.com/NoName-sketch/anonymous/blob/master/ShelfNet18_realtime/evaluate.py/#L143)
21 | ``` 22 | $ mkdir -p data 23 | $ mv /path/to/leftImg8bit_trainvaltest.zip data 24 | $ mv /path/to/gtFine_trainvaltest.zip data 25 | $ cd data 26 | $ unzip leftImg8bit_trainvaltest.zip 27 | $ unzip gtFine_trainvaltest.zip 28 | ``` 29 | 30 | ## Two models and the pretrained weights 31 | We provide two models, ShelfNet18 with 64 base channels for real-time semantic segmentation, and ShelfNet34 with 128 base channels for non-real-time semantic segmentation.
Pretrained weights for [ShelfNet18](https://www.dropbox.com/s/84ol8lk99qcis9p/ShelfNet18_realtime.pth?dl=0) and [ShelfNet34](https://www.dropbox.com/s/q9jae02qe27wwa3/ShelfNet34_non_realtime.pth?dl=0). 32 | 33 | ## Requirements 34 | PyTorch 1.1
35 | python3
36 | scikit-image
37 | tqdm
38 | 39 | ## How to run 40 | Find the folder (```cd ShelfNet18_realtime``` or ```cd ShelfNet34_non_realtime```) 41 | 42 | training 43 | ``` 44 | CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 train.py 45 | ``` 46 | 47 | evaluate on validation set (Create a folder called ```res```, this folder is automatically created if you train the model. Put checkpoint in ```res```folder, and make sure the checkpoint name and dataset path match ```evaluate.py```. Change checkpoint name to ```model_final.pth```by default) 48 | ``` 49 | python evaluate.py 50 | ``` 51 | 52 | ## Running speed 53 | test running speed of ShelfNet18-lw 54 | ``` 55 | python test_speed.py 56 | ``` 57 | 58 | You can modify the shape of input images to test running speed, by modifying [here](https://github.com/NoName-sketch/anonymous/blob/master/ShelfNet18_realtime/test_LWRF_speed.py#L32)
59 | You can test running speed of different models by modifying [here](https://github.com/NoName-sketch/anonymous/blob/master/ShelfNet18_realtime/test_LWRF_speed.py#L20)
60 | The running speed is an average of 100 single forward passes, therefore it's possible the speed varies. The code returns the mean running time by default. 61 | -------------------------------------------------------------------------------- /ShelfNet18_realtime/__pycache__/ShelfBlock.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/__pycache__/ShelfBlock.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/__pycache__/cityscapes.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/__pycache__/cityscapes.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/__pycache__/evaluate.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/__pycache__/evaluate.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/__pycache__/loss.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/__pycache__/loss.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/__pycache__/optimizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/__pycache__/optimizer.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/__pycache__/refinement_lightweight.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/__pycache__/refinement_lightweight.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/__pycache__/resnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/__pycache__/resnet.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/__pycache__/shelfnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/__pycache__/shelfnet.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/__pycache__/transform.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/__pycache__/transform.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/cityscapes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | 5 | import torch 6 | from torch.utils.data import Dataset 7 | import torchvision.transforms as transforms 8 | 9 | import os.path as osp 10 | import os 11 | from PIL import Image 12 | import numpy as np 13 | import json 14 | 15 | from transform import * 16 | 17 | 18 | 19 | class CityScapes(Dataset): 20 | def __init__(self, rootpth, cropsize=(640, 480), mode='train', label_scale=1.0, *args, **kwargs): 21 | super(CityScapes, self).__init__(*args, **kwargs) 22 | assert mode in ('train', 'val', 'test') 23 | self.mode = mode 24 | self.ignore_lb = 255 25 | self.label_scale = label_scale 26 | with open('./cityscapes_info.json', 'r') as fr: 27 | labels_info = json.load(fr) 28 | self.lb_map = {el['id']: el['trainId'] for el in labels_info} 29 | 30 | ## parse img directory 31 | self.imgs = {} 32 | imgnames = [] 33 | impth = osp.join(rootpth, 'leftImg8bit', mode) 34 | folders = os.listdir(impth) 35 | for fd in folders: 36 | fdpth = osp.join(impth, fd) 37 | im_names = os.listdir(fdpth) 38 | names = [el.replace('_leftImg8bit.png', '') for el in im_names] 39 | impths = [osp.join(fdpth, el) for el in im_names] 40 | imgnames.extend(names) 41 | self.imgs.update(dict(zip(names, impths))) 42 | 43 | ## parse gt directory 44 | self.labels = {} 45 | gtnames = [] 46 | if self.mode =='test': 47 | gtpth = osp.join(rootpth, mode) 48 | else: 49 | gtpth = osp.join(rootpth, 'gtFine', mode) 50 | folders = os.listdir(gtpth) 51 | for fd in folders: 52 | if fd == 'info.json': 53 | continue 54 | 55 | fdpth = osp.join(gtpth, fd) 56 | lbnames = os.listdir(fdpth) 57 | lbnames = [el for el in 
lbnames if 'labelIds' in el] 58 | names = [el.replace('_gtFine_labelIds.png', '') for el in lbnames] 59 | lbpths = [osp.join(fdpth, el) for el in lbnames] 60 | gtnames.extend(names) 61 | self.labels.update(dict(zip(names, lbpths))) 62 | 63 | self.imnames = imgnames 64 | self.len = len(self.imnames) 65 | if self.mode != 'test': 66 | assert set(imgnames) == set(gtnames) 67 | assert set(self.imnames) == set(self.imgs.keys()) 68 | assert set(self.imnames) == set(self.labels.keys()) 69 | 70 | ## pre-processing 71 | self.to_tensor = transforms.Compose([ 72 | transforms.ToTensor(), 73 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 74 | ]) 75 | self.trans_train = Compose([ 76 | ColorJitter( 77 | brightness = 0.5, 78 | contrast = 0.5, 79 | saturation = 0.5), 80 | HorizontalFlip(), 81 | RandomScale((0.75, 1.0, 1.25, 1.5, 1.75, 2.0)), 82 | RandomCrop(cropsize) 83 | ]) 84 | 85 | 86 | def __getitem__(self, idx): 87 | fn = self.imnames[idx] 88 | impth = self.imgs[fn] 89 | img = Image.open(impth) 90 | 91 | if self.mode != 'test': 92 | lbpth = self.labels[fn] 93 | label = Image.open(lbpth) 94 | 95 | if self.mode == 'train': 96 | im_lb = dict(im = img, lb = label) 97 | im_lb = self.trans_train(im_lb) 98 | img, label = im_lb['im'], im_lb['lb'] 99 | 100 | img = self.to_tensor(img) 101 | label = np.array(label).astype(np.int64)[np.newaxis, :] 102 | label = self.convert_labels(label) 103 | return img, label 104 | elif self.mode == 'val': 105 | img = self.to_tensor(img) 106 | 107 | if self.label_scale != 1.0: 108 | H,W = label.size 109 | label = label.resize((int(H*self.label_scale), int(W*self.label_scale)), 110 | Image.NEAREST) 111 | 112 | label = np.array(label).astype(np.int64)[np.newaxis, :] 113 | label = self.convert_labels(label) 114 | return img, label 115 | else: 116 | img = self.to_tensor(img) 117 | return img, impth 118 | 119 | 120 | 121 | def __len__(self): 122 | return self.len 123 | 124 | 125 | def convert_labels(self, label): 126 | for k, v in 
self.lb_map.items(): 127 | label[label == k] = v 128 | return label 129 | 130 | 131 | 132 | if __name__ == "__main__": 133 | from tqdm import tqdm 134 | ds = CityScapes('./data/', n_classes=19, mode='val') 135 | uni = [] 136 | for im, lb in tqdm(ds): 137 | lb_uni = np.unique(lb).tolist() 138 | uni.extend(lb_uni) 139 | print(uni) 140 | print(set(uni)) 141 | 142 | -------------------------------------------------------------------------------- /ShelfNet18_realtime/cityscapes_info.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "hasInstances": false, 4 | "category": "void", 5 | "catid": 0, 6 | "name": "unlabeled", 7 | "ignoreInEval": true, 8 | "id": 0, 9 | "color": [ 10 | 0, 11 | 0, 12 | 0 13 | ], 14 | "trainId": 255 15 | }, 16 | { 17 | "hasInstances": false, 18 | "category": "void", 19 | "catid": 0, 20 | "name": "ego vehicle", 21 | "ignoreInEval": true, 22 | "id": 1, 23 | "color": [ 24 | 0, 25 | 0, 26 | 0 27 | ], 28 | "trainId": 255 29 | }, 30 | { 31 | "hasInstances": false, 32 | "category": "void", 33 | "catid": 0, 34 | "name": "rectification border", 35 | "ignoreInEval": true, 36 | "id": 2, 37 | "color": [ 38 | 0, 39 | 0, 40 | 0 41 | ], 42 | "trainId": 255 43 | }, 44 | { 45 | "hasInstances": false, 46 | "category": "void", 47 | "catid": 0, 48 | "name": "out of roi", 49 | "ignoreInEval": true, 50 | "id": 3, 51 | "color": [ 52 | 0, 53 | 0, 54 | 0 55 | ], 56 | "trainId": 255 57 | }, 58 | { 59 | "hasInstances": false, 60 | "category": "void", 61 | "catid": 0, 62 | "name": "static", 63 | "ignoreInEval": true, 64 | "id": 4, 65 | "color": [ 66 | 0, 67 | 0, 68 | 0 69 | ], 70 | "trainId": 255 71 | }, 72 | { 73 | "hasInstances": false, 74 | "category": "void", 75 | "catid": 0, 76 | "name": "dynamic", 77 | "ignoreInEval": true, 78 | "id": 5, 79 | "color": [ 80 | 111, 81 | 74, 82 | 0 83 | ], 84 | "trainId": 255 85 | }, 86 | { 87 | "hasInstances": false, 88 | "category": "void", 89 | "catid": 0, 90 | "name": "ground", 91 | 
"ignoreInEval": true, 92 | "id": 6, 93 | "color": [ 94 | 81, 95 | 0, 96 | 81 97 | ], 98 | "trainId": 255 99 | }, 100 | { 101 | "hasInstances": false, 102 | "category": "flat", 103 | "catid": 1, 104 | "name": "road", 105 | "ignoreInEval": false, 106 | "id": 7, 107 | "color": [ 108 | 128, 109 | 64, 110 | 128 111 | ], 112 | "trainId": 0 113 | }, 114 | { 115 | "hasInstances": false, 116 | "category": "flat", 117 | "catid": 1, 118 | "name": "sidewalk", 119 | "ignoreInEval": false, 120 | "id": 8, 121 | "color": [ 122 | 244, 123 | 35, 124 | 232 125 | ], 126 | "trainId": 1 127 | }, 128 | { 129 | "hasInstances": false, 130 | "category": "flat", 131 | "catid": 1, 132 | "name": "parking", 133 | "ignoreInEval": true, 134 | "id": 9, 135 | "color": [ 136 | 250, 137 | 170, 138 | 160 139 | ], 140 | "trainId": 255 141 | }, 142 | { 143 | "hasInstances": false, 144 | "category": "flat", 145 | "catid": 1, 146 | "name": "rail track", 147 | "ignoreInEval": true, 148 | "id": 10, 149 | "color": [ 150 | 230, 151 | 150, 152 | 140 153 | ], 154 | "trainId": 255 155 | }, 156 | { 157 | "hasInstances": false, 158 | "category": "construction", 159 | "catid": 2, 160 | "name": "building", 161 | "ignoreInEval": false, 162 | "id": 11, 163 | "color": [ 164 | 70, 165 | 70, 166 | 70 167 | ], 168 | "trainId": 2 169 | }, 170 | { 171 | "hasInstances": false, 172 | "category": "construction", 173 | "catid": 2, 174 | "name": "wall", 175 | "ignoreInEval": false, 176 | "id": 12, 177 | "color": [ 178 | 102, 179 | 102, 180 | 156 181 | ], 182 | "trainId": 3 183 | }, 184 | { 185 | "hasInstances": false, 186 | "category": "construction", 187 | "catid": 2, 188 | "name": "fence", 189 | "ignoreInEval": false, 190 | "id": 13, 191 | "color": [ 192 | 190, 193 | 153, 194 | 153 195 | ], 196 | "trainId": 4 197 | }, 198 | { 199 | "hasInstances": false, 200 | "category": "construction", 201 | "catid": 2, 202 | "name": "guard rail", 203 | "ignoreInEval": true, 204 | "id": 14, 205 | "color": [ 206 | 180, 207 | 165, 208 | 180 
209 | ], 210 | "trainId": 255 211 | }, 212 | { 213 | "hasInstances": false, 214 | "category": "construction", 215 | "catid": 2, 216 | "name": "bridge", 217 | "ignoreInEval": true, 218 | "id": 15, 219 | "color": [ 220 | 150, 221 | 100, 222 | 100 223 | ], 224 | "trainId": 255 225 | }, 226 | { 227 | "hasInstances": false, 228 | "category": "construction", 229 | "catid": 2, 230 | "name": "tunnel", 231 | "ignoreInEval": true, 232 | "id": 16, 233 | "color": [ 234 | 150, 235 | 120, 236 | 90 237 | ], 238 | "trainId": 255 239 | }, 240 | { 241 | "hasInstances": false, 242 | "category": "object", 243 | "catid": 3, 244 | "name": "pole", 245 | "ignoreInEval": false, 246 | "id": 17, 247 | "color": [ 248 | 153, 249 | 153, 250 | 153 251 | ], 252 | "trainId": 5 253 | }, 254 | { 255 | "hasInstances": false, 256 | "category": "object", 257 | "catid": 3, 258 | "name": "polegroup", 259 | "ignoreInEval": true, 260 | "id": 18, 261 | "color": [ 262 | 153, 263 | 153, 264 | 153 265 | ], 266 | "trainId": 255 267 | }, 268 | { 269 | "hasInstances": false, 270 | "category": "object", 271 | "catid": 3, 272 | "name": "traffic light", 273 | "ignoreInEval": false, 274 | "id": 19, 275 | "color": [ 276 | 250, 277 | 170, 278 | 30 279 | ], 280 | "trainId": 6 281 | }, 282 | { 283 | "hasInstances": false, 284 | "category": "object", 285 | "catid": 3, 286 | "name": "traffic sign", 287 | "ignoreInEval": false, 288 | "id": 20, 289 | "color": [ 290 | 220, 291 | 220, 292 | 0 293 | ], 294 | "trainId": 7 295 | }, 296 | { 297 | "hasInstances": false, 298 | "category": "nature", 299 | "catid": 4, 300 | "name": "vegetation", 301 | "ignoreInEval": false, 302 | "id": 21, 303 | "color": [ 304 | 107, 305 | 142, 306 | 35 307 | ], 308 | "trainId": 8 309 | }, 310 | { 311 | "hasInstances": false, 312 | "category": "nature", 313 | "catid": 4, 314 | "name": "terrain", 315 | "ignoreInEval": false, 316 | "id": 22, 317 | "color": [ 318 | 152, 319 | 251, 320 | 152 321 | ], 322 | "trainId": 9 323 | }, 324 | { 325 | 
"hasInstances": false, 326 | "category": "sky", 327 | "catid": 5, 328 | "name": "sky", 329 | "ignoreInEval": false, 330 | "id": 23, 331 | "color": [ 332 | 70, 333 | 130, 334 | 180 335 | ], 336 | "trainId": 10 337 | }, 338 | { 339 | "hasInstances": true, 340 | "category": "human", 341 | "catid": 6, 342 | "name": "person", 343 | "ignoreInEval": false, 344 | "id": 24, 345 | "color": [ 346 | 220, 347 | 20, 348 | 60 349 | ], 350 | "trainId": 11 351 | }, 352 | { 353 | "hasInstances": true, 354 | "category": "human", 355 | "catid": 6, 356 | "name": "rider", 357 | "ignoreInEval": false, 358 | "id": 25, 359 | "color": [ 360 | 255, 361 | 0, 362 | 0 363 | ], 364 | "trainId": 12 365 | }, 366 | { 367 | "hasInstances": true, 368 | "category": "vehicle", 369 | "catid": 7, 370 | "name": "car", 371 | "ignoreInEval": false, 372 | "id": 26, 373 | "color": [ 374 | 0, 375 | 0, 376 | 142 377 | ], 378 | "trainId": 13 379 | }, 380 | { 381 | "hasInstances": true, 382 | "category": "vehicle", 383 | "catid": 7, 384 | "name": "truck", 385 | "ignoreInEval": false, 386 | "id": 27, 387 | "color": [ 388 | 0, 389 | 0, 390 | 70 391 | ], 392 | "trainId": 14 393 | }, 394 | { 395 | "hasInstances": true, 396 | "category": "vehicle", 397 | "catid": 7, 398 | "name": "bus", 399 | "ignoreInEval": false, 400 | "id": 28, 401 | "color": [ 402 | 0, 403 | 60, 404 | 100 405 | ], 406 | "trainId": 15 407 | }, 408 | { 409 | "hasInstances": true, 410 | "category": "vehicle", 411 | "catid": 7, 412 | "name": "caravan", 413 | "ignoreInEval": true, 414 | "id": 29, 415 | "color": [ 416 | 0, 417 | 0, 418 | 90 419 | ], 420 | "trainId": 255 421 | }, 422 | { 423 | "hasInstances": true, 424 | "category": "vehicle", 425 | "catid": 7, 426 | "name": "trailer", 427 | "ignoreInEval": true, 428 | "id": 30, 429 | "color": [ 430 | 0, 431 | 0, 432 | 110 433 | ], 434 | "trainId": 255 435 | }, 436 | { 437 | "hasInstances": true, 438 | "category": "vehicle", 439 | "catid": 7, 440 | "name": "train", 441 | "ignoreInEval": false, 442 | 
"id": 31, 443 | "color": [ 444 | 0, 445 | 80, 446 | 100 447 | ], 448 | "trainId": 16 449 | }, 450 | { 451 | "hasInstances": true, 452 | "category": "vehicle", 453 | "catid": 7, 454 | "name": "motorcycle", 455 | "ignoreInEval": false, 456 | "id": 32, 457 | "color": [ 458 | 0, 459 | 0, 460 | 230 461 | ], 462 | "trainId": 17 463 | }, 464 | { 465 | "hasInstances": true, 466 | "category": "vehicle", 467 | "catid": 7, 468 | "name": "bicycle", 469 | "ignoreInEval": false, 470 | "id": 33, 471 | "color": [ 472 | 119, 473 | 11, 474 | 32 475 | ], 476 | "trainId": 18 477 | }, 478 | { 479 | "hasInstances": false, 480 | "category": "vehicle", 481 | "catid": 7, 482 | "name": "license plate", 483 | "ignoreInEval": true, 484 | "id": -1, 485 | "color": [ 486 | 0, 487 | 0, 488 | 142 489 | ], 490 | "trainId": -1 491 | } 492 | ] -------------------------------------------------------------------------------- /ShelfNet18_realtime/evaluate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | from logger import setup_logger 4 | from cityscapes import CityScapes 5 | 6 | import torch 7 | import torch.nn as nn 8 | from torch.utils.data import DataLoader 9 | import torch.nn.functional as F 10 | import torch.distributed as dist 11 | 12 | import os 13 | import os.path as osp 14 | import logging 15 | import time 16 | import numpy as np 17 | from tqdm import tqdm 18 | import math 19 | from shelfnet import ShelfNet 20 | 21 | 22 | class MscEval(object): 23 | def __init__(self, 24 | model, 25 | dataloader, 26 | scales = [ 1.0], 27 | n_classes = 19, 28 | lb_ignore = 255, 29 | cropsize = 1024, 30 | flip = False, 31 | *args, **kwargs): 32 | self.scales = scales 33 | self.n_classes = n_classes 34 | self.lb_ignore = lb_ignore 35 | self.flip = flip 36 | self.cropsize = cropsize 37 | ## dataloader 38 | self.dl = dataloader 39 | self.net = model 40 | 41 | 42 | def pad_tensor(self, inten, size): 43 | N, C, H, W = 
inten.size() 44 | outten = torch.zeros(N, C, size[0], size[1]).cuda() 45 | outten.requires_grad = False 46 | margin_h, margin_w = size[0]-H, size[1]-W 47 | hst, hed = margin_h//2, margin_h//2+H 48 | wst, wed = margin_w//2, margin_w//2+W 49 | outten[:, :, hst:hed, wst:wed] = inten 50 | return outten, [hst, hed, wst, wed] 51 | 52 | 53 | def eval_chip(self, crop): 54 | with torch.no_grad(): 55 | out = self.net(crop)[0] 56 | prob = F.softmax(out, 1) 57 | if self.flip: 58 | crop = torch.flip(crop, dims=(3,)) 59 | out = self.net(crop)[0] 60 | out = torch.flip(out, dims=(3,)) 61 | prob += F.softmax(out, 1) 62 | #prob = torch.exp(prob) 63 | return prob 64 | 65 | 66 | def crop_eval(self, im): 67 | cropsize = self.cropsize 68 | stride_rate = 1.0 69 | N, C, H, W = im.size() 70 | long_size, short_size = (H,W) if H>W else (W,H) 71 | if long_size < cropsize: 72 | im, indices = self.pad_tensor(im, (cropsize, cropsize)) 73 | prob = self.eval_chip(im) 74 | prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]] 75 | else: 76 | stride = math.ceil(cropsize*stride_rate) 77 | if short_size < cropsize: 78 | if H < W: 79 | im, indices = self.pad_tensor(im, (cropsize, W)) 80 | else: 81 | im, indices = self.pad_tensor(im, (H, cropsize)) 82 | N, C, H, W = im.size() 83 | n_x = math.ceil((W-cropsize)/stride)+1 84 | n_y = math.ceil((H-cropsize)/stride)+1 85 | prob = torch.zeros(N, self.n_classes, H, W).cuda() 86 | prob.requires_grad = False 87 | for iy in range(n_y): 88 | for ix in range(n_x): 89 | hed, wed = min(H, stride*iy+cropsize), min(W, stride*ix+cropsize) 90 | hst, wst = hed-cropsize, wed-cropsize 91 | chip = im[:, :, hst:hed, wst:wed] 92 | prob_chip = self.eval_chip(chip) 93 | prob[:, :, hst:hed, wst:wed] += prob_chip 94 | if short_size < cropsize: 95 | prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]] 96 | return prob 97 | 98 | 99 | def scale_crop_eval(self, im, scale): 100 | N, C, H, W = im.size() 101 | new_hw = [int(H*scale), int(W*scale)] 102 | im = 
F.interpolate(im, new_hw, mode='bilinear', align_corners=True) 103 | prob = self.crop_eval(im) 104 | prob = F.interpolate(prob, (H, W), mode='bilinear', align_corners=True) 105 | return prob 106 | 107 | 108 | def compute_hist(self, pred, lb): 109 | n_classes = self.n_classes 110 | ignore_idx = self.lb_ignore 111 | keep = np.logical_not(lb==ignore_idx) 112 | merge = pred[keep] * n_classes + lb[keep] 113 | hist = np.bincount(merge, minlength=n_classes**2) 114 | hist = hist.reshape((n_classes, n_classes)) 115 | return hist 116 | 117 | 118 | def evaluate(self): 119 | ## evaluate 120 | n_classes = self.n_classes 121 | hist = np.zeros((n_classes, n_classes), dtype=np.float32) 122 | dloader = tqdm(self.dl) 123 | if dist.is_initialized() and not dist.get_rank()==0: 124 | dloader = self.dl 125 | for i, (imgs, label) in enumerate(dloader): 126 | N, _, H, W = label.shape 127 | probs = torch.zeros((N, self.n_classes, H, W)) 128 | probs.requires_grad = False 129 | imgs = imgs.cuda() 130 | for sc in self.scales: 131 | prob = self.scale_crop_eval(imgs, sc) 132 | probs += prob.detach().cpu() 133 | probs = probs.data.numpy() 134 | preds = np.argmax(probs, axis=1) 135 | 136 | hist_once = self.compute_hist(preds, label.data.numpy().squeeze(1)) 137 | hist = hist + hist_once 138 | IOUs = np.diag(hist) / (np.sum(hist, axis=0)+np.sum(hist, axis=1)-np.diag(hist)) 139 | mIOU = np.mean(IOUs) 140 | return mIOU 141 | 142 | 143 | def evaluate(respth='./res', dspth='/data2/.encoding/data/cityscapes', checkpoint=None): 144 | ## logger 145 | logger = logging.getLogger() 146 | 147 | ## model 148 | logger.info('\n') 149 | logger.info('===='*20) 150 | logger.info('evaluating the model ...\n') 151 | logger.info('setup and restore model') 152 | n_classes = 19 153 | net = ShelfNet(n_classes=n_classes) 154 | 155 | if checkpoint is None: 156 | save_pth = osp.join(respth, 'model_final.pth') 157 | else: 158 | save_pth = checkpoint 159 | 160 | net.load_state_dict(torch.load(save_pth)) 161 | net.cuda() 162 | 
net.eval() 163 | 164 | ## dataset 165 | batchsize = 5 166 | n_workers = 2 167 | dsval = CityScapes(dspth, mode='val') 168 | dl = DataLoader(dsval, 169 | batch_size = batchsize, 170 | shuffle = False, 171 | num_workers = n_workers, 172 | drop_last = False) 173 | 174 | ## evaluator 175 | logger.info('compute the mIOU') 176 | evaluator = MscEval(net, dl, scales=[1.0],flip=False) 177 | ## eval 178 | mIOU = evaluator.evaluate() 179 | logger.info('mIOU is: {:.6f}'.format(mIOU)) 180 | 181 | 182 | 183 | if __name__ == "__main__": 184 | setup_logger('./res') 185 | evaluate() 186 | -------------------------------------------------------------------------------- /ShelfNet18_realtime/logger.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | 5 | import os.path as osp 6 | import time 7 | import sys 8 | import logging 9 | 10 | import torch.distributed as dist 11 | 12 | 13 | def setup_logger(logpth): 14 | logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S')) 15 | logfile = osp.join(logpth, logfile) 16 | FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s' 17 | log_level = logging.INFO 18 | if dist.is_initialized() and not dist.get_rank()==0: 19 | log_level = logging.ERROR 20 | logging.basicConfig(level=log_level, format=FORMAT, filename=logfile) 21 | logging.root.addHandler(logging.StreamHandler()) 22 | 23 | 24 | -------------------------------------------------------------------------------- /ShelfNet18_realtime/loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | import numpy as np 10 | 11 | 12 | class OhemCELoss(nn.Module): 13 | def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs): 14 | super(OhemCELoss, self).__init__() 15 | self.thresh = 
-torch.log(torch.tensor(thresh, dtype=torch.float)).cuda() 16 | self.n_min = n_min 17 | self.ignore_lb = ignore_lb 18 | self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none') 19 | 20 | def forward(self, logits, labels): 21 | N, C, H, W = logits.size() 22 | loss = self.criteria(logits, labels).view(-1) 23 | loss, _ = torch.sort(loss, descending=True) 24 | if loss[self.n_min] > self.thresh: 25 | loss = loss[loss>self.thresh] 26 | else: 27 | loss = loss[:self.n_min] 28 | return torch.mean(loss) 29 | 30 | 31 | class SoftmaxFocalLoss(nn.Module): 32 | def __init__(self, gamma, ignore_lb=255, *args, **kwargs): 33 | super(FocalLoss, self).__init__() 34 | self.gamma = gamma 35 | self.nll = nn.NLLLoss(ignore_index=ignore_lb) 36 | 37 | def forward(self, logits, labels): 38 | scores = F.softmax(logits, dim=1) 39 | factor = torch.pow(1.-scores, self.gamma) 40 | log_score = F.log_softmax(logits, dim=1) 41 | log_score = factor * log_score 42 | loss = self.nll(log_score, labels) 43 | return loss 44 | 45 | 46 | if __name__ == '__main__': 47 | torch.manual_seed(15) 48 | criteria1 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda() 49 | criteria2 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda() 50 | net1 = nn.Sequential( 51 | nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1), 52 | ) 53 | net1.cuda() 54 | net1.train() 55 | net2 = nn.Sequential( 56 | nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1), 57 | ) 58 | net2.cuda() 59 | net2.train() 60 | 61 | with torch.no_grad(): 62 | inten = torch.randn(16, 3, 20, 20).cuda() 63 | lbs = torch.randint(0, 19, [16, 20, 20]).cuda() 64 | lbs[1, :, :] = 255 65 | 66 | logits1 = net1(inten) 67 | logits1 = F.interpolate(logits1, inten.size()[2:], mode='bilinear') 68 | logits2 = net2(inten) 69 | logits2 = F.interpolate(logits2, inten.size()[2:], mode='bilinear') 70 | 71 | loss1 = criteria1(logits1, lbs) 72 | loss2 = criteria2(logits2, lbs) 73 | loss = loss1 + loss2 74 | print(loss.detach().cpu()) 75 | loss.backward() 76 
| -------------------------------------------------------------------------------- /ShelfNet18_realtime/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .bn import ABN, InPlaceABN, InPlaceABNSync 2 | from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE 3 | from .misc import GlobalAvgPool2d, SingleGPU 4 | from .residual import IdentityResidualBlock 5 | from .dense import DenseModule 6 | -------------------------------------------------------------------------------- /ShelfNet18_realtime/modules/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/modules/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/modules/__pycache__/bn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/modules/__pycache__/bn.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/modules/__pycache__/dense.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/modules/__pycache__/dense.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet18_realtime/modules/__pycache__/functions.cpython-37.pyc: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/modules/__pycache__/functions.cpython-37.pyc
# --------------------------------------------------------------------------------
# /ShelfNet18_realtime/modules/__pycache__/misc.cpython-37.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/modules/__pycache__/misc.cpython-37.pyc
# --------------------------------------------------------------------------------
# /ShelfNet18_realtime/modules/__pycache__/residual.cpython-37.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet18_realtime/modules/__pycache__/residual.cpython-37.pyc
# --------------------------------------------------------------------------------
# /ShelfNet18_realtime/modules/bn.py:
# --------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as functional

try:
    from queue import Queue
except ImportError:
    from Queue import Queue

from .functions import *


class ABN(nn.Module):
    """Activated Batch Normalization

    This gathers a `BatchNorm2d` and an activation function in a single module
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
        """Creates an Activated Batch Normalization module

        Parameters
        ----------
        num_features : int
            Number of feature channels in the input and output.
        eps : float
            Small constant to prevent numerical issues.
        momentum : float
            Momentum factor applied when updating the running statistics.
        affine : bool
            If `True` apply learned scale and shift transformation after normalization.
        activation : str
            Name of the activation function, one of: `relu`, `leaky_relu`, `elu` or `none`.
            Any other value is treated like `none` by `forward`.
        slope : float
            Negative slope for the `leaky_relu` activation.
        """
        super(ABN, self).__init__()
        self.num_features = num_features
        self.affine = affine
        self.eps = eps
        self.momentum = momentum
        self.activation = activation
        self.slope = slope
        if self.affine:
            self.weight = nn.Parameter(torch.ones(num_features))
            self.bias = nn.Parameter(torch.zeros(num_features))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Reset running statistics to (0, 1) and, if affine, weight/bias to (1, 0)."""
        nn.init.constant_(self.running_mean, 0)
        nn.init.constant_(self.running_var, 1)
        if self.affine:
            nn.init.constant_(self.weight, 1)
            nn.init.constant_(self.bias, 0)

    def forward(self, x):
        """Apply batch normalization followed by the configured activation."""
        x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias,
                                  self.training, self.momentum, self.eps)

        # The activation is applied in place on the freshly normalized tensor.
        if self.activation == ACT_RELU:
            return functional.relu(x, inplace=True)
        elif self.activation == ACT_LEAKY_RELU:
            return functional.leaky_relu(x, negative_slope=self.slope, inplace=True)
        elif self.activation == ACT_ELU:
            return functional.elu(x, inplace=True)
        else:
            return x

    def __repr__(self):
        rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
              ' affine={affine}, activation={activation}'
        # The slope is only meaningful (and only printed) for leaky_relu.
        if self.activation == "leaky_relu":
            rep += ', slope={slope})'
        else:
            rep += ')'
        return rep.format(name=self.__class__.__name__, **self.__dict__)


class InPlaceABN(ABN):
    """InPlace Activated Batch Normalization"""

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
        """Creates an InPlace Activated Batch Normalization module

        Parameters
        ----------
        num_features : int
            Number of feature channels in the input and output.
        eps : float
            Small constant to prevent numerical issues.
        momentum : float
            Momentum factor applied when updating the running statistics.
        affine : bool
            If `True` apply learned scale and shift transformation after normalization.
        activation : str
            Name of the activation functions, one of: `leaky_relu`, `elu` or `none`.
        slope : float
            Negative slope for the `leaky_relu` activation.
        """
        super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope)

    def forward(self, x):
        """Run the fused in-place BN + activation (`inplace_abn` from `.functions`) on `x`."""
        return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var,
                           self.training, self.momentum, self.eps, self.activation, self.slope)


class InPlaceABNSync(ABN):
    """InPlace Activated Batch Normalization with cross-GPU synchronization
    This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`.
    """

    # __repr__ is inherited from ABN; the original override was a verbatim copy.

    def forward(self, x):
        """Run the synchronized in-place BN + activation (`inplace_abn_sync`) on `x`."""
        return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var,
                                self.training, self.momentum, self.eps, self.activation, self.slope)


# --------------------------------------------------------------------------------
# /ShelfNet18_realtime/modules/deeplab.py:
# --------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as functional

from models._util import try_index
from .bn import ABN


class DeeplabV3(nn.Module):
    """DeepLabV3-style head: four parallel (dilated) convolutions plus a global-pooling branch."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 hidden_channels=256,
                 dilations=(12, 24, 36),
                 norm_act=ABN,
                 pooling_size=None):
        """Build the head.

        Parameters
        ----------
        in_channels : int
            Number of input channels.
        out_channels : int
            Number of output channels.
        hidden_channels : int
            Channels produced by each parallel branch.
        dilations : sequence of three int
            Dilation (and padding) of the three `3 x 3` branches.
        norm_act : callable
            Function to create normalization / activation Module; the created
            module must expose `activation` and `slope` attributes.
        pooling_size : int, pair of int or None
            Window used by the pooling branch in eval mode; `None` means global pooling.
        """
        super(DeeplabV3, self).__init__()
        self.pooling_size = pooling_size

        self.map_convs = nn.ModuleList([
            nn.Conv2d(in_channels, hidden_channels, 1, bias=False),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]),
            nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2])
        ])
        self.map_bn = norm_act(hidden_channels * 4)

        self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False)
        self.global_pooling_bn = norm_act(hidden_channels)

        self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False)
        self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False)
        self.red_bn = norm_act(out_channels)

        self.reset_parameters(self.map_bn.activation, self.map_bn.slope)

    def reset_parameters(self, activation, slope):
        """Xavier-initialize the convolutions and reset every ABN to identity scale/shift."""
        # `calculate_gain` raises ValueError for names it does not know, which
        # includes the "elu" and "none" activations; use a unit gain for those.
        if activation in ("elu", "none"):
            gain = 1.0
        else:
            gain = nn.init.calculate_gain(activation, slope)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight.data, gain)
                if hasattr(m, "bias") and m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, ABN):
                if hasattr(m, "weight") and m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if hasattr(m, "bias") and m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return the fused output of the parallel-convolution and pooling branches."""
        # Map convolutions
        out = torch.cat([m(x) for m in self.map_convs], dim=1)
        out = self.map_bn(out)
        out = self.red_conv(out)

        # Global pooling
        pool = self._global_pooling(x)
        pool = self.global_pooling_conv(pool)
        pool = self.global_pooling_bn(pool)
        pool = self.pool_red_conv(pool)
        if self.training or self.pooling_size is None:
            # The pooled map is 1 x 1 here; tile it back to the input's spatial size.
            pool = pool.repeat(1, 1, x.size(2), x.size(3))

        out += pool
        out = self.red_bn(out)
        return out

    def _global_pooling(self, x):
        """Pool `x` globally (training / no pooling_size) or with a sliding window (eval)."""
        if self.training or self.pooling_size is None:
            pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1)
            pool = pool.view(x.size(0), x.size(1), 1, 1)
        else:
            # Clamp the window to the feature-map size.
            pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]),
                            min(try_index(self.pooling_size, 1), x.shape[3]))
            # (left, right, top, bottom) padding that restores the input's spatial
            # size after the stride-1 pooling; even windows pad one extra on the right/bottom.
            padding = (
                (pooling_size[1] - 1) // 2,
                (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1,
                (pooling_size[0] - 1) // 2,
                (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1
            )

            pool = functional.avg_pool2d(x, pooling_size, stride=1)
            pool = functional.pad(pool, pad=padding, mode="replicate")
        return pool
# --------------------------------------------------------------------------------
# /ShelfNet18_realtime/modules/dense.py:
# --------------------------------------------------------------------------------
from collections import OrderedDict

import torch
import torch.nn as nn

from .bn import ABN


class DenseModule(nn.Module):
    """Stack of densely connected bottleneck layers (1x1 conv then 3x3 conv)."""

    def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1):
        """Build `layers` dense layers, each adding `growth` channels to the running concatenation."""
        super(DenseModule, self).__init__()
        self.in_channels = in_channels
        self.growth = growth
        self.layers = layers

        self.convs1 = nn.ModuleList()
        self.convs3 = nn.ModuleList()
        for i in range(self.layers):
            self.convs1.append(nn.Sequential(OrderedDict([
                ("bn", norm_act(in_channels)),
                ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False))
            ])))
            self.convs3.append(nn.Sequential(OrderedDict([
                ("bn", norm_act(self.growth * bottleneck_factor)),
                ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False,
                                   dilation=dilation))
            ])))
            # The next layer sees everything produced so far.
            in_channels += self.growth

    @property
    def out_channels(self):
        """Number of channels returned by `forward`."""
        return self.in_channels + self.growth * self.layers

    def forward(self, x):
        """Return the channel-wise concatenation of the input and every layer's output."""
        inputs = [x]
        for i in range(self.layers):
            x = torch.cat(inputs, dim=1)
            x = self.convs1[i](x)
            x = self.convs3[i](x)
            inputs += [x]

        return torch.cat(inputs, dim=1)


# --------------------------------------------------------------------------------
# /ShelfNet18_realtime/modules/functions.py:
# --------------------------------------------------------------------------------
from os import path
import torch
import torch.distributed as dist
import torch.autograd as autograd
import torch.cuda.comm as comm
from torch.autograd.function import once_differentiable
from torch.utils.cpp_extension import load

# The C++/CUDA backend is JIT-compiled from ./src when this module is first imported.
_src_path = path.join(path.dirname(path.abspath(__file__)), "src")
_backend = load(name="inplace_abn",
                extra_cflags=["-O3"],
                sources=[path.join(_src_path, f) for f in [
                    "inplace_abn.cpp",
                    "inplace_abn_cpu.cpp",
                    "inplace_abn_cuda.cu",
                    "inplace_abn_cuda_half.cu"
                ]],
                extra_cuda_cflags=["--expt-extended-lambda"])

# Activation names
ACT_RELU = "relu"
ACT_LEAKY_RELU = "leaky_relu"
ACT_ELU = "elu"
ACT_NONE = "none"


def _check(fn, *args, **kwargs):
    """Call `fn` and raise RuntimeError if it returns a falsy value."""
    success = fn(*args, **kwargs)
    if not success:
        raise RuntimeError("CUDA Error encountered in {}".format(fn))


def _broadcast_shape(x):
    """Return a shape with `x`'s size on dim 1 and 1 on every other dim."""
    out_size = []
    for i, s in enumerate(x.size()):
        if i != 1:
            out_size.append(1)
        else:
            out_size.append(s)
    return out_size


def _reduce(x):
    """Sum `x` over every dimension except dim 1."""
    if len(x.size()) == 2:
        return x.sum(dim=0)
    else:
        n, c = x.size()[0:2]
        return x.contiguous().view((n, c, -1)).sum(2).sum(0)


def _count_samples(x):
    """Return the product of all of `x`'s sizes except dim 1."""
    count = 1
    for i, s in enumerate(x.size()):
        if i != 1:
            count *= s
    return count


def _act_forward(ctx, x):
    """Apply ctx.activation to `x` in place; names other than leaky_relu/elu are a no-op."""
    if ctx.activation == ACT_LEAKY_RELU:
        _backend.leaky_relu_forward(x, ctx.slope)
    elif ctx.activation == ACT_ELU:
        _backend.elu_forward(x)
    elif ctx.activation == ACT_NONE:
        pass


def _act_backward(ctx, x, dx):
    """Invert ctx.activation on `x` and back-propagate through it on `dx`, both in place."""
    if ctx.activation == ACT_LEAKY_RELU:
        _backend.leaky_relu_backward(x, dx, ctx.slope)
    elif ctx.activation == ACT_ELU:
        _backend.elu_backward(x, dx)
    elif ctx.activation == ACT_NONE:
        pass


class InPlaceABN(autograd.Function):
    """Autograd function fusing batch norm and activation, overwriting its input."""

    @staticmethod
    def forward(ctx, x, weight, bias, running_mean, running_var,
                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
        # Save context
        ctx.training = training
        ctx.momentum = momentum
        ctx.eps = eps
        ctx.activation = activation
        ctx.slope = slope
        ctx.affine = weight is not None and bias is not None

        # Prepare inputs
        count = _count_samples(x)
        x = x.contiguous()
        # The backend always receives tensors; empty ones stand in for missing affine params.
        weight = weight.contiguous() if ctx.affine else x.new_empty(0)
        bias = bias.contiguous() if ctx.affine else x.new_empty(0)

        if ctx.training:
            mean, var = _backend.mean_var(x)

            # Update running stats
            running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
            # count / (count - 1) rescales the variance (Bessel's correction).
            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))

            # Mark in-place modified tensors
            ctx.mark_dirty(x, running_mean, running_var)
        else:
            mean, var = running_mean.contiguous(), running_var.contiguous()
            ctx.mark_dirty(x)

        # BN forward + activation
        _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
        _act_forward(ctx, x)

        # Output
        ctx.var = var
        ctx.save_for_backward(x, var, weight, bias)
        return x

    @staticmethod
    @once_differentiable
    def backward(ctx, dz):
        z, var, weight, bias = ctx.saved_tensors
        dz = dz.contiguous()

        # Undo activation
        _act_backward(ctx, z, dz)

        if ctx.training:
            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
        else:
            # TODO: implement simplified CUDA backward for inference mode
            edz = dz.new_zeros(dz.size(1))
            eydz = dz.new_zeros(dz.size(1))

        dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
        dweight = eydz * weight.sign() if ctx.affine else None
        dbias = edz if ctx.affine else None

        # One gradient slot per forward() input after ctx.
        return dx, dweight, dbias, None, None, None, None, None, None, None


class InPlaceABNSync(autograd.Function):
    """InPlaceABN variant that all-reduces batch statistics across the process group."""

    @classmethod
    def forward(cls, ctx, x, weight, bias, running_mean, running_var,
                training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True):
        # Save context
        ctx.training = training
        ctx.momentum = momentum
        ctx.eps = eps
        ctx.activation = activation
        ctx.slope = slope
        ctx.affine = weight is not None and bias is not None

        # Prepare inputs
        ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1

        #count = _count_samples(x)
        batch_size = x.new_tensor([x.shape[0]],dtype=torch.long)

        x = x.contiguous()
        weight = weight.contiguous() if ctx.affine else x.new_empty(0)
        bias = bias.contiguous() if ctx.affine else x.new_empty(0)

        if ctx.training:
            mean, var = _backend.mean_var(x)
            if ctx.world_size>1:
                # get global batch size
                if equal_batches:
                    batch_size *= ctx.world_size
                else:
                    dist.all_reduce(batch_size, dist.ReduceOp.SUM)

                # This process's share of the global batch; weights its statistics in the reductions.
                ctx.factor = x.shape[0]/float(batch_size.item())

                mean_all = mean.clone() * ctx.factor
                dist.all_reduce(mean_all, dist.ReduceOp.SUM)

                # Local variance plus squared offset of the local mean from the global mean.
                var_all = (var + (mean - mean_all) ** 2) * ctx.factor
                dist.all_reduce(var_all, dist.ReduceOp.SUM)

                mean = mean_all
                var = var_all

            # Update running stats
            running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
            count = batch_size.item() * x.view(x.shape[0],x.shape[1],-1).shape[-1]
            running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1)))

            # Mark in-place modified tensors
            ctx.mark_dirty(x, running_mean, running_var)
        else:
            mean, var = running_mean.contiguous(), running_var.contiguous()
            ctx.mark_dirty(x)

        # BN forward + activation
        _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
        _act_forward(ctx, x)

        # Output
        ctx.var = var
        ctx.save_for_backward(x, var, weight, bias)
        return x

    @staticmethod
    @once_differentiable
    def backward(ctx, dz):
        z, var, weight, bias = ctx.saved_tensors
        dz = dz.contiguous()

        # Undo activation
        _act_backward(ctx, z, dz)

        if ctx.training:
            edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
            # Parameter gradients use the local sums; the input gradient uses the reduced ones.
            edz_local = edz.clone()
            eydz_local = eydz.clone()

            if ctx.world_size>1:
                edz *= ctx.factor
                dist.all_reduce(edz, dist.ReduceOp.SUM)

                eydz *= ctx.factor
                dist.all_reduce(eydz, dist.ReduceOp.SUM)
        else:
            edz_local = edz = dz.new_zeros(dz.size(1))
            eydz_local = eydz = dz.new_zeros(dz.size(1))

        dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
        dweight = eydz_local * weight.sign() if ctx.affine else None
        dbias = edz_local if ctx.affine else None

        # forward() takes 11 inputs after ctx (the last is equal_batches); the
        # original returned only 10 gradients, so passing equal_batches explicitly
        # made autograd reject the result. A trailing None is harmless when the
        # argument is omitted.
        return dx, dweight, dbias, None, None, None, None, None, None, None, None


inplace_abn = InPlaceABN.apply
inplace_abn_sync = InPlaceABNSync.apply

__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"]

# --------------------------------------------------------------------------------
# /ShelfNet18_realtime/modules/misc.py:
# --------------------------------------------------------------------------------
import torch.nn as nn
import torch
import torch.distributed as dist


class GlobalAvgPool2d(nn.Module):
    def __init__(self):
        """Global average pooling over the input's spatial dimensions"""
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, inputs):
        """Average over everything after dim 1, returning a (dim0, dim1) tensor."""
        in_size = inputs.size()
        return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2)


class SingleGPU(nn.Module):
    """Wrapper that moves its input to the current CUDA device before calling `module`."""

    def __init__(self, module):
        super(SingleGPU, self).__init__()
        self.module=module

    def forward(self, input):
        return self.module(input.cuda(non_blocking=True))
# --------------------------------------------------------------------------------
# /ShelfNet18_realtime/modules/residual.py:
# --------------------------------------------------------------------------------
from collections import OrderedDict

import torch.nn as nn

from .bn import ABN


class IdentityResidualBlock(nn.Module):
    def __init__(self,
                 in_channels,
                 channels,
                 stride=1,
                 dilation=1,
                 groups=1,
                 norm_act=ABN,
                 dropout=None):
        """Configurable identity-mapping residual block

        Parameters
        ----------
        in_channels : int
            Number of input channels.
        channels : list of int
            Number of channels in the internal feature maps. Can either have two or three elements: if two construct
            a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then
            `3 x 3` then `1 x 1` convolutions.
        stride : int
            Stride of the first convolution (`3 x 3` in the basic block, `1 x 1` in the bottleneck block).
        dilation : int
            Dilation to apply to the `3 x 3` convolutions.
        groups : int
            Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with
            bottleneck blocks.
        norm_act : callable
            Function to create normalization / activation Module.
        dropout: callable
            Function to create Dropout Module.

        Raises
        ------
        ValueError
            If `channels` does not have two or three elements, or `groups != 1` with a two-element `channels`.
        """
        super(IdentityResidualBlock, self).__init__()

        # Check parameters for inconsistencies
        if len(channels) not in (2, 3):
            raise ValueError("channels must contain either two or three values")
        if len(channels) == 2 and groups != 1:
            raise ValueError("groups > 1 are only valid if len(channels) == 3")

        is_bottleneck = len(channels) == 3
        # A projection on the shortcut is needed whenever the output shape differs from the input's.
        need_proj_conv = stride != 1 or in_channels != channels[-1]

        self.bn1 = norm_act(in_channels)
        if not is_bottleneck:
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False,
                                    dilation=dilation)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    dilation=dilation))
            ]
            if dropout is not None:
                # Dropout goes right before the last convolution.
                layers = layers[0:2] + [("dropout", dropout())] + layers[2:]
        else:
            layers = [
                ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)),
                ("bn2", norm_act(channels[0])),
                ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
                                    groups=groups, dilation=dilation)),
                ("bn3", norm_act(channels[1])),
                ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False))
            ]
            if dropout is not None:
                # Dropout goes right before the last convolution.
                layers = layers[0:4] + [("dropout", dropout())] + layers[4:]
        self.convs = nn.Sequential(OrderedDict(layers))

        if need_proj_conv:
            self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False)

    def forward(self, x):
        """Return `convs(bn1(x))` plus the (optionally projected) shortcut."""
        if hasattr(self, "proj_conv"):
            bn1 = self.bn1(x)
            shortcut = self.proj_conv(bn1)
        else:
            # Copy x before bn1 runs -- presumably because norm_act may be an
            # in-place ABN that overwrites its input; confirm against callers.
            shortcut = x.clone()
            bn1 = self.bn1(x)

        out = self.convs(bn1)
        out.add_(shortcut)

        return out

# --------------------------------------------------------------------------------
/ShelfNet18_realtime/modules/src/checks.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | // Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT 6 | #ifndef AT_CHECK 7 | #define AT_CHECK AT_ASSERT 8 | #endif 9 | 10 | #define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") 11 | #define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") 12 | #define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") 13 | 14 | #define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 15 | #define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) -------------------------------------------------------------------------------- /ShelfNet18_realtime/modules/src/inplace_abn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "inplace_abn.h" 6 | 7 | std::vector mean_var(at::Tensor x) { 8 | if (x.is_cuda()) { 9 | if (x.type().scalarType() == at::ScalarType::Half) { 10 | return mean_var_cuda_h(x); 11 | } else { 12 | return mean_var_cuda(x); 13 | } 14 | } else { 15 | return mean_var_cpu(x); 16 | } 17 | } 18 | 19 | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 20 | bool affine, float eps) { 21 | if (x.is_cuda()) { 22 | if (x.type().scalarType() == at::ScalarType::Half) { 23 | return forward_cuda_h(x, mean, var, weight, bias, affine, eps); 24 | } else { 25 | return forward_cuda(x, mean, var, weight, bias, affine, eps); 26 | } 27 | } else { 28 | return forward_cpu(x, mean, var, weight, bias, affine, eps); 29 | } 30 | } 31 | 32 | std::vector edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 33 | bool affine, float eps) { 34 | if (z.is_cuda()) { 35 | if (z.type().scalarType() == at::ScalarType::Half) { 36 | return edz_eydz_cuda_h(z, dz, weight, bias, 
affine, eps); 37 | } else { 38 | return edz_eydz_cuda(z, dz, weight, bias, affine, eps); 39 | } 40 | } else { 41 | return edz_eydz_cpu(z, dz, weight, bias, affine, eps); 42 | } 43 | } 44 | 45 | at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 46 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 47 | if (z.is_cuda()) { 48 | if (z.type().scalarType() == at::ScalarType::Half) { 49 | return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps); 50 | } else { 51 | return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); 52 | } 53 | } else { 54 | return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); 55 | } 56 | } 57 | 58 | void leaky_relu_forward(at::Tensor z, float slope) { 59 | at::leaky_relu_(z, slope); 60 | } 61 | 62 | void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) { 63 | if (z.is_cuda()) { 64 | if (z.type().scalarType() == at::ScalarType::Half) { 65 | return leaky_relu_backward_cuda_h(z, dz, slope); 66 | } else { 67 | return leaky_relu_backward_cuda(z, dz, slope); 68 | } 69 | } else { 70 | return leaky_relu_backward_cpu(z, dz, slope); 71 | } 72 | } 73 | 74 | void elu_forward(at::Tensor z) { 75 | at::elu_(z); 76 | } 77 | 78 | void elu_backward(at::Tensor z, at::Tensor dz) { 79 | if (z.is_cuda()) { 80 | return elu_backward_cuda(z, dz); 81 | } else { 82 | return elu_backward_cpu(z, dz); 83 | } 84 | } 85 | 86 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 87 | m.def("mean_var", &mean_var, "Mean and variance computation"); 88 | m.def("forward", &forward, "In-place forward computation"); 89 | m.def("edz_eydz", &edz_eydz, "First part of backward computation"); 90 | m.def("backward", &backward, "Second part of backward computation"); 91 | m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); 92 | m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); 93 | m.def("elu_forward", 
&elu_forward, "Elu forward computation"); 94 | m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); 95 | } 96 | -------------------------------------------------------------------------------- /ShelfNet18_realtime/modules/src/inplace_abn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | std::vector mean_var_cpu(at::Tensor x); 8 | std::vector mean_var_cuda(at::Tensor x); 9 | std::vector mean_var_cuda_h(at::Tensor x); 10 | 11 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 12 | bool affine, float eps); 13 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 14 | bool affine, float eps); 15 | at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 16 | bool affine, float eps); 17 | 18 | std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 19 | bool affine, float eps); 20 | std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 21 | bool affine, float eps); 22 | std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 23 | bool affine, float eps); 24 | 25 | at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 26 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 27 | at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 28 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 29 | at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 30 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 31 | 32 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); 33 | void 
leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); 34 | void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope); 35 | 36 | void elu_backward_cpu(at::Tensor z, at::Tensor dz); 37 | void elu_backward_cuda(at::Tensor z, at::Tensor dz); 38 | 39 | static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) { 40 | num = x.size(0); 41 | chn = x.size(1); 42 | sp = 1; 43 | for (int64_t i = 2; i < x.ndimension(); ++i) 44 | sp *= x.size(i); 45 | } 46 | 47 | /* 48 | * Specialized CUDA reduction functions for BN 49 | */ 50 | #ifdef __CUDACC__ 51 | 52 | #include "utils/cuda.cuh" 53 | 54 | template 55 | __device__ T reduce(Op op, int plane, int N, int S) { 56 | T sum = (T)0; 57 | for (int batch = 0; batch < N; ++batch) { 58 | for (int x = threadIdx.x; x < S; x += blockDim.x) { 59 | sum += op(batch, plane, x); 60 | } 61 | } 62 | 63 | // sum over NumThreads within a warp 64 | sum = warpSum(sum); 65 | 66 | // 'transpose', and reduce within warp again 67 | __shared__ T shared[32]; 68 | __syncthreads(); 69 | if (threadIdx.x % WARP_SIZE == 0) { 70 | shared[threadIdx.x / WARP_SIZE] = sum; 71 | } 72 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { 73 | // zero out the other entries in shared 74 | shared[threadIdx.x] = (T)0; 75 | } 76 | __syncthreads(); 77 | if (threadIdx.x / WARP_SIZE == 0) { 78 | sum = warpSum(shared[threadIdx.x]); 79 | if (threadIdx.x == 0) { 80 | shared[0] = sum; 81 | } 82 | } 83 | __syncthreads(); 84 | 85 | // Everyone picks it up, should be broadcast into the whole gradInput 86 | return shared[0]; 87 | } 88 | #endif 89 | -------------------------------------------------------------------------------- /ShelfNet18_realtime/modules/src/inplace_abn_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "utils/checks.h" 6 | #include "inplace_abn.h" 7 | 8 | at::Tensor reduce_sum(at::Tensor x) { 9 | if (x.ndimension() 
== 2) { 10 | return x.sum(0); 11 | } else { 12 | auto x_view = x.view({x.size(0), x.size(1), -1}); 13 | return x_view.sum(-1).sum(0); 14 | } 15 | } 16 | 17 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { 18 | if (x.ndimension() == 2) { 19 | return v; 20 | } else { 21 | std::vector broadcast_size = {1, -1}; 22 | for (int64_t i = 2; i < x.ndimension(); ++i) 23 | broadcast_size.push_back(1); 24 | 25 | return v.view(broadcast_size); 26 | } 27 | } 28 | 29 | int64_t count(at::Tensor x) { 30 | int64_t count = x.size(0); 31 | for (int64_t i = 2; i < x.ndimension(); ++i) 32 | count *= x.size(i); 33 | 34 | return count; 35 | } 36 | 37 | at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { 38 | if (affine) { 39 | return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); 40 | } else { 41 | return z; 42 | } 43 | } 44 | 45 | std::vector mean_var_cpu(at::Tensor x) { 46 | auto num = count(x); 47 | auto mean = reduce_sum(x) / num; 48 | auto diff = x - broadcast_to(mean, x); 49 | auto var = reduce_sum(diff.pow(2)) / num; 50 | 51 | return {mean, var}; 52 | } 53 | 54 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 55 | bool affine, float eps) { 56 | auto gamma = affine ? 
at::abs(weight) + eps : at::ones_like(var); 57 | auto mul = at::rsqrt(var + eps) * gamma; 58 | 59 | x.sub_(broadcast_to(mean, x)); 60 | x.mul_(broadcast_to(mul, x)); 61 | if (affine) x.add_(broadcast_to(bias, x)); 62 | 63 | return x; 64 | } 65 | 66 | std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 67 | bool affine, float eps) { 68 | auto edz = reduce_sum(dz); 69 | auto y = invert_affine(z, weight, bias, affine, eps); 70 | auto eydz = reduce_sum(y * dz); 71 | 72 | return {edz, eydz}; 73 | } 74 | 75 | at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 76 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 77 | auto y = invert_affine(z, weight, bias, affine, eps); 78 | auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps); 79 | 80 | auto num = count(z); 81 | auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz); 82 | return dx; 83 | } 84 | 85 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) { 86 | CHECK_CPU_INPUT(z); 87 | CHECK_CPU_INPUT(dz); 88 | 89 | AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] { 90 | int64_t count = z.numel(); 91 | auto *_z = z.data(); 92 | auto *_dz = dz.data(); 93 | 94 | for (int64_t i = 0; i < count; ++i) { 95 | if (_z[i] < 0) { 96 | _z[i] *= 1 / slope; 97 | _dz[i] *= slope; 98 | } 99 | } 100 | })); 101 | } 102 | 103 | void elu_backward_cpu(at::Tensor z, at::Tensor dz) { 104 | CHECK_CPU_INPUT(z); 105 | CHECK_CPU_INPUT(dz); 106 | 107 | AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] { 108 | int64_t count = z.numel(); 109 | auto *_z = z.data(); 110 | auto *_dz = dz.data(); 111 | 112 | for (int64_t i = 0; i < count; ++i) { 113 | if (_z[i] < 0) { 114 | _z[i] = log1p(_z[i]); 115 | _dz[i] *= (_z[i] + 1.f); 116 | } 117 | } 118 | })); 119 | } 120 | 
-------------------------------------------------------------------------------- /ShelfNet18_realtime/modules/src/inplace_abn_cuda.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "utils/checks.h" 9 | #include "utils/cuda.cuh" 10 | #include "inplace_abn.h" 11 | 12 | #include 13 | 14 | // Operations for reduce 15 | template 16 | struct SumOp { 17 | __device__ SumOp(const T *t, int c, int s) 18 | : tensor(t), chn(c), sp(s) {} 19 | __device__ __forceinline__ T operator()(int batch, int plane, int n) { 20 | return tensor[(batch * chn + plane) * sp + n]; 21 | } 22 | const T *tensor; 23 | const int chn; 24 | const int sp; 25 | }; 26 | 27 | template 28 | struct VarOp { 29 | __device__ VarOp(T m, const T *t, int c, int s) 30 | : mean(m), tensor(t), chn(c), sp(s) {} 31 | __device__ __forceinline__ T operator()(int batch, int plane, int n) { 32 | T val = tensor[(batch * chn + plane) * sp + n]; 33 | return (val - mean) * (val - mean); 34 | } 35 | const T mean; 36 | const T *tensor; 37 | const int chn; 38 | const int sp; 39 | }; 40 | 41 | template 42 | struct GradOp { 43 | __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s) 44 | : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} 45 | __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { 46 | T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight; 47 | T _dz = dz[(batch * chn + plane) * sp + n]; 48 | return Pair(_dz, _y * _dz); 49 | } 50 | const T weight; 51 | const T bias; 52 | const T *z; 53 | const T *dz; 54 | const int chn; 55 | const int sp; 56 | }; 57 | 58 | /*********** 59 | * mean_var 60 | ***********/ 61 | 62 | template 63 | __global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) { 64 | int plane = blockIdx.x; 65 | T norm = T(1) / T(num * sp); 66 | 67 | T _mean = reduce>(SumOp(x, chn, sp), plane, num, sp) * norm; 
68 | __syncthreads(); 69 | T _var = reduce>(VarOp(_mean, x, chn, sp), plane, num, sp) * norm; 70 | 71 | if (threadIdx.x == 0) { 72 | mean[plane] = _mean; 73 | var[plane] = _var; 74 | } 75 | } 76 | 77 | std::vector mean_var_cuda(at::Tensor x) { 78 | CHECK_CUDA_INPUT(x); 79 | 80 | // Extract dimensions 81 | int64_t num, chn, sp; 82 | get_dims(x, num, chn, sp); 83 | 84 | // Prepare output tensors 85 | auto mean = at::empty({chn}, x.options()); 86 | auto var = at::empty({chn}, x.options()); 87 | 88 | // Run kernel 89 | dim3 blocks(chn); 90 | dim3 threads(getNumThreads(sp)); 91 | auto stream = at::cuda::getCurrentCUDAStream(); 92 | AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] { 93 | mean_var_kernel<<>>( 94 | x.data(), 95 | mean.data(), 96 | var.data(), 97 | num, chn, sp); 98 | })); 99 | 100 | return {mean, var}; 101 | } 102 | 103 | /********** 104 | * forward 105 | **********/ 106 | 107 | template 108 | __global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias, 109 | bool affine, float eps, int num, int chn, int sp) { 110 | int plane = blockIdx.x; 111 | 112 | T _mean = mean[plane]; 113 | T _var = var[plane]; 114 | T _weight = affine ? abs(weight[plane]) + eps : T(1); 115 | T _bias = affine ? 
bias[plane] : T(0); 116 | 117 | T mul = rsqrt(_var + eps) * _weight; 118 | 119 | for (int batch = 0; batch < num; ++batch) { 120 | for (int n = threadIdx.x; n < sp; n += blockDim.x) { 121 | T _x = x[(batch * chn + plane) * sp + n]; 122 | T _y = (_x - _mean) * mul + _bias; 123 | 124 | x[(batch * chn + plane) * sp + n] = _y; 125 | } 126 | } 127 | } 128 | 129 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 130 | bool affine, float eps) { 131 | CHECK_CUDA_INPUT(x); 132 | CHECK_CUDA_INPUT(mean); 133 | CHECK_CUDA_INPUT(var); 134 | CHECK_CUDA_INPUT(weight); 135 | CHECK_CUDA_INPUT(bias); 136 | 137 | // Extract dimensions 138 | int64_t num, chn, sp; 139 | get_dims(x, num, chn, sp); 140 | 141 | // Run kernel 142 | dim3 blocks(chn); 143 | dim3 threads(getNumThreads(sp)); 144 | auto stream = at::cuda::getCurrentCUDAStream(); 145 | AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] { 146 | forward_kernel<<>>( 147 | x.data(), 148 | mean.data(), 149 | var.data(), 150 | weight.data(), 151 | bias.data(), 152 | affine, eps, num, chn, sp); 153 | })); 154 | 155 | return x; 156 | } 157 | 158 | /*********** 159 | * edz_eydz 160 | ***********/ 161 | 162 | template 163 | __global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias, 164 | T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) { 165 | int plane = blockIdx.x; 166 | 167 | T _weight = affine ? abs(weight[plane]) + eps : 1.f; 168 | T _bias = affine ? 
bias[plane] : 0.f; 169 | 170 | Pair res = reduce, GradOp>(GradOp(_weight, _bias, z, dz, chn, sp), plane, num, sp); 171 | __syncthreads(); 172 | 173 | if (threadIdx.x == 0) { 174 | edz[plane] = res.v1; 175 | eydz[plane] = res.v2; 176 | } 177 | } 178 | 179 | std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 180 | bool affine, float eps) { 181 | CHECK_CUDA_INPUT(z); 182 | CHECK_CUDA_INPUT(dz); 183 | CHECK_CUDA_INPUT(weight); 184 | CHECK_CUDA_INPUT(bias); 185 | 186 | // Extract dimensions 187 | int64_t num, chn, sp; 188 | get_dims(z, num, chn, sp); 189 | 190 | auto edz = at::empty({chn}, z.options()); 191 | auto eydz = at::empty({chn}, z.options()); 192 | 193 | // Run kernel 194 | dim3 blocks(chn); 195 | dim3 threads(getNumThreads(sp)); 196 | auto stream = at::cuda::getCurrentCUDAStream(); 197 | AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] { 198 | edz_eydz_kernel<<>>( 199 | z.data(), 200 | dz.data(), 201 | weight.data(), 202 | bias.data(), 203 | edz.data(), 204 | eydz.data(), 205 | affine, eps, num, chn, sp); 206 | })); 207 | 208 | return {edz, eydz}; 209 | } 210 | 211 | /*********** 212 | * backward 213 | ***********/ 214 | 215 | template 216 | __global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz, 217 | const T *eydz, T *dx, bool affine, float eps, int num, int chn, int sp) { 218 | int plane = blockIdx.x; 219 | 220 | T _weight = affine ? abs(weight[plane]) + eps : 1.f; 221 | T _bias = affine ? 
bias[plane] : 0.f; 222 | T _var = var[plane]; 223 | T _edz = edz[plane]; 224 | T _eydz = eydz[plane]; 225 | 226 | T _mul = _weight * rsqrt(_var + eps); 227 | T count = T(num * sp); 228 | 229 | for (int batch = 0; batch < num; ++batch) { 230 | for (int n = threadIdx.x; n < sp; n += blockDim.x) { 231 | T _dz = dz[(batch * chn + plane) * sp + n]; 232 | T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight; 233 | 234 | dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul; 235 | } 236 | } 237 | } 238 | 239 | at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 240 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 241 | CHECK_CUDA_INPUT(z); 242 | CHECK_CUDA_INPUT(dz); 243 | CHECK_CUDA_INPUT(var); 244 | CHECK_CUDA_INPUT(weight); 245 | CHECK_CUDA_INPUT(bias); 246 | CHECK_CUDA_INPUT(edz); 247 | CHECK_CUDA_INPUT(eydz); 248 | 249 | // Extract dimensions 250 | int64_t num, chn, sp; 251 | get_dims(z, num, chn, sp); 252 | 253 | auto dx = at::zeros_like(z); 254 | 255 | // Run kernel 256 | dim3 blocks(chn); 257 | dim3 threads(getNumThreads(sp)); 258 | auto stream = at::cuda::getCurrentCUDAStream(); 259 | AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] { 260 | backward_kernel<<>>( 261 | z.data(), 262 | dz.data(), 263 | var.data(), 264 | weight.data(), 265 | bias.data(), 266 | edz.data(), 267 | eydz.data(), 268 | dx.data(), 269 | affine, eps, num, chn, sp); 270 | })); 271 | 272 | return dx; 273 | } 274 | 275 | /************** 276 | * activations 277 | **************/ 278 | 279 | template 280 | inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) { 281 | // Create thrust pointers 282 | thrust::device_ptr th_z = thrust::device_pointer_cast(z); 283 | thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); 284 | 285 | auto stream = at::cuda::getCurrentCUDAStream(); 286 | thrust::transform_if(thrust::cuda::par.on(stream), 287 | th_dz, th_dz + 
/**************
 * activations
 **************/

// Leaky-ReLU backward on device memory, IN PLACE on both buffers.
// Pass 1 scales dz by `slope` wherever the matching z is negative (z is the
// stencil); pass 2 then inverts the activation by dividing those z by `slope`.
// dz must be handled first because pass 2 overwrites the stencil values.
template<typename T>
inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
  // Create thrust pointers
  thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
  thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);

  auto stream = at::cuda::getCurrentCUDAStream();
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_dz, th_dz + count, th_z, th_dz,
                       [slope] __device__ (const T& dz) { return dz * slope; },
                       [] __device__ (const T& z) { return z < 0; });
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_z, th_z + count, th_z,
                       [slope] __device__ (const T& z) { return z / slope; },
                       [] __device__ (const T& z) { return z < 0; });
}

// Host entry point: validates the inputs and dispatches on the dtype of `z`.
void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();

  AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
    leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
  }));
}

// ELU backward on device memory, IN PLACE on both buffers.
// Pass 1 scales dz by (z + 1) wherever z is negative; pass 2 then inverts the
// activation with log1p(z). dz is updated first because it needs the original z.
template<typename T>
inline void elu_backward_impl(T *z, T *dz, int64_t count) {
  // Create thrust pointers
  thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
  thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);

  auto stream = at::cuda::getCurrentCUDAStream();
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_dz, th_dz + count, th_z, th_z, th_dz,
                       [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
                       [] __device__ (const T& z) { return z < 0; });
  thrust::transform_if(thrust::cuda::par.on(stream),
                       th_z, th_z + count, th_z,
                       [] __device__ (const T& z) { return log1p(z); },
                       [] __device__ (const T& z) { return z < 0; });
}

// Host entry point: validates the inputs and dispatches on the dtype of `z`.
void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
  CHECK_CUDA_INPUT(z);
  CHECK_CUDA_INPUT(dz);

  int64_t count = z.numel();

  // The dispatch name is what appears in the "not implemented for <dtype>"
  // error, so it must identify this function (it previously carried the
  // copy-pasted leaky-ReLU name).
  AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cuda", ([&] {
    elu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), count);
  }));
}
| 3 | #include 4 | 5 | #include 6 | 7 | #include "utils/checks.h" 8 | #include "utils/cuda.cuh" 9 | #include "inplace_abn.h" 10 | 11 | #include 12 | 13 | // Operations for reduce 14 | struct SumOpH { 15 | __device__ SumOpH(const half *t, int c, int s) 16 | : tensor(t), chn(c), sp(s) {} 17 | __device__ __forceinline__ float operator()(int batch, int plane, int n) { 18 | return __half2float(tensor[(batch * chn + plane) * sp + n]); 19 | } 20 | const half *tensor; 21 | const int chn; 22 | const int sp; 23 | }; 24 | 25 | struct VarOpH { 26 | __device__ VarOpH(float m, const half *t, int c, int s) 27 | : mean(m), tensor(t), chn(c), sp(s) {} 28 | __device__ __forceinline__ float operator()(int batch, int plane, int n) { 29 | const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]); 30 | return (t - mean) * (t - mean); 31 | } 32 | const float mean; 33 | const half *tensor; 34 | const int chn; 35 | const int sp; 36 | }; 37 | 38 | struct GradOpH { 39 | __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s) 40 | : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} 41 | __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { 42 | float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight; 43 | float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); 44 | return Pair(_dz, _y * _dz); 45 | } 46 | const float weight; 47 | const float bias; 48 | const half *z; 49 | const half *dz; 50 | const int chn; 51 | const int sp; 52 | }; 53 | 54 | /*********** 55 | * mean_var 56 | ***********/ 57 | 58 | __global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) { 59 | int plane = blockIdx.x; 60 | float norm = 1.f / static_cast(num * sp); 61 | 62 | float _mean = reduce(SumOpH(x, chn, sp), plane, num, sp) * norm; 63 | __syncthreads(); 64 | float _var = reduce(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm; 65 | 66 | if (threadIdx.x == 0) { 67 | 
mean[plane] = _mean; 68 | var[plane] = _var; 69 | } 70 | } 71 | 72 | std::vector mean_var_cuda_h(at::Tensor x) { 73 | CHECK_CUDA_INPUT(x); 74 | 75 | // Extract dimensions 76 | int64_t num, chn, sp; 77 | get_dims(x, num, chn, sp); 78 | 79 | // Prepare output tensors 80 | auto mean = at::empty({chn},x.options().dtype(at::kFloat)); 81 | auto var = at::empty({chn},x.options().dtype(at::kFloat)); 82 | 83 | // Run kernel 84 | dim3 blocks(chn); 85 | dim3 threads(getNumThreads(sp)); 86 | auto stream = at::cuda::getCurrentCUDAStream(); 87 | mean_var_kernel_h<<>>( 88 | reinterpret_cast(x.data()), 89 | mean.data(), 90 | var.data(), 91 | num, chn, sp); 92 | 93 | return {mean, var}; 94 | } 95 | 96 | /********** 97 | * forward 98 | **********/ 99 | 100 | __global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias, 101 | bool affine, float eps, int num, int chn, int sp) { 102 | int plane = blockIdx.x; 103 | 104 | const float _mean = mean[plane]; 105 | const float _var = var[plane]; 106 | const float _weight = affine ? abs(weight[plane]) + eps : 1.f; 107 | const float _bias = affine ? 
bias[plane] : 0.f; 108 | 109 | const float mul = rsqrt(_var + eps) * _weight; 110 | 111 | for (int batch = 0; batch < num; ++batch) { 112 | for (int n = threadIdx.x; n < sp; n += blockDim.x) { 113 | half *x_ptr = x + (batch * chn + plane) * sp + n; 114 | float _x = __half2float(*x_ptr); 115 | float _y = (_x - _mean) * mul + _bias; 116 | 117 | *x_ptr = __float2half(_y); 118 | } 119 | } 120 | } 121 | 122 | at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 123 | bool affine, float eps) { 124 | CHECK_CUDA_INPUT(x); 125 | CHECK_CUDA_INPUT(mean); 126 | CHECK_CUDA_INPUT(var); 127 | CHECK_CUDA_INPUT(weight); 128 | CHECK_CUDA_INPUT(bias); 129 | 130 | // Extract dimensions 131 | int64_t num, chn, sp; 132 | get_dims(x, num, chn, sp); 133 | 134 | // Run kernel 135 | dim3 blocks(chn); 136 | dim3 threads(getNumThreads(sp)); 137 | auto stream = at::cuda::getCurrentCUDAStream(); 138 | forward_kernel_h<<>>( 139 | reinterpret_cast(x.data()), 140 | mean.data(), 141 | var.data(), 142 | weight.data(), 143 | bias.data(), 144 | affine, eps, num, chn, sp); 145 | 146 | return x; 147 | } 148 | 149 | __global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias, 150 | float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) { 151 | int plane = blockIdx.x; 152 | 153 | float _weight = affine ? abs(weight[plane]) + eps : 1.f; 154 | float _bias = affine ? 
bias[plane] : 0.f; 155 | 156 | Pair res = reduce, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp); 157 | __syncthreads(); 158 | 159 | if (threadIdx.x == 0) { 160 | edz[plane] = res.v1; 161 | eydz[plane] = res.v2; 162 | } 163 | } 164 | 165 | std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 166 | bool affine, float eps) { 167 | CHECK_CUDA_INPUT(z); 168 | CHECK_CUDA_INPUT(dz); 169 | CHECK_CUDA_INPUT(weight); 170 | CHECK_CUDA_INPUT(bias); 171 | 172 | // Extract dimensions 173 | int64_t num, chn, sp; 174 | get_dims(z, num, chn, sp); 175 | 176 | auto edz = at::empty({chn},z.options().dtype(at::kFloat)); 177 | auto eydz = at::empty({chn},z.options().dtype(at::kFloat)); 178 | 179 | // Run kernel 180 | dim3 blocks(chn); 181 | dim3 threads(getNumThreads(sp)); 182 | auto stream = at::cuda::getCurrentCUDAStream(); 183 | edz_eydz_kernel_h<<>>( 184 | reinterpret_cast(z.data()), 185 | reinterpret_cast(dz.data()), 186 | weight.data(), 187 | bias.data(), 188 | edz.data(), 189 | eydz.data(), 190 | affine, eps, num, chn, sp); 191 | 192 | return {edz, eydz}; 193 | } 194 | 195 | __global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz, 196 | const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) { 197 | int plane = blockIdx.x; 198 | 199 | float _weight = affine ? abs(weight[plane]) + eps : 1.f; 200 | float _bias = affine ? 
bias[plane] : 0.f; 201 | float _var = var[plane]; 202 | float _edz = edz[plane]; 203 | float _eydz = eydz[plane]; 204 | 205 | float _mul = _weight * rsqrt(_var + eps); 206 | float count = float(num * sp); 207 | 208 | for (int batch = 0; batch < num; ++batch) { 209 | for (int n = threadIdx.x; n < sp; n += blockDim.x) { 210 | float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); 211 | float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight; 212 | 213 | dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul); 214 | } 215 | } 216 | } 217 | 218 | at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 219 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 220 | CHECK_CUDA_INPUT(z); 221 | CHECK_CUDA_INPUT(dz); 222 | CHECK_CUDA_INPUT(var); 223 | CHECK_CUDA_INPUT(weight); 224 | CHECK_CUDA_INPUT(bias); 225 | CHECK_CUDA_INPUT(edz); 226 | CHECK_CUDA_INPUT(eydz); 227 | 228 | // Extract dimensions 229 | int64_t num, chn, sp; 230 | get_dims(z, num, chn, sp); 231 | 232 | auto dx = at::zeros_like(z); 233 | 234 | // Run kernel 235 | dim3 blocks(chn); 236 | dim3 threads(getNumThreads(sp)); 237 | auto stream = at::cuda::getCurrentCUDAStream(); 238 | backward_kernel_h<<>>( 239 | reinterpret_cast(z.data()), 240 | reinterpret_cast(dz.data()), 241 | var.data(), 242 | weight.data(), 243 | bias.data(), 244 | edz.data(), 245 | eydz.data(), 246 | reinterpret_cast(dx.data()), 247 | affine, eps, num, chn, sp); 248 | 249 | return dx; 250 | } 251 | 252 | __global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) { 253 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x){ 254 | float _z = __half2float(z[i]); 255 | if (_z < 0) { 256 | dz[i] = __float2half(__half2float(dz[i]) * slope); 257 | z[i] = __float2half(_z / slope); 258 | } 259 | } 260 | } 261 | 262 | void 
leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) { 263 | CHECK_CUDA_INPUT(z); 264 | CHECK_CUDA_INPUT(dz); 265 | 266 | int64_t count = z.numel(); 267 | dim3 threads(getNumThreads(count)); 268 | dim3 blocks = (count + threads.x - 1) / threads.x; 269 | auto stream = at::cuda::getCurrentCUDAStream(); 270 | leaky_relu_backward_impl_h<<>>( 271 | reinterpret_cast(z.data()), 272 | reinterpret_cast(dz.data()), 273 | slope, count); 274 | } 275 | 276 | -------------------------------------------------------------------------------- /ShelfNet18_realtime/modules/src/utils/checks.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | // Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT 6 | #ifndef AT_CHECK 7 | #define AT_CHECK AT_ASSERT 8 | #endif 9 | 10 | #define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") 11 | #define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") 12 | #define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") 13 | 14 | #define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 15 | #define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) -------------------------------------------------------------------------------- /ShelfNet18_realtime/modules/src/utils/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | /* 6 | * Functions to share code between CPU and GPU 7 | */ 8 | 9 | #ifdef __CUDACC__ 10 | // CUDA versions 11 | 12 | #define HOST_DEVICE __host__ __device__ 13 | #define INLINE_HOST_DEVICE __host__ __device__ inline 14 | #define FLOOR(x) floor(x) 15 | 16 | #if __CUDA_ARCH__ >= 600 17 | // Recent compute capabilities have block-level atomicAdd for all data types, so we use that 18 | #define ACCUM(x,y) atomicAdd_block(&(x),(y)) 19 | #else 20 | // Older architectures 
don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float 21 | // and use the known atomicCAS-based implementation for double 22 | template 23 | __device__ inline data_t atomic_add(data_t *address, data_t val) { 24 | return atomicAdd(address, val); 25 | } 26 | 27 | template<> 28 | __device__ inline double atomic_add(double *address, double val) { 29 | unsigned long long int* address_as_ull = (unsigned long long int*)address; 30 | unsigned long long int old = *address_as_ull, assumed; 31 | do { 32 | assumed = old; 33 | old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); 34 | } while (assumed != old); 35 | return __longlong_as_double(old); 36 | } 37 | 38 | #define ACCUM(x,y) atomic_add(&(x),(y)) 39 | #endif // #if __CUDA_ARCH__ >= 600 40 | 41 | #else 42 | // CPU versions 43 | 44 | #define HOST_DEVICE 45 | #define INLINE_HOST_DEVICE inline 46 | #define FLOOR(x) std::floor(x) 47 | #define ACCUM(x,y) (x) += (y) 48 | 49 | #endif // #ifdef __CUDACC__ -------------------------------------------------------------------------------- /ShelfNet18_realtime/modules/src/utils/cuda.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * General settings and functions 5 | */ 6 | const int WARP_SIZE = 32; 7 | const int MAX_BLOCK_SIZE = 1024; 8 | 9 | static int getNumThreads(int nElem) { 10 | int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE}; 11 | for (int i = 0; i < 6; ++i) { 12 | if (nElem <= threadSizes[i]) { 13 | return threadSizes[i]; 14 | } 15 | } 16 | return MAX_BLOCK_SIZE; 17 | } 18 | 19 | /* 20 | * Reduction utilities 21 | */ 22 | template 23 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, 24 | unsigned int mask = 0xffffffff) { 25 | #if CUDART_VERSION >= 9000 26 | return __shfl_xor_sync(mask, value, laneMask, width); 27 | #else 28 | return __shfl_xor(value, laneMask, width); 
#!/usr/bin/python
# -*- encoding: utf-8 -*-
# File: ShelfNet18_realtime/optimizer.py


import torch
import logging

logger = logging.getLogger()


class Optimizer(object):
    """SGD wrapper with geometric warm-up followed by polynomial decay.

    The wrapped model must expose ``get_params()`` returning four parameter
    lists: ``(wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params)``.
    The two ``nowd`` groups are trained without weight decay and the two
    ``lr_mul`` groups use ``LR_MUL`` times the scheduled learning rate.

    Schedule, as a function of the iteration counter ``it``:

    * ``it <= warmup_steps``: ``warmup_start_lr * warmup_factor ** it``, which
      reaches ``lr0`` exactly at ``it == warmup_steps``;
    * afterwards: ``lr0 * (1 - progress) ** power`` where ``progress`` is the
      fraction of the post-warm-up iterations already done, clamped to
      ``[0, 1]`` so the rate stays real and non-negative past ``max_iter``.
    """

    # Multiplier applied to the parameter groups flagged with 'lr_mul'.
    LR_MUL = 10

    def __init__(self,
                 model,
                 lr0,
                 momentum,
                 wd,
                 warmup_steps,
                 warmup_start_lr,
                 max_iter,
                 power,
                 *args, **kwargs):
        self.warmup_steps = warmup_steps
        self.warmup_start_lr = warmup_start_lr
        self.lr0 = lr0
        self.lr = self.lr0
        self.max_iter = float(max_iter)
        self.power = power
        self.it = 0
        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = model.get_params()
        param_list = [
            {'params': wd_params},
            {'params': nowd_params, 'weight_decay': 0},
            {'params': lr_mul_wd_params, 'lr_mul': True},
            {'params': lr_mul_nowd_params, 'weight_decay': 0, 'lr_mul': True},
        ]
        self.optim = torch.optim.SGD(
            param_list,
            lr=lr0,
            momentum=momentum,
            weight_decay=wd)
        # Without a warm-up phase there is no per-step growth factor to
        # compute; the unguarded expression would divide by zero.
        if self.warmup_steps > 0:
            self.warmup_factor = (self.lr0 / self.warmup_start_lr) ** (1. / self.warmup_steps)
        else:
            self.warmup_factor = 1.0

    def get_lr(self):
        """Return the base learning rate for the current iteration ``self.it``."""
        if self.warmup_steps > 0 and self.it <= self.warmup_steps:
            return self.warmup_start_lr * (self.warmup_factor ** self.it)

        decay_span = self.max_iter - self.warmup_steps
        if decay_span > 0:
            progress = (self.it - self.warmup_steps) / decay_span
        else:
            # No room for a decay phase: treat the schedule as finished.
            progress = 1.0
        # A negative base raised to a fractional power yields a complex number
        # in Python 3, so keep the base inside [0, 1] once it exceeds max_iter.
        remaining = min(1.0, max(0.0, 1.0 - progress))
        return self.lr0 * remaining ** self.power

    def step(self):
        """Refresh every group's learning rate, then take one SGD step."""
        self.lr = self.get_lr()
        for pg in self.optim.param_groups:
            if pg.get('lr_mul', False):
                pg['lr'] = self.lr * self.LR_MUL
            else:
                pg['lr'] = self.lr
        # 'lr_mul' only ever lives on individual groups, never in the
        # optimizer defaults, so the default rate is always the base rate.
        self.optim.defaults['lr'] = self.lr
        self.it += 1
        self.optim.step()
        if self.it == self.warmup_steps + 2:
            logger.info('==> warmup done, start to implement poly lr strategy')

    def zero_grad(self):
        """Clear the gradients of every optimised parameter."""
        self.optim.zero_grad()
#!/usr/bin/python
# -*- encoding: utf-8 -*-
# File: ShelfNet18_realtime/resnet.py

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.model_zoo as modelzoo

from modules.bn import InPlaceABNSync as BatchNorm2d

resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'


def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution with a padding of 1."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )


class BasicBlock(nn.Module):
    """Two 3x3 convolutions with a residual shortcut.

    The shortcut is a 1x1 convolution followed by normalisation whenever the
    channel count or the stride changes; otherwise the input is added as is.
    """

    def __init__(self, in_chan, out_chan, stride=1):
        super().__init__()
        self.conv1 = conv3x3(in_chan, out_chan, stride)
        self.bn1 = BatchNorm2d(out_chan)
        self.conv2 = conv3x3(out_chan, out_chan)
        self.bn2 = BatchNorm2d(out_chan, activation='none')
        self.relu = nn.ReLU(inplace=True)

        shape_changes = in_chan != out_chan or stride != 1
        if shape_changes:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_chan, out_chan,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(out_chan, activation='none'),
            )
        else:
            self.downsample = None

    def forward(self, x):
        # Main branch first, exactly as before; the shortcut is built from the
        # untouched block input afterwards.
        residual = self.bn2(self.conv2(self.bn1(self.conv1(x))))
        shortcut = x if self.downsample is None else self.downsample(x)
        return self.relu(shortcut + residual)


def create_layer_basic(in_chan, out_chan, bnum, stride=1):
    """Stack ``bnum`` BasicBlocks; only the first one may change shape."""
    blocks = [BasicBlock(in_chan, out_chan, stride=stride)]
    blocks.extend(
        BasicBlock(out_chan, out_chan, stride=1) for _ in range(bnum - 1)
    )
    return nn.Sequential(*blocks)


class Resnet18(nn.Module):
    """ResNet-18 trunk returning the 1/8, 1/16 and 1/32 resolution features.

    The torchvision ImageNet checkpoint at ``resnet18_url`` is downloaded and
    loaded (minus the ``fc`` classifier) every time an instance is created.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
        self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
        self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
        self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)
        self.init_weight()

    def forward(self, x):
        stem = self.maxpool(self.bn1(self.conv1(x)))
        stage1 = self.layer1(stem)

        feat8 = self.layer2(stage1)   # 1/8
        feat16 = self.layer3(feat8)   # 1/16
        feat32 = self.layer4(feat16)  # 1/32
        return feat8, feat16, feat32

    def init_weight(self):
        """Overlay the pretrained weights on this model's own state dict."""
        pretrained = modelzoo.load_url(resnet18_url)
        merged = self.state_dict()
        # Every key mentioning 'fc' belongs to the classifier head this trunk
        # does not have, so it is left out.
        merged.update(
            {key: value for key, value in pretrained.items() if 'fc' not in key}
        )
        self.load_state_dict(merged)

    def get_params(self):
        """Split parameters into ``(weight-decayed, not weight-decayed)`` lists.

        Conv/linear weights are decayed; their biases and every parameter of
        a normalisation layer are not.
        """
        decayed, undecayed = [], []
        for module in self.modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                decayed.append(module.weight)
                if module.bias is not None:
                    undecayed.append(module.bias)
            elif isinstance(module, (BatchNorm2d, nn.BatchNorm2d)):
                undecayed.extend(module.parameters())
        return decayed, undecayed


if __name__ == "__main__":
    net = Resnet18()
    x = torch.randn(16, 3, 224, 224)
    out = net(x)
    for feature in out[:3]:
        print(feature.size())
    net.get_params()
class ShelfNet(nn.Module):
    """Segmentation network: ResNet-18 backbone, Decoder and LadderBlock.

    The three backbone feature maps are channel-reduced by 1x1 ``ConvBNReLU``
    layers, passed through ``Decoder`` and then ``LadderBlock``; the last
    three ladder outputs feed three ``NetOutput`` heads whose logits are
    bilinearly resized to the input resolution.
    """

    def __init__(self, n_classes, *args, **kwargs):
        super(ShelfNet, self).__init__()
        self.backbone = Resnet18()

        shelf_cfg = dict(planes=64, layers=3, kernel=3)
        self.decoder = Decoder(**shelf_cfg)
        self.ladder = LadderBlock(**shelf_cfg)

        # Main head plus two auxiliary heads on the coarser ladder outputs.
        self.conv_out = NetOutput(64, 64, n_classes)
        self.conv_out16 = NetOutput(128, 64, n_classes)
        self.conv_out32 = NetOutput(256, 64, n_classes)

        # 1x1 convolutions halving the channel count of each backbone output.
        self.trans1 = ConvBNReLU(128, 64, ks=1, stride=1, padding=0)
        self.trans2 = ConvBNReLU(256, 128, ks=1, stride=1, padding=0)
        self.trans3 = ConvBNReLU(512, 256, ks=1, stride=1, padding=0)

    def forward(self, x, aux=True):
        """Return full-resolution logits for ``x``.

        With ``aux`` true a 3-tuple ``(main, aux16, aux32)`` is returned;
        otherwise only the main logits are computed and returned.
        """
        target_hw = tuple(x.size()[2:])

        def predict(head, feature):
            # Apply one output head and resize its logits to the input size.
            logits = head(feature)
            return F.interpolate(
                logits, target_hw, mode='bilinear', align_corners=True)

        feat8, feat16, feat32 = self.backbone(x)
        reduced = [self.trans1(feat8), self.trans2(feat16), self.trans3(feat32)]

        ladder_out = self.ladder(self.decoder(reduced))
        feat_cp8, feat_cp16, feat_cp32 = (
            ladder_out[-1], ladder_out[-2], ladder_out[-3])

        feat_out = predict(self.conv_out, feat_cp8)
        if not aux:
            return feat_out

        feat_out16 = predict(self.conv_out16, feat_cp16)
        feat_out32 = predict(self.conv_out32, feat_cp32)
        return feat_out, feat_out16, feat_out32

    def get_params(self):
        """Collect the children's parameter groups.

        Returns ``(wd_params, nowd_params, lr_mul_wd_params,
        lr_mul_nowd_params)``: parameters of ``LadderBlock``, ``NetOutput``,
        ``Decoder`` and ``ConvBNReLU`` children land in the two ``lr_mul``
        lists, everything else (the backbone) in the first two.
        """
        lr_mul_types = (LadderBlock, NetOutput, Decoder, ConvBNReLU)
        wd_params, nowd_params = [], []
        lr_mul_wd_params, lr_mul_nowd_params = [], []
        for child in self.children():
            child_wd, child_nowd = child.get_params()
            if isinstance(child, lr_mul_types):
                lr_mul_wd_params.extend(child_wd)
                lr_mul_nowd_params.extend(child_nowd)
            else:
                wd_params.extend(child_wd)
                nowd_params.extend(child_nowd)
        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
def test(n_iters=100, input_size=(1, 3, 512, 512), n_classes=19):
    """Time forward passes of the ``rf_lw101`` model on the current CUDA device.

    Args:
        n_iters: number of timed forward passes (the first one is treated as
            warm-up and excluded from the mean). Must be at least 2.
        input_size: shape of the random input tensor, ``(N, C, H, W)``.
        n_classes: value passed to ``rf_lw101``.

    Returns:
        Mean wall-clock time in seconds of the timed passes (also printed).

    Raises:
        ValueError: if ``n_iters`` is smaller than 2.
    """
    if n_iters < 2:
        raise ValueError(
            "n_iters must be at least 2: the first run is discarded as warm-up")

    model = rf_lw101(n_classes)
    print(model)
    # count parameter number
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("Total number of parameters: %.3fM" % (pytorch_total_params/1e6))

    model = model.cuda()
    model.eval()

    run_time = []
    for _ in range(n_iters):
        batch = torch.randn(*input_size).cuda()
        # Make sure context initialization and the random fill have finished
        # before the clock starts.
        torch.cuda.synchronize()
        start = time.perf_counter()

        with torch.no_grad():
            model(batch)

        # Wait for the queued GPU work to finish before reading the clock.
        torch.cuda.synchronize()
        end = time.perf_counter()

        print(end - start)
        run_time.append(end - start)

    # The first pass includes one-off start-up cost; leave it out of the mean.
    run_time.pop(0)

    mean_time = np.mean(run_time)
    print('Mean running time is ', mean_time)
    return mean_time
def test(n_iters=100, input_size=(1, 3, 1024, 2048), n_classes=19):
    """Time ``ShelfNet`` forward passes (``aux=False``) on the current CUDA device.

    Args:
        n_iters: number of timed forward passes (the first one is treated as
            warm-up and excluded from the mean). Must be at least 2.
        input_size: shape of the random input tensor, ``(N, C, H, W)``.
        n_classes: number of classes the ``ShelfNet`` is built with.

    Returns:
        Mean wall-clock time in seconds of the timed passes (also printed).

    Raises:
        ValueError: if ``n_iters`` is smaller than 2.
    """
    if n_iters < 2:
        raise ValueError(
            "n_iters must be at least 2: the first run is discarded as warm-up")

    model = ShelfNet(n_classes=n_classes)

    print(model)
    # count parameter number
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("Total number of parameters: %.3fM" % (pytorch_total_params/1e6))

    model = model.cuda()
    model.eval()

    run_time = []
    for _ in range(n_iters):
        batch = torch.randn(*input_size).cuda()
        # Make sure context initialization and the random fill have finished
        # before the clock starts.
        torch.cuda.synchronize()
        start = time.perf_counter()

        with torch.no_grad():
            # Only the main output is needed for a speed measurement.
            model(batch, aux=False)

        # Wait for the queued GPU work to finish before reading the clock.
        torch.cuda.synchronize()
        end = time.perf_counter()

        print(end - start)
        run_time.append(end - start)

    # The first pass includes one-off start-up cost; leave it out of the mean.
    run_time.pop(0)

    mean_time = np.mean(run_time)
    print('Mean running time is ', mean_time)
    return mean_time
def parse_args():
    """Parse the training script's command line.

    Returns:
        The ``argparse`` namespace; its only option is ``local_rank``
        (``--local_rank``, int, default ``-1``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--local_rank', dest='local_rank', type=int, default=-1)
    namespace = parser.parse_args()
    return namespace
OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx) 83 | 84 | ## optimizer 85 | momentum = 0.9 86 | weight_decay = 5e-4 87 | lr_start = 1e-2 88 | max_iter = 80000 89 | power = 0.9 90 | warmup_steps = 1000 91 | warmup_start_lr = 1e-5 92 | optim = Optimizer( 93 | model = net.module, 94 | lr0 = lr_start, 95 | momentum = momentum, 96 | wd = weight_decay, 97 | warmup_steps = warmup_steps, 98 | warmup_start_lr = warmup_start_lr, 99 | max_iter = max_iter, 100 | power = power) 101 | 102 | ## train loop 103 | msg_iter = 50 104 | loss_avg = [] 105 | st = glob_st = time.time() 106 | diter = iter(dl) 107 | epoch = 0 108 | for it in range(max_iter): 109 | try: 110 | im, lb = next(diter) 111 | if not im.size()[0]==n_img_per_gpu: raise StopIteration 112 | except StopIteration: 113 | epoch += 1 114 | sampler.set_epoch(epoch) 115 | diter = iter(dl) 116 | im, lb = next(diter) 117 | im = im.cuda() 118 | lb = lb.cuda() 119 | H, W = im.size()[2:] 120 | lb = torch.squeeze(lb, 1) 121 | 122 | optim.zero_grad() 123 | out, out16, out32 = net(im) 124 | lossp = LossP(out, lb) 125 | loss2 = Loss2(out16, lb) 126 | loss3 = Loss3(out32, lb) 127 | loss = lossp + loss2 + loss3 128 | loss.backward() 129 | optim.step() 130 | 131 | loss_avg.append(loss.item()) 132 | ## print training log message 133 | if (it+1)%msg_iter==0: 134 | loss_avg = sum(loss_avg) / len(loss_avg) 135 | lr = optim.lr 136 | ed = time.time() 137 | t_intv, glob_t_intv = ed - st, ed - glob_st 138 | eta = int((max_iter - it) * (glob_t_intv / it)) 139 | eta = str(datetime.timedelta(seconds=eta)) 140 | msg = ', '.join([ 141 | 'it: {it}/{max_it}', 142 | 'lr: {lr:4f}', 143 | 'loss: {loss:.4f}', 144 | 'eta: {eta}', 145 | 'time: {time:.4f}', 146 | ]).format( 147 | it = it+1, 148 | max_it = max_iter, 149 | lr = lr, 150 | loss = loss_avg, 151 | time = t_intv, 152 | eta = eta 153 | ) 154 | logger.info(msg) 155 | loss_avg = [] 156 | st = ed 157 | 158 | if it % 1000 == 0: 159 | ## dump the final model 160 | save_pth = 
class RandomCrop(object):
    """Randomly crop an image/label pair to a fixed ``(width, height)`` size.

    ``size`` is unpacked as ``(W, H)``. The callable takes a dict with keys
    ``'im'`` and ``'lb'`` (objects exposing ``size``, ``resize`` and ``crop``
    like PIL images) and returns a new dict with the same keys.
    """

    def __init__(self, size, *args, **kwargs):
        self.size = size

    def __call__(self, im_lb):
        """Return ``im_lb`` cropped to ``self.size`` at a random offset.

        If the input is smaller than the crop window in either dimension it
        is first enlarged (bilinear for the image, nearest for the label) so
        that the window fits.
        """
        im = im_lb['im']
        lb = im_lb['lb']
        assert im.size == lb.size
        W, H = self.size
        w, h = im.size

        if (W, H) == (w, h):
            return dict(im=im, lb=lb)
        if w < W or h < H:
            # Use the larger of the two ratios so BOTH sides reach the crop
            # size. Looking at one side only can leave the other side too
            # small (or even shrink the image) for non-square crop sizes,
            # which pushes the crop box outside the image. For square crops
            # this picks the same factor as before.
            scale = max(float(W) / w, float(H) / h)
            # "+ 1" before truncation guarantees the result is >= W / >= H.
            w, h = int(scale * w + 1), int(scale * h + 1)
            im = im.resize((w, h), Image.BILINEAR)
            lb = lb.resize((w, h), Image.NEAREST)
        sw, sh = random.random() * (w - W), random.random() * (h - H)
        left, top = int(sw), int(sh)
        crop = left, top, left + W, top + H
        return dict(
            im=im.crop(crop),
            lb=lb.crop(crop),
        )
class ColorJitter(object):
    """Randomly scale brightness, contrast and saturation of the image.

    Each strength ``s`` that is positive enables the corresponding jitter
    with a factor drawn uniformly from ``[max(1 - s, 0), 1 + s]``. A strength
    of ``None`` or a non-positive value disables that jitter; the attribute
    is then ``None`` and the step is skipped (previously the attribute was
    left undefined and calling the transform raised ``AttributeError``).
    """

    def __init__(self, brightness=None, contrast=None, saturation=None, *args, **kwargs):
        self.brightness = self._jitter_range(brightness)
        self.contrast = self._jitter_range(contrast)
        self.saturation = self._jitter_range(saturation)

    @staticmethod
    def _jitter_range(strength):
        """Return the ``[low, high]`` factor range, or ``None`` when disabled."""
        if strength is None or not strength > 0:
            return None
        return [max(1 - strength, 0), 1 + strength]

    @staticmethod
    def _draw(factor_range):
        """Draw one factor from ``factor_range``; ``None`` stays ``None``."""
        if factor_range is None:
            return None
        return random.uniform(factor_range[0], factor_range[1])

    def __call__(self, im_lb):
        """Return a dict with the jittered ``'im'`` and the untouched ``'lb'``."""
        im = im_lb['im']
        lb = im_lb['lb']
        # Draw all factors before applying any of them, in the fixed order
        # brightness, contrast, saturation, so the random stream is the same
        # as before whenever all three jitters are enabled.
        r_brightness = self._draw(self.brightness)
        r_contrast = self._draw(self.contrast)
        r_saturation = self._draw(self.saturation)
        if r_brightness is not None:
            im = ImageEnhance.Brightness(im).enhance(r_brightness)
        if r_contrast is not None:
            im = ImageEnhance.Contrast(im).enhance(r_contrast)
        if r_saturation is not None:
            im = ImageEnhance.Color(im).enhance(r_saturation)
        return dict(im=im,
                    lb=lb,
                    )
do_list 103 | 104 | def __call__(self, im_lb): 105 | for comp in self.do_list: 106 | im_lb = comp(im_lb) 107 | return im_lb 108 | 109 | 110 | 111 | 112 | if __name__ == '__main__': 113 | flip = HorizontalFlip(p = 1) 114 | crop = RandomCrop((321, 321)) 115 | rscales = RandomScale((0.75, 1.0, 1.5, 1.75, 2.0)) 116 | img = Image.open('data/img.jpg') 117 | lb = Image.open('data/label.png') 118 | -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/__pycache__/ShelfBlock.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/__pycache__/ShelfBlock.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/__pycache__/cityscapes.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/__pycache__/cityscapes.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/__pycache__/evaluate.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/__pycache__/evaluate.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/__pycache__/logger.cpython-37.pyc 
-------------------------------------------------------------------------------- /ShelfNet34_non_realtime/__pycache__/loss.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/__pycache__/loss.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/__pycache__/optimizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/__pycache__/optimizer.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/__pycache__/resnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/__pycache__/resnet.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/__pycache__/shelfnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/__pycache__/shelfnet.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/__pycache__/transform.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/__pycache__/transform.cpython-37.pyc -------------------------------------------------------------------------------- 
class CityScapes(Dataset):
    """Dataset over a Cityscapes-style directory tree.

    Images are read from ``<rootpth>/leftImg8bit/<mode>/<folder>/`` and, for
    ``'train'``/``'val'``, labels from ``<rootpth>/gtFine/<mode>/<folder>/``
    (files whose name contains ``labelIds``). Raw label ids are mapped to
    train ids using ``./cityscapes_info.json``, resolved against the current
    working directory.

    Args:
        rootpth: dataset root directory.
        cropsize: crop size handed to ``RandomCrop`` for training.
        mode: one of ``'train'``, ``'val'``, ``'test'``.
        label_scale: in ``'val'`` mode, factor by which the label image is
            resized (nearest neighbour) before conversion.
    """

    def __init__(self, rootpth, cropsize=(640, 480), mode='train', label_scale=1.0, *args, **kwargs):
        super(CityScapes, self).__init__(*args, **kwargs)
        assert mode in ('train', 'val', 'test')
        self.mode = mode
        self.ignore_lb = 255
        self.label_scale = label_scale
        with open('./cityscapes_info.json', 'r') as fr:
            labels_info = json.load(fr)
        self.lb_map = {el['id']: el['trainId'] for el in labels_info}

        ## parse img directory
        self.imgs = {}
        imgnames = []
        impth = osp.join(rootpth, 'leftImg8bit', mode)
        folders = os.listdir(impth)
        for fd in folders:
            fdpth = osp.join(impth, fd)
            im_names = os.listdir(fdpth)
            names = [el.replace('_leftImg8bit.png', '') for el in im_names]
            impths = [osp.join(fdpth, el) for el in im_names]
            imgnames.extend(names)
            self.imgs.update(dict(zip(names, impths)))

        ## parse gt directory
        self.labels = {}
        gtnames = []
        if self.mode == 'test':
            gtpth = osp.join(rootpth, mode)
        else:
            gtpth = osp.join(rootpth, 'gtFine', mode)
        folders = os.listdir(gtpth)
        for fd in folders:
            if fd == 'info.json':
                continue

            fdpth = osp.join(gtpth, fd)
            lbnames = os.listdir(fdpth)
            lbnames = [el for el in lbnames if 'labelIds' in el]
            names = [el.replace('_gtFine_labelIds.png', '') for el in lbnames]
            lbpths = [osp.join(fdpth, el) for el in lbnames]
            gtnames.extend(names)
            self.labels.update(dict(zip(names, lbpths)))

        self.imnames = imgnames
        self.len = len(self.imnames)
        # Image/label consistency is only checked when labels are expected.
        if self.mode != 'test':
            assert set(imgnames) == set(gtnames)
            assert set(self.imnames) == set(self.imgs.keys())
            assert set(self.imnames) == set(self.labels.keys())

        ## pre-processing
        self.to_tensor = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ])
        self.trans_train = Compose([
            ColorJitter(
                brightness = 0.5,
                contrast = 0.5,
                saturation = 0.5),
            HorizontalFlip(),
            RandomScale((0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
            RandomCrop(cropsize)
            ])

    def __getitem__(self, idx):
        """Return sample ``idx``.

        ``'train'``: ``(image tensor, label array)`` after the training
        augmentations. ``'val'``: ``(image tensor, label array)`` with the
        label optionally resized by ``label_scale``. ``'test'``:
        ``(image tensor, image path)``. Label arrays are int64 with a leading
        axis of length 1 and already converted to train ids.
        """
        fn = self.imnames[idx]
        impth = self.imgs[fn]
        img = Image.open(impth)

        if self.mode != 'test':
            lbpth = self.labels[fn]
            label = Image.open(lbpth)

        if self.mode == 'train':
            im_lb = dict(im = img, lb = label)
            im_lb = self.trans_train(im_lb)
            img, label = im_lb['im'], im_lb['lb']

            img = self.to_tensor(img)
            label = np.array(label).astype(np.int64)[np.newaxis, :]
            label = self.convert_labels(label)
            return img, label
        elif self.mode == 'val':
            img = self.to_tensor(img)

            if self.label_scale != 1.0:
                # PIL reports size as (width, height).
                W, H = label.size
                label = label.resize((int(W*self.label_scale), int(H*self.label_scale)),
                                     Image.NEAREST)

            label = np.array(label).astype(np.int64)[np.newaxis, :]
            label = self.convert_labels(label)
            return img, label
        else:
            img = self.to_tensor(img)
            return img, impth

    def __len__(self):
        return self.len

    def convert_labels(self, label):
        """Map raw ids in ``label`` to train ids via ``self.lb_map``.

        The array is modified in place and also returned. Every pixel is
        looked up by its ORIGINAL value: comparisons run against a snapshot,
        so a pixel that has already been remapped is never remapped a second
        time, whatever the iteration order of ``lb_map``. Values that are not
        keys of ``lb_map`` are left unchanged.
        """
        original = label.copy()
        for k, v in self.lb_map.items():
            label[original == k] = v
        return label
tqdm(ds): 137 | lb_uni = np.unique(lb).tolist() 138 | uni.extend(lb_uni) 139 | print(uni) 140 | print(set(uni)) 141 | 142 | -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/cityscapes_info.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "hasInstances": false, 4 | "category": "void", 5 | "catid": 0, 6 | "name": "unlabeled", 7 | "ignoreInEval": true, 8 | "id": 0, 9 | "color": [ 10 | 0, 11 | 0, 12 | 0 13 | ], 14 | "trainId": 255 15 | }, 16 | { 17 | "hasInstances": false, 18 | "category": "void", 19 | "catid": 0, 20 | "name": "ego vehicle", 21 | "ignoreInEval": true, 22 | "id": 1, 23 | "color": [ 24 | 0, 25 | 0, 26 | 0 27 | ], 28 | "trainId": 255 29 | }, 30 | { 31 | "hasInstances": false, 32 | "category": "void", 33 | "catid": 0, 34 | "name": "rectification border", 35 | "ignoreInEval": true, 36 | "id": 2, 37 | "color": [ 38 | 0, 39 | 0, 40 | 0 41 | ], 42 | "trainId": 255 43 | }, 44 | { 45 | "hasInstances": false, 46 | "category": "void", 47 | "catid": 0, 48 | "name": "out of roi", 49 | "ignoreInEval": true, 50 | "id": 3, 51 | "color": [ 52 | 0, 53 | 0, 54 | 0 55 | ], 56 | "trainId": 255 57 | }, 58 | { 59 | "hasInstances": false, 60 | "category": "void", 61 | "catid": 0, 62 | "name": "static", 63 | "ignoreInEval": true, 64 | "id": 4, 65 | "color": [ 66 | 0, 67 | 0, 68 | 0 69 | ], 70 | "trainId": 255 71 | }, 72 | { 73 | "hasInstances": false, 74 | "category": "void", 75 | "catid": 0, 76 | "name": "dynamic", 77 | "ignoreInEval": true, 78 | "id": 5, 79 | "color": [ 80 | 111, 81 | 74, 82 | 0 83 | ], 84 | "trainId": 255 85 | }, 86 | { 87 | "hasInstances": false, 88 | "category": "void", 89 | "catid": 0, 90 | "name": "ground", 91 | "ignoreInEval": true, 92 | "id": 6, 93 | "color": [ 94 | 81, 95 | 0, 96 | 81 97 | ], 98 | "trainId": 255 99 | }, 100 | { 101 | "hasInstances": false, 102 | "category": "flat", 103 | "catid": 1, 104 | "name": "road", 105 | "ignoreInEval": 
false, 106 | "id": 7, 107 | "color": [ 108 | 128, 109 | 64, 110 | 128 111 | ], 112 | "trainId": 0 113 | }, 114 | { 115 | "hasInstances": false, 116 | "category": "flat", 117 | "catid": 1, 118 | "name": "sidewalk", 119 | "ignoreInEval": false, 120 | "id": 8, 121 | "color": [ 122 | 244, 123 | 35, 124 | 232 125 | ], 126 | "trainId": 1 127 | }, 128 | { 129 | "hasInstances": false, 130 | "category": "flat", 131 | "catid": 1, 132 | "name": "parking", 133 | "ignoreInEval": true, 134 | "id": 9, 135 | "color": [ 136 | 250, 137 | 170, 138 | 160 139 | ], 140 | "trainId": 255 141 | }, 142 | { 143 | "hasInstances": false, 144 | "category": "flat", 145 | "catid": 1, 146 | "name": "rail track", 147 | "ignoreInEval": true, 148 | "id": 10, 149 | "color": [ 150 | 230, 151 | 150, 152 | 140 153 | ], 154 | "trainId": 255 155 | }, 156 | { 157 | "hasInstances": false, 158 | "category": "construction", 159 | "catid": 2, 160 | "name": "building", 161 | "ignoreInEval": false, 162 | "id": 11, 163 | "color": [ 164 | 70, 165 | 70, 166 | 70 167 | ], 168 | "trainId": 2 169 | }, 170 | { 171 | "hasInstances": false, 172 | "category": "construction", 173 | "catid": 2, 174 | "name": "wall", 175 | "ignoreInEval": false, 176 | "id": 12, 177 | "color": [ 178 | 102, 179 | 102, 180 | 156 181 | ], 182 | "trainId": 3 183 | }, 184 | { 185 | "hasInstances": false, 186 | "category": "construction", 187 | "catid": 2, 188 | "name": "fence", 189 | "ignoreInEval": false, 190 | "id": 13, 191 | "color": [ 192 | 190, 193 | 153, 194 | 153 195 | ], 196 | "trainId": 4 197 | }, 198 | { 199 | "hasInstances": false, 200 | "category": "construction", 201 | "catid": 2, 202 | "name": "guard rail", 203 | "ignoreInEval": true, 204 | "id": 14, 205 | "color": [ 206 | 180, 207 | 165, 208 | 180 209 | ], 210 | "trainId": 255 211 | }, 212 | { 213 | "hasInstances": false, 214 | "category": "construction", 215 | "catid": 2, 216 | "name": "bridge", 217 | "ignoreInEval": true, 218 | "id": 15, 219 | "color": [ 220 | 150, 221 | 100, 222 | 
100 223 | ], 224 | "trainId": 255 225 | }, 226 | { 227 | "hasInstances": false, 228 | "category": "construction", 229 | "catid": 2, 230 | "name": "tunnel", 231 | "ignoreInEval": true, 232 | "id": 16, 233 | "color": [ 234 | 150, 235 | 120, 236 | 90 237 | ], 238 | "trainId": 255 239 | }, 240 | { 241 | "hasInstances": false, 242 | "category": "object", 243 | "catid": 3, 244 | "name": "pole", 245 | "ignoreInEval": false, 246 | "id": 17, 247 | "color": [ 248 | 153, 249 | 153, 250 | 153 251 | ], 252 | "trainId": 5 253 | }, 254 | { 255 | "hasInstances": false, 256 | "category": "object", 257 | "catid": 3, 258 | "name": "polegroup", 259 | "ignoreInEval": true, 260 | "id": 18, 261 | "color": [ 262 | 153, 263 | 153, 264 | 153 265 | ], 266 | "trainId": 255 267 | }, 268 | { 269 | "hasInstances": false, 270 | "category": "object", 271 | "catid": 3, 272 | "name": "traffic light", 273 | "ignoreInEval": false, 274 | "id": 19, 275 | "color": [ 276 | 250, 277 | 170, 278 | 30 279 | ], 280 | "trainId": 6 281 | }, 282 | { 283 | "hasInstances": false, 284 | "category": "object", 285 | "catid": 3, 286 | "name": "traffic sign", 287 | "ignoreInEval": false, 288 | "id": 20, 289 | "color": [ 290 | 220, 291 | 220, 292 | 0 293 | ], 294 | "trainId": 7 295 | }, 296 | { 297 | "hasInstances": false, 298 | "category": "nature", 299 | "catid": 4, 300 | "name": "vegetation", 301 | "ignoreInEval": false, 302 | "id": 21, 303 | "color": [ 304 | 107, 305 | 142, 306 | 35 307 | ], 308 | "trainId": 8 309 | }, 310 | { 311 | "hasInstances": false, 312 | "category": "nature", 313 | "catid": 4, 314 | "name": "terrain", 315 | "ignoreInEval": false, 316 | "id": 22, 317 | "color": [ 318 | 152, 319 | 251, 320 | 152 321 | ], 322 | "trainId": 9 323 | }, 324 | { 325 | "hasInstances": false, 326 | "category": "sky", 327 | "catid": 5, 328 | "name": "sky", 329 | "ignoreInEval": false, 330 | "id": 23, 331 | "color": [ 332 | 70, 333 | 130, 334 | 180 335 | ], 336 | "trainId": 10 337 | }, 338 | { 339 | "hasInstances": true, 
340 | "category": "human", 341 | "catid": 6, 342 | "name": "person", 343 | "ignoreInEval": false, 344 | "id": 24, 345 | "color": [ 346 | 220, 347 | 20, 348 | 60 349 | ], 350 | "trainId": 11 351 | }, 352 | { 353 | "hasInstances": true, 354 | "category": "human", 355 | "catid": 6, 356 | "name": "rider", 357 | "ignoreInEval": false, 358 | "id": 25, 359 | "color": [ 360 | 255, 361 | 0, 362 | 0 363 | ], 364 | "trainId": 12 365 | }, 366 | { 367 | "hasInstances": true, 368 | "category": "vehicle", 369 | "catid": 7, 370 | "name": "car", 371 | "ignoreInEval": false, 372 | "id": 26, 373 | "color": [ 374 | 0, 375 | 0, 376 | 142 377 | ], 378 | "trainId": 13 379 | }, 380 | { 381 | "hasInstances": true, 382 | "category": "vehicle", 383 | "catid": 7, 384 | "name": "truck", 385 | "ignoreInEval": false, 386 | "id": 27, 387 | "color": [ 388 | 0, 389 | 0, 390 | 70 391 | ], 392 | "trainId": 14 393 | }, 394 | { 395 | "hasInstances": true, 396 | "category": "vehicle", 397 | "catid": 7, 398 | "name": "bus", 399 | "ignoreInEval": false, 400 | "id": 28, 401 | "color": [ 402 | 0, 403 | 60, 404 | 100 405 | ], 406 | "trainId": 15 407 | }, 408 | { 409 | "hasInstances": true, 410 | "category": "vehicle", 411 | "catid": 7, 412 | "name": "caravan", 413 | "ignoreInEval": true, 414 | "id": 29, 415 | "color": [ 416 | 0, 417 | 0, 418 | 90 419 | ], 420 | "trainId": 255 421 | }, 422 | { 423 | "hasInstances": true, 424 | "category": "vehicle", 425 | "catid": 7, 426 | "name": "trailer", 427 | "ignoreInEval": true, 428 | "id": 30, 429 | "color": [ 430 | 0, 431 | 0, 432 | 110 433 | ], 434 | "trainId": 255 435 | }, 436 | { 437 | "hasInstances": true, 438 | "category": "vehicle", 439 | "catid": 7, 440 | "name": "train", 441 | "ignoreInEval": false, 442 | "id": 31, 443 | "color": [ 444 | 0, 445 | 80, 446 | 100 447 | ], 448 | "trainId": 16 449 | }, 450 | { 451 | "hasInstances": true, 452 | "category": "vehicle", 453 | "catid": 7, 454 | "name": "motorcycle", 455 | "ignoreInEval": false, 456 | "id": 32, 457 | 
class MscEval(object):
    """Multi-scale, sliding-window, optionally flip-augmented mIoU evaluator.

    Args:
        model: callable network; element ``[0]`` of its output is used as the
            class logits.
        dataloader: iterable yielding ``(images, labels)`` batches.
        scales: scale factors applied to the input images.
        n_classes: number of classes.
        lb_ignore: label value excluded from the confusion matrix.
        cropsize: side length of the square evaluation window.
        flip: when true, also evaluate the horizontally flipped window and
            add its probabilities.
    """

    def __init__(self,
            model,
            dataloader,
            scales = (0.75, 1.0, 1.25, 1.5, 1.75, 2.0),
            n_classes = 19,
            lb_ignore = 255,
            cropsize = 1024,
            flip = True,
            *args, **kwargs):
        # ``scales`` defaults to an immutable tuple so the default cannot be
        # shared and mutated across instances; it is only iterated.
        self.scales = scales
        self.n_classes = n_classes
        self.lb_ignore = lb_ignore
        self.flip = flip
        self.cropsize = cropsize
        ## dataloader
        self.dl = dataloader
        self.net = model

    def pad_tensor(self, inten, size):
        """Zero-pad ``inten`` (N, C, H, W) to ``size`` = (height, width), centered.

        Returns ``(padded, [hst, hed, wst, wed])`` where the index list marks
        where the original content sits inside the padded tensor. The padded
        tensor is allocated on the same device as ``inten``.
        """
        N, C, H, W = inten.size()
        outten = torch.zeros(N, C, size[0], size[1], device=inten.device)
        margin_h, margin_w = size[0]-H, size[1]-W
        hst, hed = margin_h//2, margin_h//2+H
        wst, wed = margin_w//2, margin_w//2+W
        outten[:, :, hst:hed, wst:wed] = inten
        return outten, [hst, hed, wst, wed]

    def eval_chip(self, crop):
        """Return class probabilities for one window (summed with the flipped pass if enabled)."""
        with torch.no_grad():
            out = self.net(crop)[0]
            prob = F.softmax(out, 1)
            if self.flip:
                # Flip along the width axis, predict, and flip the result back.
                crop = torch.flip(crop, dims=(3,))
                out = self.net(crop)[0]
                out = torch.flip(out, dims=(3,))
                prob += F.softmax(out, 1)
        return prob

    def crop_eval(self, im):
        """Evaluate ``im`` with ``cropsize`` windows and return summed probabilities.

        Images smaller than the window are zero-padded and evaluated in one
        pass; larger images are covered by windows with a stride of 5/6 of
        the window size, and probabilities of overlapping windows are added.
        The result has the spatial size of the input ``im``.
        """
        cropsize = self.cropsize
        stride_rate = 5.0/6.0
        N, C, H, W = im.size()
        long_size, short_size = (H, W) if H > W else (W, H)
        if long_size < cropsize:
            im, indices = self.pad_tensor(im, (cropsize, cropsize))
            prob = self.eval_chip(im)
            prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]]
        else:
            stride = math.ceil(cropsize*stride_rate)
            if short_size < cropsize:
                # Pad only the short side up to the window size.
                if H < W:
                    im, indices = self.pad_tensor(im, (cropsize, W))
                else:
                    im, indices = self.pad_tensor(im, (H, cropsize))
            N, C, H, W = im.size()
            n_x = math.ceil((W-cropsize)/stride)+1
            n_y = math.ceil((H-cropsize)/stride)+1
            prob = torch.zeros(N, self.n_classes, H, W, device=im.device)
            for iy in range(n_y):
                for ix in range(n_x):
                    # Clamp the window to the image border; the last window in
                    # a row/column is shifted back so it stays full-sized.
                    hed, wed = min(H, stride*iy+cropsize), min(W, stride*ix+cropsize)
                    hst, wst = hed-cropsize, wed-cropsize
                    chip = im[:, :, hst:hed, wst:wed]
                    prob_chip = self.eval_chip(chip)
                    prob[:, :, hst:hed, wst:wed] += prob_chip
            if short_size < cropsize:
                prob = prob[:, :, indices[0]:indices[1], indices[2]:indices[3]]
        return prob

    def scale_crop_eval(self, im, scale):
        """Evaluate ``im`` resized by ``scale``; the result is resized back to ``im``'s size."""
        N, C, H, W = im.size()
        new_hw = [int(H*scale), int(W*scale)]
        im = F.interpolate(im, new_hw, mode='bilinear', align_corners=True)
        prob = self.crop_eval(im)
        prob = F.interpolate(prob, (H, W), mode='bilinear', align_corners=True)
        return prob

    def compute_hist(self, pred, lb):
        """Return the ``n_classes x n_classes`` confusion matrix of ``pred`` vs ``lb``.

        Entry ``[p, l]`` counts pixels predicted as ``p`` whose label is
        ``l``; pixels whose label equals ``lb_ignore`` are skipped.
        """
        n_classes = self.n_classes
        ignore_idx = self.lb_ignore
        keep = np.logical_not(lb == ignore_idx)
        # Encode each (pred, label) pair as one integer and count occurrences.
        merge = pred[keep] * n_classes + lb[keep]
        hist = np.bincount(merge, minlength=n_classes**2)
        hist = hist.reshape((n_classes, n_classes))
        return hist

    def evaluate(self):
        """Run the evaluation over the dataloader and return the mean IoU.

        NOTE(review): a class that never occurs in predictions or labels
        gives a 0/0 IoU (NaN), which propagates into the returned mean.
        """
        ## evaluate
        n_classes = self.n_classes
        hist = np.zeros((n_classes, n_classes), dtype=np.float32)
        # Show a progress bar unless this is a non-zero rank of an initialized
        # process group; the bar is no longer constructed on those ranks.
        show_progress = not (dist.is_initialized() and dist.get_rank() != 0)
        dloader = tqdm(self.dl) if show_progress else self.dl
        for i, (imgs, label) in enumerate(dloader):
            N, _, H, W = label.shape
            probs = torch.zeros((N, self.n_classes, H, W))
            imgs = imgs.cuda()
            for sc in self.scales:
                prob = self.scale_crop_eval(imgs, sc)
                probs += prob.detach().cpu()
            probs = probs.data.numpy()
            preds = np.argmax(probs, axis=1)

            hist_once = self.compute_hist(preds, label.data.numpy().squeeze(1))
            hist = hist + hist_once
        IOUs = np.diag(hist) / (np.sum(hist, axis=0)+np.sum(hist, axis=1)-np.diag(hist))
        mIOU = np.mean(IOUs)
        return mIOU
num_workers = n_workers, 172 | drop_last = False) 173 | 174 | ## evaluator 175 | logger.info('compute the mIOU') 176 | evaluator = MscEval(net, dl) 177 | 178 | mIOU = evaluator.evaluate() 179 | logger.info('mIOU is: {:.6f}'.format(mIOU)) 180 | 181 | 182 | 183 | if __name__ == "__main__": 184 | setup_logger('./res') 185 | evaluate() 186 | -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/logger.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | 5 | import os.path as osp 6 | import time 7 | import sys 8 | import logging 9 | 10 | import torch.distributed as dist 11 | 12 | 13 | def setup_logger(logpth): 14 | logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S')) 15 | logfile = osp.join(logpth, logfile) 16 | FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s' 17 | log_level = logging.INFO 18 | if dist.is_initialized() and not dist.get_rank()==0: 19 | log_level = logging.ERROR 20 | logging.basicConfig(level=log_level, format=FORMAT, filename=logfile) 21 | logging.root.addHandler(logging.StreamHandler()) 22 | 23 | 24 | -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- encoding: utf-8 -*- 3 | 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | import numpy as np 10 | 11 | 12 | class OhemCELoss(nn.Module): 13 | def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs): 14 | super(OhemCELoss, self).__init__() 15 | self.thresh = -torch.log(torch.tensor(thresh, dtype=torch.float)).cuda() 16 | self.n_min = n_min 17 | self.ignore_lb = ignore_lb 18 | self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none') 19 | 20 | def forward(self, logits, labels): 21 | N, C, H, W 
= logits.size() 22 | loss = self.criteria(logits, labels).view(-1) 23 | loss, _ = torch.sort(loss, descending=True) 24 | if loss[self.n_min] > self.thresh: 25 | loss = loss[loss>self.thresh] 26 | else: 27 | loss = loss[:self.n_min] 28 | return torch.mean(loss) 29 | 30 | 31 | class SoftmaxFocalLoss(nn.Module): 32 | def __init__(self, gamma, ignore_lb=255, *args, **kwargs): 33 | super(FocalLoss, self).__init__() 34 | self.gamma = gamma 35 | self.nll = nn.NLLLoss(ignore_index=ignore_lb) 36 | 37 | def forward(self, logits, labels): 38 | scores = F.softmax(logits, dim=1) 39 | factor = torch.pow(1.-scores, self.gamma) 40 | log_score = F.log_softmax(logits, dim=1) 41 | log_score = factor * log_score 42 | loss = self.nll(log_score, labels) 43 | return loss 44 | 45 | 46 | if __name__ == '__main__': 47 | torch.manual_seed(15) 48 | criteria1 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda() 49 | criteria2 = OhemCELoss(thresh=0.7, n_min=16*20*20//16).cuda() 50 | net1 = nn.Sequential( 51 | nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1), 52 | ) 53 | net1.cuda() 54 | net1.train() 55 | net2 = nn.Sequential( 56 | nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1), 57 | ) 58 | net2.cuda() 59 | net2.train() 60 | 61 | with torch.no_grad(): 62 | inten = torch.randn(16, 3, 20, 20).cuda() 63 | lbs = torch.randint(0, 19, [16, 20, 20]).cuda() 64 | lbs[1, :, :] = 255 65 | 66 | logits1 = net1(inten) 67 | logits1 = F.interpolate(logits1, inten.size()[2:], mode='bilinear') 68 | logits2 = net2(inten) 69 | logits2 = F.interpolate(logits2, inten.size()[2:], mode='bilinear') 70 | 71 | loss1 = criteria1(logits1, lbs) 72 | loss2 = criteria2(logits2, lbs) 73 | loss = loss1 + loss2 74 | print(loss.detach().cpu()) 75 | loss.backward() 76 | -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .bn import ABN, InPlaceABN, 
InPlaceABNSync 2 | from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE 3 | from .misc import GlobalAvgPool2d, SingleGPU 4 | from .residual import IdentityResidualBlock 5 | from .dense import DenseModule 6 | -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/modules/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/__pycache__/bn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/modules/__pycache__/bn.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/__pycache__/dense.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/modules/__pycache__/dense.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/__pycache__/functions.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/modules/__pycache__/functions.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/__pycache__/misc.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/modules/__pycache__/misc.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/__pycache__/residual.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/juntang-zhuang/ShelfNet/8e6dde2478440e3618745706b388b57b82e0dd1f/ShelfNet34_non_realtime/modules/__pycache__/residual.cpython-37.pyc -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/bn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as functional 4 | 5 | try: 6 | from queue import Queue 7 | except ImportError: 8 | from Queue import Queue 9 | 10 | from .functions import * 11 | 12 | 13 | class ABN(nn.Module): 14 | """Activated Batch Normalization 15 | 16 | This gathers a `BatchNorm2d` and an activation function in a single module 17 | """ 18 | 19 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 20 | """Creates an Activated Batch Normalization module 21 | 22 | Parameters 23 | ---------- 24 | num_features : int 25 | Number of feature channels in the input and output. 26 | eps : float 27 | Small constant to prevent numerical issues. 28 | momentum : float 29 | Momentum factor applied to compute running statistics as. 30 | affine : bool 31 | If `True` apply learned scale and shift transformation after normalization. 32 | activation : str 33 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 34 | slope : float 35 | Negative slope for the `leaky_relu` activation. 
36 | """ 37 | super(ABN, self).__init__() 38 | self.num_features = num_features 39 | self.affine = affine 40 | self.eps = eps 41 | self.momentum = momentum 42 | self.activation = activation 43 | self.slope = slope 44 | if self.affine: 45 | self.weight = nn.Parameter(torch.ones(num_features)) 46 | self.bias = nn.Parameter(torch.zeros(num_features)) 47 | else: 48 | self.register_parameter('weight', None) 49 | self.register_parameter('bias', None) 50 | self.register_buffer('running_mean', torch.zeros(num_features)) 51 | self.register_buffer('running_var', torch.ones(num_features)) 52 | self.reset_parameters() 53 | 54 | def reset_parameters(self): 55 | nn.init.constant_(self.running_mean, 0) 56 | nn.init.constant_(self.running_var, 1) 57 | if self.affine: 58 | nn.init.constant_(self.weight, 1) 59 | nn.init.constant_(self.bias, 0) 60 | 61 | def forward(self, x): 62 | x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, 63 | self.training, self.momentum, self.eps) 64 | 65 | if self.activation == ACT_RELU: 66 | return functional.relu(x, inplace=True) 67 | elif self.activation == ACT_LEAKY_RELU: 68 | return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) 69 | elif self.activation == ACT_ELU: 70 | return functional.elu(x, inplace=True) 71 | else: 72 | return x 73 | 74 | def __repr__(self): 75 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 76 | ' affine={affine}, activation={activation}' 77 | if self.activation == "leaky_relu": 78 | rep += ', slope={slope})' 79 | else: 80 | rep += ')' 81 | return rep.format(name=self.__class__.__name__, **self.__dict__) 82 | 83 | 84 | class InPlaceABN(ABN): 85 | """InPlace Activated Batch Normalization""" 86 | 87 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): 88 | """Creates an InPlace Activated Batch Normalization module 89 | 90 | Parameters 91 | ---------- 92 | num_features : int 93 | Number of feature 
channels in the input and output. 94 | eps : float 95 | Small constant to prevent numerical issues. 96 | momentum : float 97 | Momentum factor applied to compute running statistics as. 98 | affine : bool 99 | If `True` apply learned scale and shift transformation after normalization. 100 | activation : str 101 | Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. 102 | slope : float 103 | Negative slope for the `leaky_relu` activation. 104 | """ 105 | super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) 106 | 107 | def forward(self, x): 108 | return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, 109 | self.training, self.momentum, self.eps, self.activation, self.slope) 110 | 111 | 112 | class InPlaceABNSync(ABN): 113 | """InPlace Activated Batch Normalization with cross-GPU synchronization 114 | This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`. 
115 | """ 116 | 117 | def forward(self, x): 118 | return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, 119 | self.training, self.momentum, self.eps, self.activation, self.slope) 120 | 121 | def __repr__(self): 122 | rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ 123 | ' affine={affine}, activation={activation}' 124 | if self.activation == "leaky_relu": 125 | rep += ', slope={slope})' 126 | else: 127 | rep += ')' 128 | return rep.format(name=self.__class__.__name__, **self.__dict__) 129 | 130 | 131 | -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/deeplab.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as functional 4 | 5 | from models._util import try_index 6 | from .bn import ABN 7 | 8 | 9 | class DeeplabV3(nn.Module): 10 | def __init__(self, 11 | in_channels, 12 | out_channels, 13 | hidden_channels=256, 14 | dilations=(12, 24, 36), 15 | norm_act=ABN, 16 | pooling_size=None): 17 | super(DeeplabV3, self).__init__() 18 | self.pooling_size = pooling_size 19 | 20 | self.map_convs = nn.ModuleList([ 21 | nn.Conv2d(in_channels, hidden_channels, 1, bias=False), 22 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]), 23 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]), 24 | nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2]) 25 | ]) 26 | self.map_bn = norm_act(hidden_channels * 4) 27 | 28 | self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False) 29 | self.global_pooling_bn = norm_act(hidden_channels) 30 | 31 | self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False) 32 | self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False) 33 | 
self.red_bn = norm_act(out_channels) 34 | 35 | self.reset_parameters(self.map_bn.activation, self.map_bn.slope) 36 | 37 | def reset_parameters(self, activation, slope): 38 | gain = nn.init.calculate_gain(activation, slope) 39 | for m in self.modules(): 40 | if isinstance(m, nn.Conv2d): 41 | nn.init.xavier_normal_(m.weight.data, gain) 42 | if hasattr(m, "bias") and m.bias is not None: 43 | nn.init.constant_(m.bias, 0) 44 | elif isinstance(m, ABN): 45 | if hasattr(m, "weight") and m.weight is not None: 46 | nn.init.constant_(m.weight, 1) 47 | if hasattr(m, "bias") and m.bias is not None: 48 | nn.init.constant_(m.bias, 0) 49 | 50 | def forward(self, x): 51 | # Map convolutions 52 | out = torch.cat([m(x) for m in self.map_convs], dim=1) 53 | out = self.map_bn(out) 54 | out = self.red_conv(out) 55 | 56 | # Global pooling 57 | pool = self._global_pooling(x) 58 | pool = self.global_pooling_conv(pool) 59 | pool = self.global_pooling_bn(pool) 60 | pool = self.pool_red_conv(pool) 61 | if self.training or self.pooling_size is None: 62 | pool = pool.repeat(1, 1, x.size(2), x.size(3)) 63 | 64 | out += pool 65 | out = self.red_bn(out) 66 | return out 67 | 68 | def _global_pooling(self, x): 69 | if self.training or self.pooling_size is None: 70 | pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1) 71 | pool = pool.view(x.size(0), x.size(1), 1, 1) 72 | else: 73 | pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]), 74 | min(try_index(self.pooling_size, 1), x.shape[3])) 75 | padding = ( 76 | (pooling_size[1] - 1) // 2, 77 | (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1, 78 | (pooling_size[0] - 1) // 2, 79 | (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1 80 | ) 81 | 82 | pool = functional.avg_pool2d(x, pooling_size, stride=1) 83 | pool = functional.pad(pool, pad=padding, mode="replicate") 84 | return pool 85 | 
-------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/dense.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .bn import ABN 7 | 8 | 9 | class DenseModule(nn.Module): 10 | def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1): 11 | super(DenseModule, self).__init__() 12 | self.in_channels = in_channels 13 | self.growth = growth 14 | self.layers = layers 15 | 16 | self.convs1 = nn.ModuleList() 17 | self.convs3 = nn.ModuleList() 18 | for i in range(self.layers): 19 | self.convs1.append(nn.Sequential(OrderedDict([ 20 | ("bn", norm_act(in_channels)), 21 | ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False)) 22 | ]))) 23 | self.convs3.append(nn.Sequential(OrderedDict([ 24 | ("bn", norm_act(self.growth * bottleneck_factor)), 25 | ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False, 26 | dilation=dilation)) 27 | ]))) 28 | in_channels += self.growth 29 | 30 | @property 31 | def out_channels(self): 32 | return self.in_channels + self.growth * self.layers 33 | 34 | def forward(self, x): 35 | inputs = [x] 36 | for i in range(self.layers): 37 | x = torch.cat(inputs, dim=1) 38 | x = self.convs1[i](x) 39 | x = self.convs3[i](x) 40 | inputs += [x] 41 | 42 | return torch.cat(inputs, dim=1) 43 | -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/functions.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | import torch 3 | import torch.distributed as dist 4 | import torch.autograd as autograd 5 | import torch.cuda.comm as comm 6 | from torch.autograd.function import once_differentiable 7 | from torch.utils.cpp_extension import load 8 | 9 | _src_path = 
path.join(path.dirname(path.abspath(__file__)), "src") 10 | _backend = load(name="inplace_abn", 11 | extra_cflags=["-O3"], 12 | sources=[path.join(_src_path, f) for f in [ 13 | "inplace_abn.cpp", 14 | "inplace_abn_cpu.cpp", 15 | "inplace_abn_cuda.cu", 16 | "inplace_abn_cuda_half.cu" 17 | ]], 18 | extra_cuda_cflags=["--expt-extended-lambda"]) 19 | 20 | # Activation names 21 | ACT_RELU = "relu" 22 | ACT_LEAKY_RELU = "leaky_relu" 23 | ACT_ELU = "elu" 24 | ACT_NONE = "none" 25 | 26 | 27 | def _check(fn, *args, **kwargs): 28 | success = fn(*args, **kwargs) 29 | if not success: 30 | raise RuntimeError("CUDA Error encountered in {}".format(fn)) 31 | 32 | 33 | def _broadcast_shape(x): 34 | out_size = [] 35 | for i, s in enumerate(x.size()): 36 | if i != 1: 37 | out_size.append(1) 38 | else: 39 | out_size.append(s) 40 | return out_size 41 | 42 | 43 | def _reduce(x): 44 | if len(x.size()) == 2: 45 | return x.sum(dim=0) 46 | else: 47 | n, c = x.size()[0:2] 48 | return x.contiguous().view((n, c, -1)).sum(2).sum(0) 49 | 50 | 51 | def _count_samples(x): 52 | count = 1 53 | for i, s in enumerate(x.size()): 54 | if i != 1: 55 | count *= s 56 | return count 57 | 58 | 59 | def _act_forward(ctx, x): 60 | if ctx.activation == ACT_LEAKY_RELU: 61 | _backend.leaky_relu_forward(x, ctx.slope) 62 | elif ctx.activation == ACT_ELU: 63 | _backend.elu_forward(x) 64 | elif ctx.activation == ACT_NONE: 65 | pass 66 | 67 | 68 | def _act_backward(ctx, x, dx): 69 | if ctx.activation == ACT_LEAKY_RELU: 70 | _backend.leaky_relu_backward(x, dx, ctx.slope) 71 | elif ctx.activation == ACT_ELU: 72 | _backend.elu_backward(x, dx) 73 | elif ctx.activation == ACT_NONE: 74 | pass 75 | 76 | 77 | class InPlaceABN(autograd.Function): 78 | @staticmethod 79 | def forward(ctx, x, weight, bias, running_mean, running_var, 80 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): 81 | # Save context 82 | ctx.training = training 83 | ctx.momentum = momentum 84 | ctx.eps = eps 85 | 
ctx.activation = activation 86 | ctx.slope = slope 87 | ctx.affine = weight is not None and bias is not None 88 | 89 | # Prepare inputs 90 | count = _count_samples(x) 91 | x = x.contiguous() 92 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 93 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 94 | 95 | if ctx.training: 96 | mean, var = _backend.mean_var(x) 97 | 98 | # Update running stats 99 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 100 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) 101 | 102 | # Mark in-place modified tensors 103 | ctx.mark_dirty(x, running_mean, running_var) 104 | else: 105 | mean, var = running_mean.contiguous(), running_var.contiguous() 106 | ctx.mark_dirty(x) 107 | 108 | # BN forward + activation 109 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 110 | _act_forward(ctx, x) 111 | 112 | # Output 113 | ctx.var = var 114 | ctx.save_for_backward(x, var, weight, bias) 115 | return x 116 | 117 | @staticmethod 118 | @once_differentiable 119 | def backward(ctx, dz): 120 | z, var, weight, bias = ctx.saved_tensors 121 | dz = dz.contiguous() 122 | 123 | # Undo activation 124 | _act_backward(ctx, z, dz) 125 | 126 | if ctx.training: 127 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 128 | else: 129 | # TODO: implement simplified CUDA backward for inference mode 130 | edz = dz.new_zeros(dz.size(1)) 131 | eydz = dz.new_zeros(dz.size(1)) 132 | 133 | dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 134 | dweight = eydz * weight.sign() if ctx.affine else None 135 | dbias = edz if ctx.affine else None 136 | 137 | return dx, dweight, dbias, None, None, None, None, None, None, None 138 | 139 | class InPlaceABNSync(autograd.Function): 140 | @classmethod 141 | def forward(cls, ctx, x, weight, bias, running_mean, running_var, 142 | training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, 
slope=0.01, equal_batches=True): 143 | # Save context 144 | ctx.training = training 145 | ctx.momentum = momentum 146 | ctx.eps = eps 147 | ctx.activation = activation 148 | ctx.slope = slope 149 | ctx.affine = weight is not None and bias is not None 150 | 151 | # Prepare inputs 152 | ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1 153 | 154 | #count = _count_samples(x) 155 | batch_size = x.new_tensor([x.shape[0]],dtype=torch.long) 156 | 157 | x = x.contiguous() 158 | weight = weight.contiguous() if ctx.affine else x.new_empty(0) 159 | bias = bias.contiguous() if ctx.affine else x.new_empty(0) 160 | 161 | if ctx.training: 162 | mean, var = _backend.mean_var(x) 163 | if ctx.world_size>1: 164 | # get global batch size 165 | if equal_batches: 166 | batch_size *= ctx.world_size 167 | else: 168 | dist.all_reduce(batch_size, dist.ReduceOp.SUM) 169 | 170 | ctx.factor = x.shape[0]/float(batch_size.item()) 171 | 172 | mean_all = mean.clone() * ctx.factor 173 | dist.all_reduce(mean_all, dist.ReduceOp.SUM) 174 | 175 | var_all = (var + (mean - mean_all) ** 2) * ctx.factor 176 | dist.all_reduce(var_all, dist.ReduceOp.SUM) 177 | 178 | mean = mean_all 179 | var = var_all 180 | 181 | # Update running stats 182 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) 183 | count = batch_size.item() * x.view(x.shape[0],x.shape[1],-1).shape[-1] 184 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1))) 185 | 186 | # Mark in-place modified tensors 187 | ctx.mark_dirty(x, running_mean, running_var) 188 | else: 189 | mean, var = running_mean.contiguous(), running_var.contiguous() 190 | ctx.mark_dirty(x) 191 | 192 | # BN forward + activation 193 | _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) 194 | _act_forward(ctx, x) 195 | 196 | # Output 197 | ctx.var = var 198 | ctx.save_for_backward(x, var, weight, bias) 199 | return x 200 | 201 | @staticmethod 202 | @once_differentiable 203 | def 
backward(ctx, dz): 204 | z, var, weight, bias = ctx.saved_tensors 205 | dz = dz.contiguous() 206 | 207 | # Undo activation 208 | _act_backward(ctx, z, dz) 209 | 210 | if ctx.training: 211 | edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) 212 | edz_local = edz.clone() 213 | eydz_local = eydz.clone() 214 | 215 | if ctx.world_size>1: 216 | edz *= ctx.factor 217 | dist.all_reduce(edz, dist.ReduceOp.SUM) 218 | 219 | eydz *= ctx.factor 220 | dist.all_reduce(eydz, dist.ReduceOp.SUM) 221 | else: 222 | edz_local = edz = dz.new_zeros(dz.size(1)) 223 | eydz_local = eydz = dz.new_zeros(dz.size(1)) 224 | 225 | dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) 226 | dweight = eydz_local * weight.sign() if ctx.affine else None 227 | dbias = edz_local if ctx.affine else None 228 | 229 | return dx, dweight, dbias, None, None, None, None, None, None, None 230 | 231 | inplace_abn = InPlaceABN.apply 232 | inplace_abn_sync = InPlaceABNSync.apply 233 | 234 | __all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"] 235 | -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/misc.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.distributed as dist 4 | 5 | class GlobalAvgPool2d(nn.Module): 6 | def __init__(self): 7 | """Global average pooling over the input's spatial dimensions""" 8 | super(GlobalAvgPool2d, self).__init__() 9 | 10 | def forward(self, inputs): 11 | in_size = inputs.size() 12 | return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) 13 | 14 | class SingleGPU(nn.Module): 15 | def __init__(self, module): 16 | super(SingleGPU, self).__init__() 17 | self.module=module 18 | 19 | def forward(self, input): 20 | return self.module(input.cuda(non_blocking=True)) 21 | 22 | 
-------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/residual.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.nn as nn 4 | 5 | from .bn import ABN 6 | 7 | 8 | class IdentityResidualBlock(nn.Module): 9 | def __init__(self, 10 | in_channels, 11 | channels, 12 | stride=1, 13 | dilation=1, 14 | groups=1, 15 | norm_act=ABN, 16 | dropout=None): 17 | """Configurable identity-mapping residual block 18 | 19 | Parameters 20 | ---------- 21 | in_channels : int 22 | Number of input channels. 23 | channels : list of int 24 | Number of channels in the internal feature maps. Can either have two or three elements: if three construct 25 | a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then 26 | `3 x 3` then `1 x 1` convolutions. 27 | stride : int 28 | Stride of the first `3 x 3` convolution 29 | dilation : int 30 | Dilation to apply to the `3 x 3` convolutions. 31 | groups : int 32 | Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with 33 | bottleneck blocks. 34 | norm_act : callable 35 | Function to create normalization / activation Module. 36 | dropout: callable 37 | Function to create Dropout Module. 
38 | """ 39 | super(IdentityResidualBlock, self).__init__() 40 | 41 | # Check parameters for inconsistencies 42 | if len(channels) != 2 and len(channels) != 3: 43 | raise ValueError("channels must contain either two or three values") 44 | if len(channels) == 2 and groups != 1: 45 | raise ValueError("groups > 1 are only valid if len(channels) == 3") 46 | 47 | is_bottleneck = len(channels) == 3 48 | need_proj_conv = stride != 1 or in_channels != channels[-1] 49 | 50 | self.bn1 = norm_act(in_channels) 51 | if not is_bottleneck: 52 | layers = [ 53 | ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, 54 | dilation=dilation)), 55 | ("bn2", norm_act(channels[0])), 56 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 57 | dilation=dilation)) 58 | ] 59 | if dropout is not None: 60 | layers = layers[0:2] + [("dropout", dropout())] + layers[2:] 61 | else: 62 | layers = [ 63 | ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)), 64 | ("bn2", norm_act(channels[0])), 65 | ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, 66 | groups=groups, dilation=dilation)), 67 | ("bn3", norm_act(channels[1])), 68 | ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)) 69 | ] 70 | if dropout is not None: 71 | layers = layers[0:4] + [("dropout", dropout())] + layers[4:] 72 | self.convs = nn.Sequential(OrderedDict(layers)) 73 | 74 | if need_proj_conv: 75 | self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) 76 | 77 | def forward(self, x): 78 | if hasattr(self, "proj_conv"): 79 | bn1 = self.bn1(x) 80 | shortcut = self.proj_conv(bn1) 81 | else: 82 | shortcut = x.clone() 83 | bn1 = self.bn1(x) 84 | 85 | out = self.convs(bn1) 86 | out.add_(shortcut) 87 | 88 | return out 89 | -------------------------------------------------------------------------------- 
/ShelfNet34_non_realtime/modules/src/checks.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | // Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT 6 | #ifndef AT_CHECK 7 | #define AT_CHECK AT_ASSERT 8 | #endif 9 | 10 | #define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") 11 | #define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") 12 | #define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") 13 | 14 | #define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 15 | #define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/src/inplace_abn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "inplace_abn.h" 6 | 7 | std::vector mean_var(at::Tensor x) { 8 | if (x.is_cuda()) { 9 | if (x.type().scalarType() == at::ScalarType::Half) { 10 | return mean_var_cuda_h(x); 11 | } else { 12 | return mean_var_cuda(x); 13 | } 14 | } else { 15 | return mean_var_cpu(x); 16 | } 17 | } 18 | 19 | at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 20 | bool affine, float eps) { 21 | if (x.is_cuda()) { 22 | if (x.type().scalarType() == at::ScalarType::Half) { 23 | return forward_cuda_h(x, mean, var, weight, bias, affine, eps); 24 | } else { 25 | return forward_cuda(x, mean, var, weight, bias, affine, eps); 26 | } 27 | } else { 28 | return forward_cpu(x, mean, var, weight, bias, affine, eps); 29 | } 30 | } 31 | 32 | std::vector edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 33 | bool affine, float eps) { 34 | if (z.is_cuda()) { 35 | if (z.type().scalarType() == at::ScalarType::Half) { 36 | return edz_eydz_cuda_h(z, dz, weight, 
bias, affine, eps); 37 | } else { 38 | return edz_eydz_cuda(z, dz, weight, bias, affine, eps); 39 | } 40 | } else { 41 | return edz_eydz_cpu(z, dz, weight, bias, affine, eps); 42 | } 43 | } 44 | 45 | at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 46 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 47 | if (z.is_cuda()) { 48 | if (z.type().scalarType() == at::ScalarType::Half) { 49 | return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps); 50 | } else { 51 | return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); 52 | } 53 | } else { 54 | return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); 55 | } 56 | } 57 | 58 | void leaky_relu_forward(at::Tensor z, float slope) { 59 | at::leaky_relu_(z, slope); 60 | } 61 | 62 | void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) { 63 | if (z.is_cuda()) { 64 | if (z.type().scalarType() == at::ScalarType::Half) { 65 | return leaky_relu_backward_cuda_h(z, dz, slope); 66 | } else { 67 | return leaky_relu_backward_cuda(z, dz, slope); 68 | } 69 | } else { 70 | return leaky_relu_backward_cpu(z, dz, slope); 71 | } 72 | } 73 | 74 | void elu_forward(at::Tensor z) { 75 | at::elu_(z); 76 | } 77 | 78 | void elu_backward(at::Tensor z, at::Tensor dz) { 79 | if (z.is_cuda()) { 80 | return elu_backward_cuda(z, dz); 81 | } else { 82 | return elu_backward_cpu(z, dz); 83 | } 84 | } 85 | 86 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 87 | m.def("mean_var", &mean_var, "Mean and variance computation"); 88 | m.def("forward", &forward, "In-place forward computation"); 89 | m.def("edz_eydz", &edz_eydz, "First part of backward computation"); 90 | m.def("backward", &backward, "Second part of backward computation"); 91 | m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); 92 | m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); 93 | 
m.def("elu_forward", &elu_forward, "Elu forward computation"); 94 | m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); 95 | } 96 | -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/src/inplace_abn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | std::vector mean_var_cpu(at::Tensor x); 8 | std::vector mean_var_cuda(at::Tensor x); 9 | std::vector mean_var_cuda_h(at::Tensor x); 10 | 11 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 12 | bool affine, float eps); 13 | at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 14 | bool affine, float eps); 15 | at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 16 | bool affine, float eps); 17 | 18 | std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 19 | bool affine, float eps); 20 | std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 21 | bool affine, float eps); 22 | std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 23 | bool affine, float eps); 24 | 25 | at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 26 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 27 | at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 28 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 29 | at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 30 | at::Tensor edz, at::Tensor eydz, bool affine, float eps); 31 | 32 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); 33 | 
void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); 34 | void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope); 35 | 36 | void elu_backward_cpu(at::Tensor z, at::Tensor dz); 37 | void elu_backward_cuda(at::Tensor z, at::Tensor dz); 38 | 39 | static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) { 40 | num = x.size(0); 41 | chn = x.size(1); 42 | sp = 1; 43 | for (int64_t i = 2; i < x.ndimension(); ++i) 44 | sp *= x.size(i); 45 | } 46 | 47 | /* 48 | * Specialized CUDA reduction functions for BN 49 | */ 50 | #ifdef __CUDACC__ 51 | 52 | #include "utils/cuda.cuh" 53 | 54 | template 55 | __device__ T reduce(Op op, int plane, int N, int S) { 56 | T sum = (T)0; 57 | for (int batch = 0; batch < N; ++batch) { 58 | for (int x = threadIdx.x; x < S; x += blockDim.x) { 59 | sum += op(batch, plane, x); 60 | } 61 | } 62 | 63 | // sum over NumThreads within a warp 64 | sum = warpSum(sum); 65 | 66 | // 'transpose', and reduce within warp again 67 | __shared__ T shared[32]; 68 | __syncthreads(); 69 | if (threadIdx.x % WARP_SIZE == 0) { 70 | shared[threadIdx.x / WARP_SIZE] = sum; 71 | } 72 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { 73 | // zero out the other entries in shared 74 | shared[threadIdx.x] = (T)0; 75 | } 76 | __syncthreads(); 77 | if (threadIdx.x / WARP_SIZE == 0) { 78 | sum = warpSum(shared[threadIdx.x]); 79 | if (threadIdx.x == 0) { 80 | shared[0] = sum; 81 | } 82 | } 83 | __syncthreads(); 84 | 85 | // Everyone picks it up, should be broadcast into the whole gradInput 86 | return shared[0]; 87 | } 88 | #endif 89 | -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/src/inplace_abn_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "utils/checks.h" 6 | #include "inplace_abn.h" 7 | 8 | at::Tensor reduce_sum(at::Tensor x) { 9 | if 
(x.ndimension() == 2) { 10 | return x.sum(0); 11 | } else { 12 | auto x_view = x.view({x.size(0), x.size(1), -1}); 13 | return x_view.sum(-1).sum(0); 14 | } 15 | } 16 | 17 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { 18 | if (x.ndimension() == 2) { 19 | return v; 20 | } else { 21 | std::vector broadcast_size = {1, -1}; 22 | for (int64_t i = 2; i < x.ndimension(); ++i) 23 | broadcast_size.push_back(1); 24 | 25 | return v.view(broadcast_size); 26 | } 27 | } 28 | 29 | int64_t count(at::Tensor x) { 30 | int64_t count = x.size(0); 31 | for (int64_t i = 2; i < x.ndimension(); ++i) 32 | count *= x.size(i); 33 | 34 | return count; 35 | } 36 | 37 | at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { 38 | if (affine) { 39 | return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); 40 | } else { 41 | return z; 42 | } 43 | } 44 | 45 | std::vector mean_var_cpu(at::Tensor x) { 46 | auto num = count(x); 47 | auto mean = reduce_sum(x) / num; 48 | auto diff = x - broadcast_to(mean, x); 49 | auto var = reduce_sum(diff.pow(2)) / num; 50 | 51 | return {mean, var}; 52 | } 53 | 54 | at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 55 | bool affine, float eps) { 56 | auto gamma = affine ? 
at::abs(weight) + eps : at::ones_like(var); 57 | auto mul = at::rsqrt(var + eps) * gamma; 58 | 59 | x.sub_(broadcast_to(mean, x)); 60 | x.mul_(broadcast_to(mul, x)); 61 | if (affine) x.add_(broadcast_to(bias, x)); 62 | 63 | return x; 64 | } 65 | 66 | std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 67 | bool affine, float eps) { 68 | auto edz = reduce_sum(dz); 69 | auto y = invert_affine(z, weight, bias, affine, eps); 70 | auto eydz = reduce_sum(y * dz); 71 | 72 | return {edz, eydz}; 73 | } 74 | 75 | at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 76 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 77 | auto y = invert_affine(z, weight, bias, affine, eps); 78 | auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps); 79 | 80 | auto num = count(z); 81 | auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz); 82 | return dx; 83 | } 84 | 85 | void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) { 86 | CHECK_CPU_INPUT(z); 87 | CHECK_CPU_INPUT(dz); 88 | 89 | AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] { 90 | int64_t count = z.numel(); 91 | auto *_z = z.data(); 92 | auto *_dz = dz.data(); 93 | 94 | for (int64_t i = 0; i < count; ++i) { 95 | if (_z[i] < 0) { 96 | _z[i] *= 1 / slope; 97 | _dz[i] *= slope; 98 | } 99 | } 100 | })); 101 | } 102 | 103 | void elu_backward_cpu(at::Tensor z, at::Tensor dz) { 104 | CHECK_CPU_INPUT(z); 105 | CHECK_CPU_INPUT(dz); 106 | 107 | AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] { 108 | int64_t count = z.numel(); 109 | auto *_z = z.data(); 110 | auto *_dz = dz.data(); 111 | 112 | for (int64_t i = 0; i < count; ++i) { 113 | if (_z[i] < 0) { 114 | _z[i] = log1p(_z[i]); 115 | _dz[i] *= (_z[i] + 1.f); 116 | } 117 | } 118 | })); 119 | } 120 | 
-------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/src/inplace_abn_cuda_half.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include "utils/checks.h" 8 | #include "utils/cuda.cuh" 9 | #include "inplace_abn.h" 10 | 11 | #include 12 | 13 | // Operations for reduce 14 | struct SumOpH { 15 | __device__ SumOpH(const half *t, int c, int s) 16 | : tensor(t), chn(c), sp(s) {} 17 | __device__ __forceinline__ float operator()(int batch, int plane, int n) { 18 | return __half2float(tensor[(batch * chn + plane) * sp + n]); 19 | } 20 | const half *tensor; 21 | const int chn; 22 | const int sp; 23 | }; 24 | 25 | struct VarOpH { 26 | __device__ VarOpH(float m, const half *t, int c, int s) 27 | : mean(m), tensor(t), chn(c), sp(s) {} 28 | __device__ __forceinline__ float operator()(int batch, int plane, int n) { 29 | const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]); 30 | return (t - mean) * (t - mean); 31 | } 32 | const float mean; 33 | const half *tensor; 34 | const int chn; 35 | const int sp; 36 | }; 37 | 38 | struct GradOpH { 39 | __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s) 40 | : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} 41 | __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { 42 | float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight; 43 | float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); 44 | return Pair(_dz, _y * _dz); 45 | } 46 | const float weight; 47 | const float bias; 48 | const half *z; 49 | const half *dz; 50 | const int chn; 51 | const int sp; 52 | }; 53 | 54 | /*********** 55 | * mean_var 56 | ***********/ 57 | 58 | __global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) { 59 | int plane = blockIdx.x; 60 | float norm = 1.f / 
static_cast(num * sp); 61 | 62 | float _mean = reduce(SumOpH(x, chn, sp), plane, num, sp) * norm; 63 | __syncthreads(); 64 | float _var = reduce(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm; 65 | 66 | if (threadIdx.x == 0) { 67 | mean[plane] = _mean; 68 | var[plane] = _var; 69 | } 70 | } 71 | 72 | std::vector mean_var_cuda_h(at::Tensor x) { 73 | CHECK_CUDA_INPUT(x); 74 | 75 | // Extract dimensions 76 | int64_t num, chn, sp; 77 | get_dims(x, num, chn, sp); 78 | 79 | // Prepare output tensors 80 | auto mean = at::empty({chn},x.options().dtype(at::kFloat)); 81 | auto var = at::empty({chn},x.options().dtype(at::kFloat)); 82 | 83 | // Run kernel 84 | dim3 blocks(chn); 85 | dim3 threads(getNumThreads(sp)); 86 | auto stream = at::cuda::getCurrentCUDAStream(); 87 | mean_var_kernel_h<<>>( 88 | reinterpret_cast(x.data()), 89 | mean.data(), 90 | var.data(), 91 | num, chn, sp); 92 | 93 | return {mean, var}; 94 | } 95 | 96 | /********** 97 | * forward 98 | **********/ 99 | 100 | __global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias, 101 | bool affine, float eps, int num, int chn, int sp) { 102 | int plane = blockIdx.x; 103 | 104 | const float _mean = mean[plane]; 105 | const float _var = var[plane]; 106 | const float _weight = affine ? abs(weight[plane]) + eps : 1.f; 107 | const float _bias = affine ? 
bias[plane] : 0.f; 108 | 109 | const float mul = rsqrt(_var + eps) * _weight; 110 | 111 | for (int batch = 0; batch < num; ++batch) { 112 | for (int n = threadIdx.x; n < sp; n += blockDim.x) { 113 | half *x_ptr = x + (batch * chn + plane) * sp + n; 114 | float _x = __half2float(*x_ptr); 115 | float _y = (_x - _mean) * mul + _bias; 116 | 117 | *x_ptr = __float2half(_y); 118 | } 119 | } 120 | } 121 | 122 | at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, 123 | bool affine, float eps) { 124 | CHECK_CUDA_INPUT(x); 125 | CHECK_CUDA_INPUT(mean); 126 | CHECK_CUDA_INPUT(var); 127 | CHECK_CUDA_INPUT(weight); 128 | CHECK_CUDA_INPUT(bias); 129 | 130 | // Extract dimensions 131 | int64_t num, chn, sp; 132 | get_dims(x, num, chn, sp); 133 | 134 | // Run kernel 135 | dim3 blocks(chn); 136 | dim3 threads(getNumThreads(sp)); 137 | auto stream = at::cuda::getCurrentCUDAStream(); 138 | forward_kernel_h<<>>( 139 | reinterpret_cast(x.data()), 140 | mean.data(), 141 | var.data(), 142 | weight.data(), 143 | bias.data(), 144 | affine, eps, num, chn, sp); 145 | 146 | return x; 147 | } 148 | 149 | __global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias, 150 | float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) { 151 | int plane = blockIdx.x; 152 | 153 | float _weight = affine ? abs(weight[plane]) + eps : 1.f; 154 | float _bias = affine ? 
bias[plane] : 0.f; 155 | 156 | Pair res = reduce, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp); 157 | __syncthreads(); 158 | 159 | if (threadIdx.x == 0) { 160 | edz[plane] = res.v1; 161 | eydz[plane] = res.v2; 162 | } 163 | } 164 | 165 | std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, 166 | bool affine, float eps) { 167 | CHECK_CUDA_INPUT(z); 168 | CHECK_CUDA_INPUT(dz); 169 | CHECK_CUDA_INPUT(weight); 170 | CHECK_CUDA_INPUT(bias); 171 | 172 | // Extract dimensions 173 | int64_t num, chn, sp; 174 | get_dims(z, num, chn, sp); 175 | 176 | auto edz = at::empty({chn},z.options().dtype(at::kFloat)); 177 | auto eydz = at::empty({chn},z.options().dtype(at::kFloat)); 178 | 179 | // Run kernel 180 | dim3 blocks(chn); 181 | dim3 threads(getNumThreads(sp)); 182 | auto stream = at::cuda::getCurrentCUDAStream(); 183 | edz_eydz_kernel_h<<>>( 184 | reinterpret_cast(z.data()), 185 | reinterpret_cast(dz.data()), 186 | weight.data(), 187 | bias.data(), 188 | edz.data(), 189 | eydz.data(), 190 | affine, eps, num, chn, sp); 191 | 192 | return {edz, eydz}; 193 | } 194 | 195 | __global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz, 196 | const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) { 197 | int plane = blockIdx.x; 198 | 199 | float _weight = affine ? abs(weight[plane]) + eps : 1.f; 200 | float _bias = affine ? 
bias[plane] : 0.f; 201 | float _var = var[plane]; 202 | float _edz = edz[plane]; 203 | float _eydz = eydz[plane]; 204 | 205 | float _mul = _weight * rsqrt(_var + eps); 206 | float count = float(num * sp); 207 | 208 | for (int batch = 0; batch < num; ++batch) { 209 | for (int n = threadIdx.x; n < sp; n += blockDim.x) { 210 | float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); 211 | float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight; 212 | 213 | dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul); 214 | } 215 | } 216 | } 217 | 218 | at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, 219 | at::Tensor edz, at::Tensor eydz, bool affine, float eps) { 220 | CHECK_CUDA_INPUT(z); 221 | CHECK_CUDA_INPUT(dz); 222 | CHECK_CUDA_INPUT(var); 223 | CHECK_CUDA_INPUT(weight); 224 | CHECK_CUDA_INPUT(bias); 225 | CHECK_CUDA_INPUT(edz); 226 | CHECK_CUDA_INPUT(eydz); 227 | 228 | // Extract dimensions 229 | int64_t num, chn, sp; 230 | get_dims(z, num, chn, sp); 231 | 232 | auto dx = at::zeros_like(z); 233 | 234 | // Run kernel 235 | dim3 blocks(chn); 236 | dim3 threads(getNumThreads(sp)); 237 | auto stream = at::cuda::getCurrentCUDAStream(); 238 | backward_kernel_h<<>>( 239 | reinterpret_cast(z.data()), 240 | reinterpret_cast(dz.data()), 241 | var.data(), 242 | weight.data(), 243 | bias.data(), 244 | edz.data(), 245 | eydz.data(), 246 | reinterpret_cast(dx.data()), 247 | affine, eps, num, chn, sp); 248 | 249 | return dx; 250 | } 251 | 252 | __global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) { 253 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x){ 254 | float _z = __half2float(z[i]); 255 | if (_z < 0) { 256 | dz[i] = __float2half(__half2float(dz[i]) * slope); 257 | z[i] = __float2half(_z / slope); 258 | } 259 | } 260 | } 261 | 262 | void 
leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) { 263 | CHECK_CUDA_INPUT(z); 264 | CHECK_CUDA_INPUT(dz); 265 | 266 | int64_t count = z.numel(); 267 | dim3 threads(getNumThreads(count)); 268 | dim3 blocks = (count + threads.x - 1) / threads.x; 269 | auto stream = at::cuda::getCurrentCUDAStream(); 270 | leaky_relu_backward_impl_h<<>>( 271 | reinterpret_cast(z.data()), 272 | reinterpret_cast(dz.data()), 273 | slope, count); 274 | } 275 | 276 | -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/src/utils/checks.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | // Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT 6 | #ifndef AT_CHECK 7 | #define AT_CHECK AT_ASSERT 8 | #endif 9 | 10 | #define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") 11 | #define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") 12 | #define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") 13 | 14 | #define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 15 | #define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/src/utils/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | /* 6 | * Functions to share code between CPU and GPU 7 | */ 8 | 9 | #ifdef __CUDACC__ 10 | // CUDA versions 11 | 12 | #define HOST_DEVICE __host__ __device__ 13 | #define INLINE_HOST_DEVICE __host__ __device__ inline 14 | #define FLOOR(x) floor(x) 15 | 16 | #if __CUDA_ARCH__ >= 600 17 | // Recent compute capabilities have block-level atomicAdd for all data types, so we use that 18 | #define ACCUM(x,y) atomicAdd_block(&(x),(y)) 19 | #else 20 | // Older 
architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float 21 | // and use the known atomicCAS-based implementation for double 22 | template 23 | __device__ inline data_t atomic_add(data_t *address, data_t val) { 24 | return atomicAdd(address, val); 25 | } 26 | 27 | template<> 28 | __device__ inline double atomic_add(double *address, double val) { 29 | unsigned long long int* address_as_ull = (unsigned long long int*)address; 30 | unsigned long long int old = *address_as_ull, assumed; 31 | do { 32 | assumed = old; 33 | old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); 34 | } while (assumed != old); 35 | return __longlong_as_double(old); 36 | } 37 | 38 | #define ACCUM(x,y) atomic_add(&(x),(y)) 39 | #endif // #if __CUDA_ARCH__ >= 600 40 | 41 | #else 42 | // CPU versions 43 | 44 | #define HOST_DEVICE 45 | #define INLINE_HOST_DEVICE inline 46 | #define FLOOR(x) std::floor(x) 47 | #define ACCUM(x,y) (x) += (y) 48 | 49 | #endif // #ifdef __CUDACC__ -------------------------------------------------------------------------------- /ShelfNet34_non_realtime/modules/src/utils/cuda.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * General settings and functions 5 | */ 6 | const int WARP_SIZE = 32; 7 | const int MAX_BLOCK_SIZE = 1024; 8 | 9 | static int getNumThreads(int nElem) { 10 | int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE}; 11 | for (int i = 0; i < 6; ++i) { 12 | if (nElem <= threadSizes[i]) { 13 | return threadSizes[i]; 14 | } 15 | } 16 | return MAX_BLOCK_SIZE; 17 | } 18 | 19 | /* 20 | * Reduction utilities 21 | */ 22 | template 23 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, 24 | unsigned int mask = 0xffffffff) { 25 | #if CUDART_VERSION >= 9000 26 | return __shfl_xor_sync(mask, value, laneMask, width); 27 | #else 28 | return __shfl_xor(value, 
class Optimizer(object):
    """SGD wrapper that drives a warm-up + polynomial-decay learning rate.

    During the first `warmup_steps` iterations the learning rate grows
    geometrically from `warmup_start_lr` to `lr0`; afterwards it decays as
    ``lr0 * (1 - progress) ** power`` where `progress` runs from 0 to 1
    between the end of warm-up and `max_iter`. Parameter groups flagged with
    ``'lr_mul': True`` are stepped with ten times the scheduled rate.
    """

    def __init__(self,
                 model,
                 lr0,
                 momentum,
                 wd,
                 warmup_steps,
                 warmup_start_lr,
                 max_iter,
                 power,
                 *args, **kwargs):
        """Build the underlying `torch.optim.SGD` and the schedule state.

        `model.get_params()` must return four parameter lists, in this order:
        weight-decayed, not decayed, weight-decayed with the 10x learning-rate
        multiplier, not decayed with the 10x multiplier. Extra positional and
        keyword arguments are accepted and ignored.
        """
        self.warmup_steps = warmup_steps
        self.warmup_start_lr = warmup_start_lr
        self.lr0 = lr0
        self.lr = self.lr0
        self.max_iter = float(max_iter)
        self.power = power
        self.it = 0
        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = model.get_params()
        param_list = [
            {'params': wd_params},
            {'params': nowd_params, 'weight_decay': 0},
            {'params': lr_mul_wd_params, 'lr_mul': True},
            {'params': lr_mul_nowd_params, 'weight_decay': 0, 'lr_mul': True}]
        self.optim = torch.optim.SGD(
            param_list,
            lr=lr0,
            momentum=momentum,
            weight_decay=wd)
        if self.warmup_steps > 0:
            # Per-step multiplier such that warmup_start_lr * factor**warmup_steps == lr0.
            self.warmup_factor = (self.lr0 / self.warmup_start_lr) ** (1. / self.warmup_steps)
        else:
            # No warm-up requested: avoid dividing by zero, the factor is never used.
            self.warmup_factor = 1.

    def get_lr(self):
        """Return the scheduled base learning rate for the current iteration."""
        if self.warmup_steps > 0 and self.it <= self.warmup_steps:
            return self.warmup_start_lr * (self.warmup_factor ** self.it)

        decay_span = self.max_iter - self.warmup_steps
        if decay_span > 0:
            # Clamp at 1 so that stepping past max_iter keeps the rate at its
            # final value instead of raising a negative base to a fractional
            # power (which yields a complex number in Python 3).
            progress = min((self.it - self.warmup_steps) / decay_span, 1.)
        else:
            # Degenerate schedule with no decay phase: treat it as finished.
            progress = 1.
        return self.lr0 * (1 - progress) ** self.power

    def step(self):
        """Refresh every group's learning rate, then take one SGD step."""
        self.lr = self.get_lr()
        for pg in self.optim.param_groups:
            if pg.get('lr_mul', False):
                pg['lr'] = self.lr * 10
            else:
                pg['lr'] = self.lr
        # Keep the optimizer defaults in sync with the scheduled rate.
        if self.optim.defaults.get('lr_mul', False):
            self.optim.defaults['lr'] = self.lr * 10
        else:
            self.optim.defaults['lr'] = self.lr
        self.it += 1
        self.optim.step()
        if self.it == self.warmup_steps+2:
            logger.info('==> warmup done, start to implement poly lr strategy')

    def zero_grad(self):
        """Clear the gradients held by the wrapped optimizer."""
        self.optim.zero_grad()
class BasicBlock(nn.Module):
    """Two-convolution residual block with an optional 1x1 projection shortcut."""

    def __init__(self, in_chan, out_chan, stride=1):
        """Create the residual branch and, when shapes differ, the shortcut.

        `stride` is applied to the first 3x3 convolution and to the projection
        shortcut. The projection is only built when the channel count changes
        or `stride` is not 1; otherwise `self.downsample` stays `None`.
        """
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_chan, out_chan, stride)
        self.bn1 = BatchNorm2d(out_chan)
        self.conv2 = conv3x3(out_chan, out_chan)
        # The last normalization of each branch is created with
        # activation='none'; the ReLU below runs after the sum instead.
        self.bn2 = BatchNorm2d(out_chan, activation='none')
        self.relu = nn.ReLU(inplace=True)

        projection = None
        shape_changes = in_chan != out_chan or stride != 1
        if shape_changes:
            projection = nn.Sequential(
                nn.Conv2d(in_chan, out_chan,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm2d(out_chan, activation='none'),
            )
        self.downsample = projection

    def forward(self, x):
        """Return ``relu(shortcut(x) + residual(x))``."""
        branch = self.bn2(self.conv2(self.bn1(self.conv1(x))))
        identity = x if self.downsample is None else self.downsample(x)
        return self.relu(identity + branch)
class Resnet34(nn.Module):
    """ResNet-34 style backbone returning the outputs of its last three stages."""

    def __init__(self):
        """Build the stem and the four residual stages, then load pretrained weights."""
        super(Resnet34, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # (in_chan, out_chan, number of blocks, stride) for layer1..layer4
        stage_specs = (
            (64, 64, 3, 1),
            (64, 128, 4, 2),
            (128, 256, 6, 2),
            (256, 512, 3, 2),
        )
        for index, (in_chan, out_chan, blocks, stride) in enumerate(stage_specs, 1):
            stage = create_layer_basic(in_chan, out_chan, bnum=blocks, stride=stride)
            setattr(self, 'layer%d' % index, stage)
        self.init_weight()

    def forward(self, x):
        """Return the feature maps of layer2, layer3 and layer4 as a tuple."""
        stem = self.maxpool(self.bn1(self.conv1(x)))
        feat8 = self.layer2(self.layer1(stem))  # 1/8
        feat16 = self.layer3(feat8)  # 1/16
        feat32 = self.layer4(feat16)  # 1/32
        return feat8, feat16, feat32

    def init_weight(self):
        """Load the weights published at `resnet34_url`, skipping every key containing 'fc'."""
        pretrained = modelzoo.load_url(resnet34_url)
        merged = self.state_dict()
        merged.update({key: value for key, value in pretrained.items()
                       if 'fc' not in key})
        self.load_state_dict(merged)

    def get_params(self):
        """Split parameters into (weight-decayed, not weight-decayed) lists.

        Convolution / linear weights are decayed; their biases and all
        parameters of normalization modules are not.
        """
        decayed, undecayed = [], []
        for module in self.modules():
            if isinstance(module, (nn.Linear, nn.Conv2d)):
                decayed.append(module.weight)
                if module.bias is not None:
                    undecayed.append(module.bias)
            elif isinstance(module, (BatchNorm2d, nn.BatchNorm2d)):
                undecayed.extend(module.parameters())
        return decayed, undecayed
class NetOutput(nn.Module):
    """Prediction head: a ``ConvBNReLU`` block followed by a bias-free 3x3
    convolution that outputs ``n_classes`` channels."""

    def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
        super().__init__()
        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
        self.conv_out = nn.Conv2d(
            mid_chan,
            n_classes,
            kernel_size=3,
            padding=1,
            bias=False,
        )
        self.init_weight()

    def forward(self, x):
        """Apply ``conv`` then ``conv_out`` to ``x``."""
        return self.conv_out(self.conv(x))

    def init_weight(self):
        """Kaiming-initialise (``a=1``) every direct child that is an ``nn.Conv2d``."""
        direct_convs = (m for m in self.children() if isinstance(m, nn.Conv2d))
        for conv in direct_convs:
            nn.init.kaiming_normal_(conv.weight, a=1)
            if conv.bias is not None:
                nn.init.constant_(conv.bias, 0)

    def get_params(self):
        """Return ``(wd_params, nowd_params)``.

        Conv/linear weights go to the first list; their biases and every
        parameter of the normalisation layers go to the second.
        """
        decayed, undecayed = [], []
        for mod in self.modules():
            if isinstance(mod, (nn.Linear, nn.Conv2d)):
                decayed.append(mod.weight)
                if mod.bias is not None:
                    undecayed.append(mod.bias)
                continue
            if isinstance(mod, (InPlaceABNSync, torch.nn.BatchNorm2d)):
                undecayed.extend(mod.parameters())
        return decayed, undecayed


class ShelfNet(nn.Module):
    """Segmentation network: ``Resnet34`` backbone, ``Decoder``, ``LadderBlock``
    and three ``NetOutput`` heads (one main, two auxiliary)."""

    def __init__(self, n_classes, *args, **kwargs):
        super().__init__()
        self.backbone = Resnet34()

        self.decoder = Decoder(planes=64 * 2, layers=3, kernel=3)
        self.ladder = LadderBlock(planes=64 * 2, layers=3, kernel=3)

        self.conv_out = NetOutput(64 * 2, 64 * 2, n_classes)
        self.conv_out16 = NetOutput(128 * 2, 64, n_classes)
        self.conv_out32 = NetOutput(256 * 2, 64, n_classes)

    def forward(self, x, aux = True):
        """Return logits resized to the spatial size of ``x``.

        With ``aux`` true the result is ``(feat_out, feat_out16, feat_out32)``;
        otherwise only ``feat_out`` is returned.
        """
        H, W = x.size()[2:]

        def to_input_size(logits):
            # Every head is resized back to the resolution of the input.
            return F.interpolate(logits, (H, W), mode='bilinear', align_corners=True)

        feat8, feat16, feat32 = self.backbone(x)
        refined = self.ladder(self.decoder([feat8, feat16, feat32]))

        # The ladder output is indexed from the end: last entry feeds the main head.
        feat_cp8, feat_cp16, feat_cp32 = refined[-1], refined[-2], refined[-3]

        feat_out = to_input_size(self.conv_out(feat_cp8))
        if not aux:
            return feat_out

        feat_out16 = to_input_size(self.conv_out16(feat_cp16))
        feat_out32 = to_input_size(self.conv_out32(feat_cp32))
        return feat_out, feat_out16, feat_out32

    def get_params(self):
        """Collect the children's parameter groups into four lists.

        Returns ``(wd_params, nowd_params, lr_mul_wd_params,
        lr_mul_nowd_params)``. Children that are ``LadderBlock``,
        ``NetOutput``, ``Decoder`` or ``ConvBNReLU`` instances contribute to
        the two ``lr_mul_*`` lists; every other child (here the backbone)
        contributes to the first two.
        NOTE(review): the ``lr_mul`` naming suggests these groups get a
        learning-rate multiplier in the optimizer -- confirm in optimizer.py.
        """
        head_types = (LadderBlock, NetOutput, Decoder, ConvBNReLU)
        wd_params, nowd_params = [], []
        lr_mul_wd_params, lr_mul_nowd_params = [], []
        for _, child in self.named_children():
            child_wd, child_nowd = child.get_params()
            if isinstance(child, head_types):
                lr_mul_wd_params.extend(child_wd)
                lr_mul_nowd_params.extend(child_nowd)
            else:
                wd_params.extend(child_wd)
                nowd_params.extend(child_nowd)
        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
def parse_args():
    """Parse the command-line options of the training script.

    Returns:
        argparse.Namespace with a single field ``local_rank`` (int,
        default ``-1``).
    """
    parse = argparse.ArgumentParser()
    parse.add_argument(
        '--local_rank',
        dest='local_rank',
        type=int,
        default=-1,
    )
    return parse.parse_args()


def train():
    """Train ShelfNet with DistributedDataParallel and save checkpoints.

    ``--local_rank`` selects both the CUDA device and the process-group
    rank; the world size is the number of visible CUDA devices. The loop
    runs ``max_iter`` iterations, restarting the data loader (and bumping
    the sampler epoch) whenever it is exhausted. Every 1000 iterations a
    checkpoint is written under ``respth`` and, except at iteration 0,
    passed to ``evaluate``. The final weights go to ``model_final.pth``.
    """
    args = parse_args()
    torch.cuda.set_device(args.local_rank)
    dist.init_process_group(
        backend='nccl',
        init_method='tcp://127.0.0.1:33241',
        world_size=torch.cuda.device_count(),
        rank=args.local_rank,
    )
    setup_logger(respth)

    ## dataset
    n_classes = 19
    n_img_per_gpu = 7
    n_workers = 4
    cropsize = [1024, 1024]
    ds = CityScapes('/data2/.encoding/data/cityscapes', cropsize=cropsize, mode='train')
    sampler = torch.utils.data.distributed.DistributedSampler(ds)
    dl = DataLoader(
        ds,
        batch_size=n_img_per_gpu,
        shuffle=False,
        sampler=sampler,
        num_workers=n_workers,
        pin_memory=True,
        drop_last=True,
    )

    ## model
    ignore_idx = 255
    net = ShelfNet(n_classes=n_classes)
    net.cuda()
    net.train()
    net = nn.parallel.DistributedDataParallel(
        net,
        device_ids=[args.local_rank, ],
        output_device=args.local_rank,
        find_unused_parameters=True,
    )
    score_thres = 0.7
    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)

    ## optimizer
    momentum = 0.9
    weight_decay = 5e-4
    lr_start = 1e-2
    max_iter = 80000
    power = 0.9
    warmup_steps = 1000
    warmup_start_lr = 1e-5
    optim = Optimizer(
        model=net.module,
        lr0=lr_start,
        momentum=momentum,
        wd=weight_decay,
        warmup_steps=warmup_steps,
        warmup_start_lr=warmup_start_lr,
        max_iter=max_iter,
        power=power,
    )

    ## train loop
    msg_iter = 50
    loss_avg = []
    st = glob_st = time.time()
    diter = iter(dl)
    epoch = 0
    for it in range(max_iter):
        try:
            im, lb = next(diter)
            # A short batch is treated like the end of the epoch.
            if not im.size()[0] == n_img_per_gpu:
                raise StopIteration
        except StopIteration:
            epoch += 1
            sampler.set_epoch(epoch)
            diter = iter(dl)
            im, lb = next(diter)
        im = im.cuda()
        lb = lb.cuda()
        lb = torch.squeeze(lb, 1)

        optim.zero_grad()
        out, out16, out32 = net(im)
        lossp = LossP(out, lb)
        loss2 = Loss2(out16, lb)
        loss3 = Loss3(out32, lb)
        loss = lossp + loss2 + loss3
        loss.backward()
        optim.step()

        loss_avg.append(loss.item())
        ## print training log message
        if (it + 1) % msg_iter == 0:
            # `it` is 0-based, so it + 1 iterations have completed. Using
            # the completed count keeps the ETA exact and avoids a division
            # by zero should msg_iter ever be set to 1.
            done = it + 1
            mean_loss = sum(loss_avg) / len(loss_avg)
            lr = optim.lr
            ed = time.time()
            t_intv, glob_t_intv = ed - st, ed - glob_st
            eta = int((max_iter - done) * (glob_t_intv / done))
            eta = str(datetime.timedelta(seconds=eta))
            msg = ', '.join([
                'it: {it}/{max_it}',
                'lr: {lr:4f}',
                'loss: {loss:.4f}',
                'eta: {eta}',
                'time: {time:.4f}',
            ]).format(
                it=done,
                max_it=max_iter,
                lr=lr,
                loss=mean_loss,
                time=t_intv,
                eta=eta,
            )
            logger.info(msg)
            loss_avg = []
            st = ed

        if it % 1000 == 0:
            ## periodic checkpoint
            save_pth = osp.join(respth, 'shelfnet_model_it_%d.pth' % it)
            # Only rank 0 writes: all ranks share this path, and concurrent
            # writers could leave a truncated file for evaluate() to load.
            if dist.get_rank() == 0:
                torch.save(net.module.state_dict(), save_pth)
            # Wait until the file is complete before any rank reads it.
            dist.barrier()
            if it > 0:
                evaluate(checkpoint=save_pth)

    ## dump the final model
    save_pth = osp.join(respth, 'model_final.pth')
    net.cpu()
    state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
    if dist.get_rank() == 0:
        torch.save(state, save_pth)
    logger.info('training done, model saved to: {}'.format(save_pth))
class RandomScale(object):
    """Resize an image/label pair by a factor drawn at random from ``scales``."""

    def __init__(self, scales=(1, ), *args, **kwargs):
        self.scales = scales

    def __call__(self, im_lb):
        """Return a new dict with ``im`` and ``lb`` resized by the same factor.

        The image is resized with bilinear filtering and the label with
        nearest-neighbour, so label values are never interpolated.
        """
        im = im_lb['im']
        lb = im_lb['lb']
        W, H = im.size
        scale = random.choice(self.scales)
        w, h = int(W * scale), int(H * scale)
        return dict(
            im=im.resize((w, h), Image.BILINEAR),
            lb=lb.resize((w, h), Image.NEAREST),
        )


class ColorJitter(object):
    """Randomly perturb brightness, contrast and saturation of the image.

    Each strength ``s > 0`` yields a factor range ``[max(1 - s, 0), 1 + s]``.
    A strength of ``None`` or ``<= 0`` disables that component by using the
    identity range ``[1, 1]``. Previously the attribute was simply not set
    in that case, so calling the transform raised ``AttributeError``.
    """

    def __init__(self, brightness=None, contrast=None, saturation=None, *args, **kwargs):
        self.brightness = self._factor_range(brightness)
        self.contrast = self._factor_range(contrast)
        self.saturation = self._factor_range(saturation)

    @staticmethod
    def _factor_range(strength):
        """Map a jitter strength to the ``[low, high]`` enhancement-factor range."""
        if strength is None or not strength > 0:
            # Factor 1 leaves the image unchanged.
            return [1, 1]
        return [max(1 - strength, 0), 1 + strength]

    def __call__(self, im_lb):
        """Return a new dict with the jittered image and the untouched label."""
        im = im_lb['im']
        lb = im_lb['lb']
        r_brightness = random.uniform(self.brightness[0], self.brightness[1])
        r_contrast = random.uniform(self.contrast[0], self.contrast[1])
        r_saturation = random.uniform(self.saturation[0], self.saturation[1])
        im = ImageEnhance.Brightness(im).enhance(r_brightness)
        im = ImageEnhance.Contrast(im).enhance(r_contrast)
        im = ImageEnhance.Color(im).enhance(r_saturation)
        return dict(
            im=im,
            lb=lb,
        )


class MultiScale(object):
    """Produce one bilinearly resized copy of an image per ratio in ``scales``."""

    def __init__(self, scales):
        self.scales = scales

    def __call__(self, img):
        """Return the list of resized images, in the order of ``scales``."""
        W, H = img.size
        return [
            img.resize((int(W * ratio), int(H * ratio)), Image.BILINEAR)
            for ratio in self.scales
        ]


class Compose(object):
    """Chain transforms: each callable in ``do_list`` receives the previous result."""

    def __init__(self, do_list):
        self.do_list = do_list

    def __call__(self, im_lb):
        """Apply the transforms in order and return the final value."""
        for comp in self.do_list:
            im_lb = comp(im_lb)
        return im_lb