├── .editorconfig ├── .gitignore ├── LICENSE ├── README.md ├── encoding ├── __init__.py ├── datasets │ ├── __init__.py │ ├── ade20k.py │ ├── base.py │ ├── cityscapes.py │ ├── coco.py │ ├── pascal_aug.py │ ├── pascal_voc.py │ └── pcontext.py ├── dilated │ ├── __init__.py │ └── resnet.py ├── functions │ ├── __init__.py │ ├── encoding.py │ └── syncbn.py ├── models │ ├── __init__.py │ ├── base.py │ ├── deeplabv3.py │ ├── encnet.py │ ├── fcn.py │ ├── model_store.py │ ├── model_zoo.py │ └── psp.py ├── nn │ ├── __init__.py │ ├── comm.py │ ├── customize.py │ ├── encoding.py │ └── syncbn.py ├── parallel.py └── utils │ ├── __init__.py │ ├── files.py │ ├── lr_scheduler.py │ ├── metrics.py │ └── pallete.py ├── experiments └── segmentation │ ├── option.py │ ├── scripts │ ├── deeplab_res50_pcontext.sh │ ├── encnet_res101_ade20k_train.sh │ ├── encnet_res101_ade20k_trainval.sh │ ├── encnet_res101_pcontext.sh │ ├── encnet_res50_ade20k_train.sh │ ├── encnet_res50_ade20k_trainval.sh │ ├── encnet_res50_pcontext.sh │ └── psp_res50_pcontext.sh │ ├── test.py │ ├── test_fps_params.py │ ├── test_single_image.py │ └── train.py ├── images ├── Framework.png ├── JPU.png ├── encnet_2009_001858.png ├── encnet_ADE_val_00001086.png ├── gt_2009_001858.png ├── gt_ADE_val_00001086.png ├── img_2009_001858.jpg ├── img_ADE_val_00001086.jpg ├── ours_2009_001858.png └── ours_ADE_val_00001086.png └── scripts ├── prepare_ade20k.py ├── prepare_cityscapes.py ├── prepare_coco.py ├── prepare_pascal.py └── prepare_pcontext.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.swp 3 | *.pyc 4 | 5 | version.py 6 | 7 | runs/ 8 | data/ 9 | build/ 10 | results/ 11 | 12 | .idea -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017- Hang Zhang. All rights reserved. 4 | Copyright (c) 2018- Amazon.com, Inc. or its affiliates. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | 1. Redistributions of source code must retain the above copyright 14 | notice, this list of conditions and the following disclaimer. 15 | 16 | 2. Redistributions in binary form must reproduce the above copyright 17 | notice, this list of conditions and the following disclaimer in the 18 | documentation and/or other materials provided with the distribution. 19 | 20 | 3. Neither the name of Amazon Inc nor the names of the contributors may be 21 | used to endorse or promote products derived from this software without 22 | specific prior written permission. 
23 | 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 29 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | SOFTWARE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation 2 | [[Project]](http://wuhuikai.me/FastFCNProject/) [[Paper]](http://wuhuikai.me/FastFCNProject/fast_fcn.pdf) [[arXiv]](https://arxiv.org/abs/1903.11816) [[Home]](http://wuhuikai.me) 3 | 4 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/fastfcn-rethinking-dilated-convolution-in-the/semantic-segmentation-pascal-context)](https://paperswithcode.com/sota/semantic-segmentation-pascal-context?p=fastfcn-rethinking-dilated-convolution-in-the) 5 | 6 | Official implementation of **FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation**. 7 | A **Faster**, **Stronger** and **Lighter** framework for semantic segmentation, achieving state-of-the-art performance with more than **3x** acceleration. 8 | ``` 9 | @inproceedings{wu2019fastfcn, 10 | title = {FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation}, 11 | author = {Wu, Huikai and Zhang, Junge and Huang, Kaiqi and Liang, Kongming and Yu, Yizhou}, 12 | booktitle = {arXiv preprint arXiv:1903.11816}, 13 | year = {2019} 14 | } 15 | ``` 16 | Contact: Hui-Kai Wu (huikaiwu@icloud.com) 17 | 18 | ## Update 19 | **2020-04-15: Now supports inference on a single image!!!** 20 | ```bash 21 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test_single_image --dataset [pcontext|ade20k] \ 22 | --model [encnet|deeplab|psp] --jpu [JPU|JPU_X] \ 23 | --backbone [resnet50|resnet101] [--ms] --resume {MODEL} --input-path {INPUT} --save-path {OUTPUT} 24 | ``` 25 | 26 | **2020-04-15: New joint upsampling module is now available!!!** 27 | - `--jpu [JPU|JPU_X]`: JPU is the original module in the arXiv paper; JPU_X is a pyramid version of JPU. 28 | 29 | **2020-02-20: `FastFCN` can now run on every `OS` with `PyTorch>=1.1.0` and `Python==3.*.*`** 30 | - Replaced all `C/C++` extensions with pure Python extensions. 31 | 32 | ## Version 33 | 1. Original code, producing the results reported in the arXiv paper. [[branch:v1.0.0]](https://github.com/wuhuikai/FastFCN/tree/v1.0.0) 34 | 2. Pure PyTorch code, with `torch.nn.DistributedDataParallel` and `torch.nn.SyncBatchNorm`. [[branch:latest]](https://github.com/wuhuikai/FastFCN/tree/latest) 35 | 3. Pure Python code. [[branch:master]](https://github.com/wuhuikai/FastFCN) 36 | 37 | ## Overview 38 | ### Framework 39 | ![](images/Framework.png) 40 | ### Joint Pyramid Upsampling (JPU) 41 | ![](images/JPU.png) 42 | 43 | ## Install 44 | 1. [PyTorch >= 1.1.0](https://pytorch.org/get-started/locally) (Note: The code is tested in an environment with `python=3.6, cuda=9.0`) 45 | 2. Download **FastFCN** 46 | ``` 47 | git clone https://github.com/wuhuikai/FastFCN.git 48 | cd FastFCN 49 | ``` 50 | 3. 
Install Requirements 51 | ``` 52 | nose 53 | tqdm 54 | scipy 55 | cython 56 | requests 57 | ``` 58 | 59 | ## Train and Test 60 | ### PContext 61 | ``` 62 | python -m scripts.prepare_pcontext 63 | ``` 64 | | Method | Backbone | mIoU | FPS | Model | Scripts | 65 | |:----|:----|:---:|:---:|:---:|:---:| 66 | | EncNet | ResNet-50 | 49.91 | 18.77 | | | 67 | | EncNet+JPU (ours) | ResNet-50 | **51.05** | **37.56** | [GoogleDrive](https://drive.google.com/open?id=1Hy_GWVnTyJBNv4Hejwh5LKa8S_ph27y0) | [bash](experiments/segmentation/scripts/encnet_res50_pcontext.sh) | 68 | | PSP | ResNet-50 | 50.58 | 18.08 | | | 69 | | PSP+JPU (ours) | ResNet-50 | **50.89** | **28.48** | [GoogleDrive](https://drive.google.com/open?id=1fJItp7B7uz6s69fmquqtm18A72EJE5jm) | [bash](experiments/segmentation/scripts/psp_res50_pcontext.sh) | 70 | | DeepLabV3 | ResNet-50 | 49.19 | 15.99 | | | 71 | | DeepLabV3+JPU (ours) | ResNet-50 | **50.07** | **20.67** | [GoogleDrive](https://drive.google.com/open?id=11s20bUkPrZXXmFqYpwC_h1G57CB8g2u9) | [bash](experiments/segmentation/scripts/deeplab_res50_pcontext.sh) | 72 | | EncNet | ResNet-101 | 52.60 (MS) | 10.51 | | | 73 | | EncNet+JPU (ours) | ResNet-101 | **54.03 (MS)** | **32.02** | [GoogleDrive](https://drive.google.com/open?id=1GOIma8cXTKfTa2qSIcDO8EmctyoDzHuV) | [bash](experiments/segmentation/scripts/encnet_res101_pcontext.sh) | 74 | 75 | ### ADE20K 76 | ``` 77 | python -m scripts.prepare_ade20k 78 | ``` 79 | #### Training Set 80 | | Method | Backbone | mIoU (MS) | Model | Scripts | 81 | |:----|:----|:---:|:---:|:---:| 82 | | EncNet | ResNet-50 | 41.11 | | | 83 | | EncNet+JPU (ours) | ResNet-50 | **42.75** | [GoogleDrive](https://drive.google.com/open?id=1EdHDjNDtPmVgSD7RYjeyXy7SSYpTzYyN) | [bash](experiments/segmentation/scripts/encnet_res50_ade20k_train.sh) | 84 | | EncNet | ResNet-101 | 44.65 | | | 85 | | EncNet+JPU (ours) | ResNet-101 | 44.34 | [GoogleDrive](https://drive.google.com/open?id=1WFkbf8OWJmLGnOz5M_IxIZtiHKn2_bEp) | [bash](experiments/segmentation/scripts/encnet_res101_ade20k_train.sh) | 86 | #### Training Set + Val Set 87 | | Method | Backbone | FinalScore (MS) | Model | Scripts | 88 | |:----|:----|:---:|:---:|:---:| 89 | | EncNet+JPU (ours) | ResNet-50 | | [GoogleDrive](https://drive.google.com/open?id=10u8ISncp0NukwQb0K94GsH_AHgT6hgxc) | [bash](experiments/segmentation/scripts/encnet_res50_ade20k_trainval.sh) | 90 | | EncNet | ResNet-101 | 55.67 | | | 91 | | EncNet+JPU (ours) | ResNet-101 | **55.84** | [GoogleDrive](https://drive.google.com/open?id=15gdJeKFy7OXhAr6mQNYvu25LiPwFfQ-Z) | [bash](experiments/segmentation/scripts/encnet_res101_ade20k_trainval.sh) | 92 | 93 | **Note:** EncNet (ResNet-101) is trained with `crop_size=576`, while EncNet+JPU (ResNet-101) is trained with `crop_size=480` for fitting 4 images into a 12G GPU. 94 | 95 | ## Visual Results 96 | |Dataset|Input|GT|EncNet|Ours| 97 | |:----|:---:|:---:|:---:|:---:| 98 | |PContext|![](images/img_2009_001858.jpg)|![](images/gt_2009_001858.png)|![](images/encnet_2009_001858.png)|![](images/ours_2009_001858.png)| 99 | |ADE20K|![](images/img_ADE_val_00001086.jpg)|![](images/gt_ADE_val_00001086.png)|![](images/encnet_ADE_val_00001086.png)|![](images/ours_ADE_val_00001086.png)| 100 | 101 | ### [More Visual Results](http://wuhuikai.me/FastFCNProject/#visual) 102 | 103 | ## Acknowledgement 104 | Code borrows heavily from [PyTorch-Encoding](https://github.com/zhanghang1989/PyTorch-Encoding). 
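## Programmatic Usage (Sketch)
The scripts above are the supported entry points. For quick experiments, the `encoding` package can also be used directly from Python. Below is a minimal, illustrative sketch — not an official example — that loads a prepared ADE20K training split with the same normalization the models use. It assumes `torchvision` is installed, that `python -m scripts.prepare_ade20k` has been run, and the batch size and printed shapes are only indicative.
```python
import torch
import torchvision.transforms as transforms

from encoding.datasets import get_segmentation_dataset

# Normalization matches the defaults in encoding/models/base.py
input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
])

# mode='train' applies the synchronized random flip/scale/crop/blur augmentation
trainset = get_segmentation_dataset('ade20k', split='train', mode='train',
                                    transform=input_transform)
loader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True)

images, masks = next(iter(loader))
print(images.shape, masks.shape)  # e.g. [4, 3, 480, 480] and [4, 480, 480]
```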
105 | -------------------------------------------------------------------------------- /encoding/__init__.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """An optimized PyTorch package with CUDA backend.""" 12 | from . import nn, functions, dilated, parallel, utils, models, datasets 13 | -------------------------------------------------------------------------------- /encoding/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import * 2 | from .coco import COCOSegmentation 3 | from .ade20k import ADE20KSegmentation 4 | from .pascal_voc import VOCSegmentation 5 | from .pascal_aug import VOCAugSegmentation 6 | from .pcontext import ContextSegmentation 7 | from .cityscapes import CitySegmentation 8 | 9 | datasets = { 10 | 'coco': COCOSegmentation, 11 | 'ade20k': ADE20KSegmentation, 12 | 'pascal_voc': VOCSegmentation, 13 | 'pascal_aug': VOCAugSegmentation, 14 | 'pcontext': ContextSegmentation, 15 | 'citys': CitySegmentation, 16 | } 17 | 18 | def get_segmentation_dataset(name, **kwargs): 19 | return datasets[name.lower()](**kwargs) 20 | -------------------------------------------------------------------------------- /encoding/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import os 8 | import numpy as np 9 | 10 | import torch 11 | 12 | from PIL import Image 13 | from .base import BaseDataset 14 | 15 | class ADE20KSegmentation(BaseDataset): 16 | BASE_DIR = 'ADEChallengeData2016' 17 | NUM_CLASS = 150 18 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 19 | mode=None, transform=None, target_transform=None, **kwargs): 20 | super(ADE20KSegmentation, self).__init__( 21 | root, split, mode, transform, target_transform, **kwargs) 22 | # assert exists and prepare dataset automatically 23 | root = os.path.join(root, self.BASE_DIR) 24 | assert os.path.exists(root), "Please setup the dataset using" + \ 25 | "encoding/scripts/prepare_ade20k.py" 26 | self.images, self.masks = _get_ade20k_pairs(root, split) 27 | if split != 'test': 28 | assert (len(self.images) == len(self.masks)) 29 | if len(self.images) == 0: 30 | raise(RuntimeError("Found 0 images in subfolders of: \ 31 | " + root + "\n")) 32 | 33 | def __getitem__(self, index): 34 | img = Image.open(self.images[index]).convert('RGB') 35 | if self.mode == 'test': 36 | if self.transform is not None: 37 | img = self.transform(img) 38 | return img, os.path.basename(self.images[index]) 39 | mask = Image.open(self.masks[index]) 40 | # synchrosized transform 41 | if self.mode == 'train': 42 | img, mask = self._sync_transform(img, mask) 43 | elif self.mode == 'val': 44 | img, mask = self._val_sync_transform(img, mask) 45 | else: 46 | assert self.mode == 'testval' 47 | mask = 
self._mask_transform(mask) 48 | # general resize, normalize and toTensor 49 | if self.transform is not None: 50 | img = self.transform(img) 51 | if self.target_transform is not None: 52 | mask = self.target_transform(mask) 53 | return img, mask 54 | 55 | def _mask_transform(self, mask): 56 | target = np.array(mask).astype('int64') - 1 57 | return torch.from_numpy(target) 58 | 59 | def __len__(self): 60 | return len(self.images) 61 | 62 | @property 63 | def pred_offset(self): 64 | return 1 65 | 66 | 67 | def _get_ade20k_pairs(folder, split='train'): 68 | def get_path_pairs(img_folder, mask_folder): 69 | img_paths = [] 70 | mask_paths = [] 71 | for filename in os.listdir(img_folder): 72 | basename, _ = os.path.splitext(filename) 73 | if filename.endswith(".jpg"): 74 | imgpath = os.path.join(img_folder, filename) 75 | maskname = basename + '.png' 76 | maskpath = os.path.join(mask_folder, maskname) 77 | if os.path.isfile(maskpath): 78 | img_paths.append(imgpath) 79 | mask_paths.append(maskpath) 80 | else: 81 | print('cannot find the mask:', maskpath) 82 | return img_paths, mask_paths 83 | 84 | if split == 'train': 85 | img_folder = os.path.join(folder, 'images/training') 86 | mask_folder = os.path.join(folder, 'annotations/training') 87 | img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) 88 | print('len(img_paths):', len(img_paths)) 89 | assert len(img_paths) == 20210 90 | elif split == 'val': 91 | img_folder = os.path.join(folder, 'images/validation') 92 | mask_folder = os.path.join(folder, 'annotations/validation') 93 | img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) 94 | assert len(img_paths) == 2000 95 | elif split == 'test': 96 | folder = os.path.join(folder, '../release_test') 97 | with open(os.path.join(folder, 'list.txt')) as f: 98 | img_paths = [os.path.join(folder, 'testing', line.strip()) for line in f] 99 | assert len(img_paths) == 3352 100 | return img_paths, None 101 | else: 102 | assert split == 'trainval' 103 | train_img_folder = os.path.join(folder, 'images/training') 104 | train_mask_folder = os.path.join(folder, 'annotations/training') 105 | val_img_folder = os.path.join(folder, 'images/validation') 106 | val_mask_folder = os.path.join(folder, 'annotations/validation') 107 | train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder) 108 | val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder) 109 | img_paths = train_img_paths + val_img_paths 110 | mask_paths = train_mask_paths + val_mask_paths 111 | assert len(img_paths) == 22210 112 | return img_paths, mask_paths 113 | -------------------------------------------------------------------------------- /encoding/datasets/base.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import random 8 | import numpy as np 9 | 10 | import torch 11 | import torch.utils.data as data 12 | 13 | from PIL import Image, ImageOps, ImageFilter 14 | 15 | __all__ = ['BaseDataset', 'test_batchify_fn'] 16 | 17 | class BaseDataset(data.Dataset): 18 | def __init__(self, root, split, mode=None, transform=None, 19 | target_transform=None, base_size=520, crop_size=480): 20 | self.root = root 21 | self.transform = transform 22 | self.target_transform = target_transform 23 | self.split = 
split 24 | self.mode = mode if mode is not None else split 25 | self.base_size = base_size 26 | self.crop_size = crop_size 27 | if self.mode == 'train': 28 | print('BaseDataset: base_size {}, crop_size {}'. \ 29 | format(base_size, crop_size)) 30 | 31 | def __getitem__(self, index): 32 | raise NotImplemented 33 | 34 | @property 35 | def num_class(self): 36 | return self.NUM_CLASS 37 | 38 | @property 39 | def pred_offset(self): 40 | raise NotImplemented 41 | 42 | def make_pred(self, x): 43 | return x + self.pred_offset 44 | 45 | def _val_sync_transform(self, img, mask): 46 | outsize = self.crop_size 47 | short_size = outsize 48 | w, h = img.size 49 | if w > h: 50 | oh = short_size 51 | ow = int(1.0 * w * oh / h) 52 | else: 53 | ow = short_size 54 | oh = int(1.0 * h * ow / w) 55 | img = img.resize((ow, oh), Image.BILINEAR) 56 | mask = mask.resize((ow, oh), Image.NEAREST) 57 | # center crop 58 | w, h = img.size 59 | x1 = int(round((w - outsize) / 2.)) 60 | y1 = int(round((h - outsize) / 2.)) 61 | img = img.crop((x1, y1, x1+outsize, y1+outsize)) 62 | mask = mask.crop((x1, y1, x1+outsize, y1+outsize)) 63 | # final transform 64 | return img, self._mask_transform(mask) 65 | 66 | def _sync_transform(self, img, mask): 67 | # random mirror 68 | if random.random() < 0.5: 69 | img = img.transpose(Image.FLIP_LEFT_RIGHT) 70 | mask = mask.transpose(Image.FLIP_LEFT_RIGHT) 71 | crop_size = self.crop_size 72 | # random scale (short edge from 480 to 720) 73 | short_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0)) 74 | w, h = img.size 75 | if h > w: 76 | ow = short_size 77 | oh = int(1.0 * h * ow / w) 78 | else: 79 | oh = short_size 80 | ow = int(1.0 * w * oh / h) 81 | img = img.resize((ow, oh), Image.BILINEAR) 82 | mask = mask.resize((ow, oh), Image.NEAREST) 83 | # pad crop 84 | if short_size < crop_size: 85 | padh = crop_size - oh if oh < crop_size else 0 86 | padw = crop_size - ow if ow < crop_size else 0 87 | img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0) 88 | mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0) 89 | # random crop crop_size 90 | w, h = img.size 91 | x1 = random.randint(0, w - crop_size) 92 | y1 = random.randint(0, h - crop_size) 93 | img = img.crop((x1, y1, x1+crop_size, y1+crop_size)) 94 | mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size)) 95 | # gaussian blur as in PSP 96 | if random.random() < 0.5: 97 | img = img.filter(ImageFilter.GaussianBlur( 98 | radius=random.random())) 99 | # final transform 100 | return img, self._mask_transform(mask) 101 | 102 | def _mask_transform(self, mask): 103 | return torch.from_numpy(np.array(mask)).long() 104 | 105 | 106 | def test_batchify_fn(data): 107 | error_msg = "batch must contain tensors, tuples or lists; found {}" 108 | if isinstance(data[0], (str, torch.Tensor)): 109 | return list(data) 110 | elif isinstance(data[0], (tuple, list)): 111 | data = zip(*data) 112 | return [test_batchify_fn(i) for i in data] 113 | raise TypeError((error_msg.format(type(data[0])))) 114 | -------------------------------------------------------------------------------- /encoding/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2018 5 | ########################################################################### 6 | 7 | import os 8 | import random 9 | import numpy as np 10 | 11 | import torch 12 | 13 
| from tqdm import tqdm 14 | from PIL import Image, ImageOps, ImageFilter 15 | 16 | from .base import BaseDataset 17 | 18 | class CitySegmentation(BaseDataset): 19 | NUM_CLASS = 19 20 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 21 | mode=None, transform=None, target_transform=None, **kwargs): 22 | super(CitySegmentation, self).__init__( 23 | root, split, mode, transform, target_transform, **kwargs) 24 | self.images, self.mask_paths = get_city_pairs(self.root, self.split) 25 | assert (len(self.images) == len(self.mask_paths)) 26 | if len(self.images) == 0: 27 | raise RuntimeError("Found 0 images in subfolders of: \ 28 | " + self.root + "\n") 29 | self._indices = np.array(range(-1, 19)) 30 | self._classes = np.array([0, 7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 31 | 23, 24, 25, 26, 27, 28, 31, 32, 33]) 32 | self._key = np.array([-1, -1, -1, -1, -1, -1, 33 | -1, -1, 0, 1, -1, -1, 34 | 2, 3, 4, -1, -1, -1, 35 | 5, -1, 6, 7, 8, 9, 36 | 10, 11, 12, 13, 14, 15, 37 | -1, -1, 16, 17, 18]) 38 | self._mapping = np.array(range(-1, len(self._key)-1)).astype('int32') 39 | 40 | def _class_to_index(self, mask): 41 | # assert the values 42 | values = np.unique(mask) 43 | for i in range(len(values)): 44 | assert(values[i] in self._mapping) 45 | index = np.digitize(mask.ravel(), self._mapping, right=True) 46 | return self._key[index].reshape(mask.shape) 47 | 48 | def _preprocess(self, mask_file): 49 | if os.path.exists(mask_file): 50 | masks = torch.load(mask_file) 51 | return masks 52 | masks = [] 53 | print("Preprocessing mask, this will take a while." + \ 54 | "But don't worry, it only run once for each split.") 55 | tbar = tqdm(self.mask_paths) 56 | for fname in tbar: 57 | tbar.set_description("Preprocessing masks {}".format(fname)) 58 | mask = Image.fromarray(self._class_to_index( 59 | np.array(Image.open(fname))).astype('int8')) 60 | masks.append(mask) 61 | torch.save(masks, mask_file) 62 | return masks 63 | 64 | def __getitem__(self, index): 65 | img = Image.open(self.images[index]).convert('RGB') 66 | if self.mode == 'test': 67 | if self.transform is not None: 68 | img = self.transform(img) 69 | return img, os.path.basename(self.images[index]) 70 | mask = Image.open(self.mask_paths[index]) 71 | # synchrosized transform 72 | if self.mode == 'train': 73 | img, mask = self._sync_transform(img, mask) 74 | elif self.mode == 'val': 75 | img, mask = self._val_sync_transform(img, mask) 76 | else: 77 | assert self.mode == 'testval' 78 | mask = self._mask_transform(mask) 79 | # general resize, normalize and toTensor 80 | if self.transform is not None: 81 | img = self.transform(img) 82 | if self.target_transform is not None: 83 | mask = self.target_transform(mask) 84 | return img, mask 85 | 86 | def _sync_transform(self, img, mask): 87 | # random mirror 88 | if random.random() < 0.5: 89 | img = img.transpose(Image.FLIP_LEFT_RIGHT) 90 | mask = mask.transpose(Image.FLIP_LEFT_RIGHT) 91 | crop_size = self.crop_size 92 | # random scale (short edge from 480 to 720) 93 | short_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0)) 94 | w, h = img.size 95 | if h > w: 96 | ow = short_size 97 | oh = int(1.0 * h * ow / w) 98 | else: 99 | oh = short_size 100 | ow = int(1.0 * w * oh / h) 101 | img = img.resize((ow, oh), Image.BILINEAR) 102 | mask = mask.resize((ow, oh), Image.NEAREST) 103 | # random rotate -10~10, mask using NN rotate 104 | deg = random.uniform(-10, 10) 105 | img = img.rotate(deg, resample=Image.BILINEAR) 106 | mask = mask.rotate(deg, resample=Image.NEAREST) 
107 | # pad crop 108 | if short_size < crop_size: 109 | padh = crop_size - oh if oh < crop_size else 0 110 | padw = crop_size - ow if ow < crop_size else 0 111 | img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0) 112 | mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0) 113 | # random crop crop_size 114 | w, h = img.size 115 | x1 = random.randint(0, w - crop_size) 116 | y1 = random.randint(0, h - crop_size) 117 | img = img.crop((x1, y1, x1+crop_size, y1+crop_size)) 118 | mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size)) 119 | # gaussian blur as in PSP 120 | if random.random() < 0.5: 121 | img = img.filter(ImageFilter.GaussianBlur( 122 | radius=random.random())) 123 | # final transform 124 | return img, self._mask_transform(mask) 125 | 126 | def _mask_transform(self, mask): 127 | target = self._class_to_index(np.array(mask).astype('int32')) 128 | return torch.from_numpy(target).long() 129 | 130 | def __len__(self): 131 | return len(self.images) 132 | 133 | def make_pred(self, mask): 134 | values = np.unique(mask) 135 | for i in range(len(values)): 136 | assert(values[i] in self._indices) 137 | index = np.digitize(mask.ravel(), self._indices, right=True) 138 | return self._classes[index].reshape(mask.shape) 139 | 140 | 141 | def get_city_pairs(folder, split='train'): 142 | def get_path_pairs(img_folder, mask_folder): 143 | img_paths = [] 144 | mask_paths = [] 145 | for root, directories, files in os.walk(img_folder): 146 | for filename in files: 147 | if filename.endswith(".png"): 148 | imgpath = os.path.join(root, filename) 149 | foldername = os.path.basename(os.path.dirname(imgpath)) 150 | maskname = filename.replace('leftImg8bit','gtFine_labelIds') 151 | maskpath = os.path.join(mask_folder, foldername, maskname) 152 | if os.path.isfile(imgpath) and os.path.isfile(maskpath): 153 | img_paths.append(imgpath) 154 | mask_paths.append(maskpath) 155 | else: 156 | print('cannot find the mask or image:', imgpath, maskpath) 157 | print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) 158 | return img_paths, mask_paths 159 | 160 | if split == 'train' or split == 'val' or split == 'test': 161 | img_folder = os.path.join(folder, 'leftImg8bit/' + split) 162 | mask_folder = os.path.join(folder, 'gtFine/'+ split) 163 | img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) 164 | return img_paths, mask_paths 165 | else: 166 | assert split == 'trainval' 167 | print('trainval set') 168 | train_img_folder = os.path.join(folder, 'leftImg8bit/train') 169 | train_mask_folder = os.path.join(folder, 'gtFine/train') 170 | val_img_folder = os.path.join(folder, 'leftImg8bit/val') 171 | val_mask_folder = os.path.join(folder, 'gtFine/val') 172 | train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder) 173 | val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder) 174 | img_paths = train_img_paths + val_img_paths 175 | mask_paths = train_mask_paths + val_mask_paths 176 | return img_paths, mask_paths 177 | -------------------------------------------------------------------------------- /encoding/datasets/coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | import torch 5 | 6 | from PIL import Image 7 | from tqdm import trange 8 | 9 | from .base import BaseDataset 10 | 11 | class COCOSegmentation(BaseDataset): 12 | NUM_CLASS = 21 13 | CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 14 | 1, 64, 20, 63, 7, 72] 15 | 
def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 16 | mode=None, transform=None, target_transform=None, **kwargs): 17 | super(COCOSegmentation, self).__init__( 18 | root, split, mode, transform, target_transform, **kwargs) 19 | from pycocotools.coco import COCO 20 | from pycocotools import mask 21 | if split == 'train': 22 | print('train set') 23 | ann_file = os.path.join(root, 'annotations/instances_train2017.json') 24 | ids_file = os.path.join(root, 'annotations/train_ids.pth') 25 | self.root = os.path.join(root, 'train2017') 26 | else: 27 | print('val set') 28 | ann_file = os.path.join(root, 'annotations/instances_val2017.json') 29 | ids_file = os.path.join(root, 'annotations/val_ids.pth') 30 | self.root = os.path.join(root, 'val2017') 31 | self.coco = COCO(ann_file) 32 | self.coco_mask = mask 33 | if os.path.exists(ids_file): 34 | self.ids = torch.load(ids_file) 35 | else: 36 | ids = list(self.coco.imgs.keys()) 37 | self.ids = self._preprocess(ids, ids_file) 38 | self.transform = transform 39 | self.target_transform = target_transform 40 | 41 | def __getitem__(self, index): 42 | coco = self.coco 43 | img_id = self.ids[index] 44 | img_metadata = coco.loadImgs(img_id)[0] 45 | path = img_metadata['file_name'] 46 | img = Image.open(os.path.join(self.root, path)).convert('RGB') 47 | cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id)) 48 | mask = Image.fromarray(self._gen_seg_mask( 49 | cocotarget, img_metadata['height'], img_metadata['width'])) 50 | # synchrosized transform 51 | if self.mode == 'train': 52 | img, mask = self._sync_transform(img, mask) 53 | elif self.mode == 'val': 54 | img, mask = self._val_sync_transform(img, mask) 55 | else: 56 | assert self.mode == 'testval' 57 | mask = self._mask_transform(mask) 58 | # general resize, normalize and toTensor 59 | if self.transform is not None: 60 | img = self.transform(img) 61 | if self.target_transform is not None: 62 | mask = self.target_transform(mask) 63 | return img, mask 64 | 65 | def __len__(self): 66 | return len(self.ids) 67 | 68 | def _gen_seg_mask(self, target, h, w): 69 | mask = np.zeros((h, w), dtype=np.uint8) 70 | coco_mask = self.coco_mask 71 | for instance in target: 72 | rle = coco_mask.frPyObjects(instance['segmentation'], h, w) 73 | m = coco_mask.decode(rle) 74 | cat = instance['category_id'] 75 | if cat in self.CAT_LIST: 76 | c = self.CAT_LIST.index(cat) 77 | else: 78 | continue 79 | if len(m.shape) < 3: 80 | mask[:, :] += (mask == 0) * (m * c) 81 | else: 82 | mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8) 83 | return mask 84 | 85 | def _preprocess(self, ids, ids_file): 86 | print("Preprocessing mask, this will take a while." 
+ \ 87 | "But don't worry, it only run once for each split.") 88 | tbar = trange(len(ids)) 89 | new_ids = [] 90 | for i in tbar: 91 | img_id = ids[i] 92 | cocotarget = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id)) 93 | img_metadata = self.coco.loadImgs(img_id)[0] 94 | mask = self._gen_seg_mask(cocotarget, img_metadata['height'], 95 | img_metadata['width']) 96 | # more than 1k pixels 97 | if (mask > 0).sum() > 1000: 98 | new_ids.append(img_id) 99 | tbar.set_description('Doing: {}/{}, got {} qualified images'.\ 100 | format(i, len(ids), len(new_ids))) 101 | print('Found number of qualified images: ', len(new_ids)) 102 | torch.save(new_ids, ids_file) 103 | return new_ids 104 | """ 105 | NUM_CHANNEL = 91 106 | [] background 107 | [5] airplane 108 | [2] bicycle 109 | [16] bird 110 | [9] boat 111 | [44] bottle 112 | [6] bus 113 | [3] car 114 | [17] cat 115 | [62] chair 116 | [21] cow 117 | [67] dining table 118 | [18] dog 119 | [19] horse 120 | [4] motorcycle 121 | [1] person 122 | [64] potted plant 123 | [20] sheep 124 | [63] couch 125 | [7] train 126 | [72] tv 127 | """ 128 | -------------------------------------------------------------------------------- /encoding/datasets/pascal_aug.py: -------------------------------------------------------------------------------- 1 | import os 2 | import scipy.io 3 | 4 | from PIL import Image 5 | 6 | from .base import BaseDataset 7 | 8 | class VOCAugSegmentation(BaseDataset): 9 | voc = [ 10 | 'background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle', 11 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 12 | 'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train', 13 | 'tv' 14 | ] 15 | NUM_CLASS = 21 16 | TRAIN_BASE_DIR = 'VOCaug/dataset/' 17 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 18 | mode=None, transform=None, target_transform=None, **kwargs): 19 | super(VOCAugSegmentation, self).__init__(root, split, mode, transform, 20 | target_transform, **kwargs) 21 | # train/val/test splits are pre-cut 22 | _voc_root = os.path.join(root, self.TRAIN_BASE_DIR) 23 | _mask_dir = os.path.join(_voc_root, 'cls') 24 | _image_dir = os.path.join(_voc_root, 'img') 25 | if self.split == 'train': 26 | _split_f = os.path.join(_voc_root, 'trainval.txt') 27 | elif self.split == 'val': 28 | _split_f = os.path.join(_voc_root, 'val.txt') 29 | else: 30 | raise RuntimeError('Unknown dataset split.') 31 | self.images = [] 32 | self.masks = [] 33 | with open(os.path.join(_split_f), "r") as lines: 34 | for line in lines: 35 | _image = os.path.join(_image_dir, line.rstrip('\n')+".jpg") 36 | assert os.path.isfile(_image) 37 | self.images.append(_image) 38 | if self.mode != 'test': 39 | _mask = os.path.join(_mask_dir, line.rstrip('\n')+".mat") 40 | assert os.path.isfile(_mask) 41 | self.masks.append(_mask) 42 | 43 | assert (len(self.images) == len(self.masks)) 44 | 45 | def __getitem__(self, index): 46 | _img = Image.open(self.images[index]).convert('RGB') 47 | if self.mode == 'test': 48 | if self.transform is not None: 49 | _img = self.transform(_img) 50 | return _img, os.path.basename(self.images[index]) 51 | _target = self._load_mat(self.masks[index]) 52 | # synchrosized transform 53 | if self.mode == 'train': 54 | _img, _target = self._sync_transform( _img, _target) 55 | elif self.mode == 'val': 56 | _img, _target = self._val_sync_transform( _img, _target) 57 | # general resize, normalize and toTensor 58 | if self.transform is not None: 59 | _img = self.transform(_img) 60 | if self.target_transform is not 
None: 61 | _target = self.target_transform(_target) 62 | return _img, _target 63 | 64 | def _load_mat(self, filename): 65 | mat = scipy.io.loadmat(filename, mat_dtype=True, squeeze_me=True, 66 | struct_as_record=False) 67 | mask = mat['GTcls'].Segmentation 68 | return Image.fromarray(mask) 69 | 70 | def __len__(self): 71 | return len(self.images) 72 | -------------------------------------------------------------------------------- /encoding/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | import torch 5 | 6 | from PIL import Image 7 | from tqdm import tqdm 8 | 9 | from .base import BaseDataset 10 | 11 | class VOCSegmentation(BaseDataset): 12 | CLASSES = [ 13 | 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 14 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 15 | 'motorbike', 'person', 'potted-plant', 'sheep', 'sofa', 'train', 16 | 'tv/monitor', 'ambigious' 17 | ] 18 | NUM_CLASS = 21 19 | BASE_DIR = 'VOCdevkit/VOC2012' 20 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 21 | mode=None, transform=None, target_transform=None, **kwargs): 22 | super(VOCSegmentation, self).__init__(root, split, mode, transform, 23 | target_transform, **kwargs) 24 | _voc_root = os.path.join(self.root, self.BASE_DIR) 25 | _mask_dir = os.path.join(_voc_root, 'SegmentationClass') 26 | _image_dir = os.path.join(_voc_root, 'JPEGImages') 27 | # train/val/test splits are pre-cut 28 | _splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation') 29 | if self.split == 'train': 30 | _split_f = os.path.join(_splits_dir, 'trainval.txt') 31 | elif self.split == 'val': 32 | _split_f = os.path.join(_splits_dir, 'val.txt') 33 | elif self.split == 'test': 34 | _split_f = os.path.join(_splits_dir, 'test.txt') 35 | else: 36 | raise RuntimeError('Unknown dataset split.') 37 | self.images = [] 38 | self.masks = [] 39 | with open(os.path.join(_split_f), "r") as lines: 40 | for line in tqdm(lines): 41 | _image = os.path.join(_image_dir, line.rstrip('\n')+".jpg") 42 | assert os.path.isfile(_image) 43 | self.images.append(_image) 44 | if self.mode != 'test': 45 | _mask = os.path.join(_mask_dir, line.rstrip('\n')+".png") 46 | assert os.path.isfile(_mask) 47 | self.masks.append(_mask) 48 | 49 | if self.mode != 'test': 50 | assert (len(self.images) == len(self.masks)) 51 | 52 | def __getitem__(self, index): 53 | img = Image.open(self.images[index]).convert('RGB') 54 | if self.mode == 'test': 55 | if self.transform is not None: 56 | img = self.transform(img) 57 | return img, os.path.basename(self.images[index]) 58 | target = Image.open(self.masks[index]) 59 | # synchrosized transform 60 | if self.mode == 'train': 61 | img, target = self._sync_transform( img, target) 62 | elif self.mode == 'val': 63 | img, target = self._val_sync_transform( img, target) 64 | else: 65 | assert self.mode == 'testval' 66 | target = self._mask_transform(target) 67 | # general resize, normalize and toTensor 68 | if self.transform is not None: 69 | img = self.transform(img) 70 | if self.target_transform is not None: 71 | target = self.target_transform(target) 72 | return img, target 73 | 74 | def _mask_transform(self, mask): 75 | target = np.array(mask).astype('int32') 76 | target[target == 255] = -1 77 | return torch.from_numpy(target).long() 78 | 79 | def __len__(self): 80 | return len(self.images) 81 | 82 | @property 83 | def pred_offset(self): 84 | return 0 85 | 
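A note on the label conventions used by the dataset classes above: `VOCSegmentation` keeps class indices unchanged (`pred_offset` is 0) and maps the ignore label 255 to -1, while `ADE20KSegmentation` and `ContextSegmentation` (next file) subtract 1 in `_mask_transform` and expose `pred_offset = 1`, so `BaseDataset.make_pred` can shift predictions back into the original label space. A small illustrative sketch of that round trip, using made-up mask values:
```python
import numpy as np
import torch

# ADE20K-style mask: 0 = unlabeled, 1..150 = classes (toy values)
raw = np.array([[0, 1, 150]])
target = torch.from_numpy(raw.astype('int64') - 1)  # as in ADE20KSegmentation._mask_transform
print(target.tolist())      # [[-1, 0, 149]]; -1 is typically treated as an ignore index by the loss

pred = target.clamp(min=0)  # stand-in for the network's argmax class indices
print((pred + 1).tolist())  # [[1, 1, 150]]; make_pred adds pred_offset to restore dataset labels
```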
-------------------------------------------------------------------------------- /encoding/datasets/pcontext.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | 8 | import os 9 | import numpy as np 10 | 11 | import torch 12 | 13 | from PIL import Image 14 | from tqdm import trange 15 | 16 | from .base import BaseDataset 17 | 18 | class ContextSegmentation(BaseDataset): 19 | BASE_DIR = 'VOCdevkit/VOC2010' 20 | NUM_CLASS = 59 21 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 22 | mode=None, transform=None, target_transform=None, **kwargs): 23 | super(ContextSegmentation, self).__init__( 24 | root, split, mode, transform, target_transform, **kwargs) 25 | from detail import Detail 26 | #from detail import mask 27 | root = os.path.join(root, self.BASE_DIR) 28 | annFile = os.path.join(root, 'trainval_merged.json') 29 | imgDir = os.path.join(root, 'JPEGImages') 30 | # training mode 31 | self.detail = Detail(annFile, imgDir, split) 32 | self.transform = transform 33 | self.target_transform = target_transform 34 | self.ids = self.detail.getImgs() 35 | # generate masks 36 | self._mapping = np.sort(np.array([ 37 | 0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 38 | 23, 397, 25, 284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 39 | 427, 44, 45, 46, 308, 59, 440, 445, 31, 232, 65, 354, 424, 40 | 68, 326, 72, 458, 34, 207, 80, 355, 85, 347, 220, 349, 360, 41 | 98, 187, 104, 105, 366, 189, 368, 113, 115])) 42 | self._key = np.array(range(len(self._mapping))).astype('uint8') 43 | mask_file = os.path.join(root, self.split+'.pth') 44 | print('mask_file:', mask_file) 45 | if os.path.exists(mask_file): 46 | self.masks = torch.load(mask_file) 47 | else: 48 | self.masks = self._preprocess(mask_file) 49 | 50 | def _class_to_index(self, mask): 51 | # assert the values 52 | values = np.unique(mask) 53 | for i in range(len(values)): 54 | assert(values[i] in self._mapping) 55 | index = np.digitize(mask.ravel(), self._mapping, right=True) 56 | return self._key[index].reshape(mask.shape) 57 | 58 | def _preprocess(self, mask_file): 59 | masks = {} 60 | tbar = trange(len(self.ids)) 61 | print("Preprocessing mask, this will take a while." 
+ \ 62 | "But don't worry, it only run once for each split.") 63 | for i in tbar: 64 | img_id = self.ids[i] 65 | mask = Image.fromarray(self._class_to_index( 66 | self.detail.getMask(img_id))) 67 | masks[img_id['image_id']] = mask 68 | tbar.set_description("Preprocessing masks {}".format(img_id['image_id'])) 69 | torch.save(masks, mask_file) 70 | return masks 71 | 72 | def __getitem__(self, index): 73 | img_id = self.ids[index] 74 | path = img_id['file_name'] 75 | iid = img_id['image_id'] 76 | img = Image.open(os.path.join(self.detail.img_folder, path)).convert('RGB') 77 | if self.mode == 'test': 78 | if self.transform is not None: 79 | img = self.transform(img) 80 | return img, os.path.basename(path) 81 | # convert mask to 60 categories 82 | mask = self.masks[iid] 83 | # synchrosized transform 84 | if self.mode == 'train': 85 | img, mask = self._sync_transform(img, mask) 86 | elif self.mode == 'val': 87 | img, mask = self._val_sync_transform(img, mask) 88 | else: 89 | assert self.mode == 'testval' 90 | mask = self._mask_transform(mask) 91 | # general resize, normalize and toTensor 92 | if self.transform is not None: 93 | img = self.transform(img) 94 | if self.target_transform is not None: 95 | mask = self.target_transform(mask) 96 | return img, mask 97 | 98 | def _mask_transform(self, mask): 99 | target = np.array(mask).astype('int32') - 1 100 | return torch.from_numpy(target).long() 101 | 102 | def __len__(self): 103 | return len(self.ids) 104 | 105 | @property 106 | def pred_offset(self): 107 | return 1 108 | -------------------------------------------------------------------------------- /encoding/dilated/__init__.py: -------------------------------------------------------------------------------- 1 | """Dilated ResNet and DenseNet""" 2 | from .resnet import * 3 | -------------------------------------------------------------------------------- /encoding/dilated/resnet.py: -------------------------------------------------------------------------------- 1 | """Dilated ResNet""" 2 | import math 3 | import torch 4 | import torch.nn as nn 5 | import torch.utils.model_zoo as model_zoo 6 | 7 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 8 | 'resnet152', 'BasicBlock', 'Bottleneck'] 9 | 10 | model_urls = { 11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | } 14 | 15 | 16 | def conv3x3(in_planes, out_planes, stride=1): 17 | "3x3 convolution with padding" 18 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 19 | padding=1, bias=False) 20 | 21 | 22 | class BasicBlock(nn.Module): 23 | """ResNet BasicBlock 24 | """ 25 | expansion = 1 26 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, previous_dilation=1, 27 | norm_layer=None): 28 | super(BasicBlock, self).__init__() 29 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, 30 | padding=dilation, dilation=dilation, bias=False) 31 | self.bn1 = norm_layer(planes) 32 | self.relu = nn.ReLU(inplace=True) 33 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, 34 | padding=previous_dilation, dilation=previous_dilation, bias=False) 35 | self.bn2 = norm_layer(planes) 36 | self.downsample = downsample 37 | self.stride = stride 38 | 39 | def forward(self, x): 40 | residual = x 41 | 42 | out = self.conv1(x) 43 | out = self.bn1(out) 44 | out = self.relu(out) 45 | 46 | out = self.conv2(out) 47 | out = self.bn2(out) 48 | 49 | if self.downsample is 
not None: 50 | residual = self.downsample(x) 51 | 52 | out += residual 53 | out = self.relu(out) 54 | 55 | return out 56 | 57 | 58 | class Bottleneck(nn.Module): 59 | """ResNet Bottleneck 60 | """ 61 | # pylint: disable=unused-argument 62 | expansion = 4 63 | def __init__(self, inplanes, planes, stride=1, dilation=1, 64 | downsample=None, previous_dilation=1, norm_layer=None): 65 | super(Bottleneck, self).__init__() 66 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 67 | self.bn1 = norm_layer(planes) 68 | self.conv2 = nn.Conv2d( 69 | planes, planes, kernel_size=3, stride=stride, 70 | padding=dilation, dilation=dilation, bias=False) 71 | self.bn2 = norm_layer(planes) 72 | self.conv3 = nn.Conv2d( 73 | planes, planes * 4, kernel_size=1, bias=False) 74 | self.bn3 = norm_layer(planes * 4) 75 | self.relu = nn.ReLU(inplace=True) 76 | self.downsample = downsample 77 | self.dilation = dilation 78 | self.stride = stride 79 | 80 | def _sum_each(self, x, y): 81 | assert(len(x) == len(y)) 82 | z = [] 83 | for i in range(len(x)): 84 | z.append(x[i]+y[i]) 85 | return z 86 | 87 | def forward(self, x): 88 | residual = x 89 | 90 | out = self.conv1(x) 91 | out = self.bn1(out) 92 | out = self.relu(out) 93 | 94 | out = self.conv2(out) 95 | out = self.bn2(out) 96 | out = self.relu(out) 97 | 98 | out = self.conv3(out) 99 | out = self.bn3(out) 100 | 101 | if self.downsample is not None: 102 | residual = self.downsample(x) 103 | 104 | out += residual 105 | out = self.relu(out) 106 | 107 | return out 108 | 109 | 110 | class ResNet(nn.Module): 111 | """Dilated Pre-trained ResNet Model, which preduces the stride of 8 featuremaps at conv5. 112 | 113 | Parameters 114 | ---------- 115 | block : Block 116 | Class for the residual block. Options are BasicBlockV1, BottleneckV1. 117 | layers : list of int 118 | Numbers of layers in each block 119 | classes : int, default 1000 120 | Number of classification classes. 121 | dilated : bool, default False 122 | Applying dilation strategy to pretrained ResNet yielding a stride-8 model, 123 | typically used in Semantic Segmentation. 124 | norm_layer : object 125 | Normalization layer used in backbone network (default: :class:`mxnet.gluon.nn.BatchNorm`; 126 | for Synchronized Cross-GPU BachNormalization). 127 | 128 | Reference: 129 | 130 | - He, Kaiming, et al. "Deep residual learning for image recognition." Proceedings of the IEEE conference on computer vision and pattern recognition. 2016. 131 | 132 | - Yu, Fisher, and Vladlen Koltun. "Multi-scale context aggregation by dilated convolutions." 
133 | """ 134 | # pylint: disable=unused-variable 135 | def __init__(self, block, layers, num_classes=1000, dilated=True, 136 | deep_base=True, norm_layer=nn.BatchNorm2d, output_size=8): 137 | self.inplanes = 128 if deep_base else 64 138 | super(ResNet, self).__init__() 139 | if deep_base: 140 | self.conv1 = nn.Sequential( 141 | nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False), 142 | norm_layer(64), 143 | nn.ReLU(inplace=True), 144 | nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False), 145 | norm_layer(64), 146 | nn.ReLU(inplace=True), 147 | nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False), 148 | ) 149 | else: 150 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 151 | bias=False) 152 | self.bn1 = norm_layer(self.inplanes) 153 | self.relu = nn.ReLU(inplace=True) 154 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 155 | self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer) 156 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer) 157 | 158 | dilation_rate = 2 159 | if dilated and output_size <= 8: 160 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, 161 | dilation=dilation_rate, norm_layer=norm_layer) 162 | dilation_rate *= 2 163 | else: 164 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 165 | norm_layer=norm_layer) 166 | 167 | if dilated and output_size <= 16: 168 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, 169 | dilation=dilation_rate, norm_layer=norm_layer) 170 | else: 171 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 172 | norm_layer=norm_layer) 173 | 174 | self.avgpool = nn.AvgPool2d(7, stride=1) 175 | self.fc = nn.Linear(512 * block.expansion, num_classes) 176 | 177 | for m in self.modules(): 178 | if isinstance(m, nn.Conv2d): 179 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 180 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 181 | elif isinstance(m, norm_layer): 182 | m.weight.data.fill_(1) 183 | m.bias.data.zero_() 184 | 185 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=None): 186 | downsample = None 187 | if stride != 1 or self.inplanes != planes * block.expansion: 188 | downsample = nn.Sequential( 189 | nn.Conv2d(self.inplanes, planes * block.expansion, 190 | kernel_size=1, stride=stride, bias=False), 191 | norm_layer(planes * block.expansion), 192 | ) 193 | 194 | layers = [] 195 | if dilation == 1 or dilation == 2: 196 | layers.append(block(self.inplanes, planes, stride, dilation=1, 197 | downsample=downsample, previous_dilation=dilation, norm_layer=norm_layer)) 198 | elif dilation == 4: 199 | layers.append(block(self.inplanes, planes, stride, dilation=2, 200 | downsample=downsample, previous_dilation=dilation, norm_layer=norm_layer)) 201 | else: 202 | raise RuntimeError("=> unknown dilation size: {}".format(dilation)) 203 | 204 | self.inplanes = planes * block.expansion 205 | for i in range(1, blocks): 206 | layers.append(block(self.inplanes, planes, dilation=dilation, previous_dilation=dilation, 207 | norm_layer=norm_layer)) 208 | 209 | return nn.Sequential(*layers) 210 | 211 | def forward(self, x): 212 | x = self.conv1(x) 213 | x = self.bn1(x) 214 | x = self.relu(x) 215 | x = self.maxpool(x) 216 | 217 | x = self.layer1(x) 218 | x = self.layer2(x) 219 | x = self.layer3(x) 220 | x = self.layer4(x) 221 | 222 | x = self.avgpool(x) 223 | x = x.view(x.size(0), -1) 224 | x = self.fc(x) 225 | 226 | return x 227 | 228 | 229 | def resnet18(pretrained=False, **kwargs): 230 | """Constructs a ResNet-18 model. 231 | 232 | Args: 233 | pretrained (bool): If True, returns a model pre-trained on ImageNet 234 | """ 235 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 236 | if pretrained: 237 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 238 | return model 239 | 240 | 241 | def resnet34(pretrained=False, **kwargs): 242 | """Constructs a ResNet-34 model. 243 | 244 | Args: 245 | pretrained (bool): If True, returns a model pre-trained on ImageNet 246 | """ 247 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 248 | if pretrained: 249 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 250 | return model 251 | 252 | 253 | def resnet50(pretrained=False, root='~/.encoding/models', **kwargs): 254 | """Constructs a ResNet-50 model. 255 | 256 | Args: 257 | pretrained (bool): If True, returns a model pre-trained on ImageNet 258 | """ 259 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 260 | if pretrained: 261 | from ..models.model_store import get_model_file 262 | model.load_state_dict(torch.load( 263 | get_model_file('resnet50', root=root)), strict=False) 264 | return model 265 | 266 | 267 | def resnet101(pretrained=False, root='~/.encoding/models', **kwargs): 268 | """Constructs a ResNet-101 model. 269 | 270 | Args: 271 | pretrained (bool): If True, returns a model pre-trained on ImageNet 272 | """ 273 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 274 | if pretrained: 275 | from ..models.model_store import get_model_file 276 | model.load_state_dict(torch.load( 277 | get_model_file('resnet101', root=root)), strict=False) 278 | return model 279 | 280 | 281 | def resnet152(pretrained=False, root='~/.encoding/models', **kwargs): 282 | """Constructs a ResNet-152 model. 
283 | 284 | Args: 285 | pretrained (bool): If True, returns a model pre-trained on ImageNet 286 | """ 287 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 288 | if pretrained: 289 | from ..models.model_store import get_model_file 290 | model.load_state_dict(torch.load( 291 | get_model_file('resnet152', root=root)), strict=False) 292 | return model 293 | -------------------------------------------------------------------------------- /encoding/functions/__init__.py: -------------------------------------------------------------------------------- 1 | """Encoding Autograd Fuctions""" 2 | from .syncbn import * 3 | from .encoding import * 4 | -------------------------------------------------------------------------------- /encoding/functions/encoding.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2018 5 | ## 6 | ## This source code is licensed under the MIT-style license found in the 7 | ## LICENSE file in the root directory of this source tree 8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 | 10 | """Functions for Encoding Layer""" 11 | import torch 12 | 13 | from torch.autograd import Function 14 | 15 | __all__ = ['aggregate', 'scaled_l2'] 16 | 17 | class Aggregate(Function): 18 | @staticmethod 19 | def forward(ctx, A, X, C): 20 | ctx.save_for_backward(A, X, C) 21 | 22 | return (X.unsqueeze(2).expand(X.size(0), X.size(1), C.size(0), C.size(1)) - 23 | C.unsqueeze(0).unsqueeze(0)).mul_(A.unsqueeze(3)).sum(1) 24 | 25 | @staticmethod 26 | def backward(ctx, GE): 27 | A, X, C = ctx.saved_variables 28 | 29 | gradA = (X.unsqueeze(2).expand(X.size(0), X.size(1), C.size(0), C.size(1)) - 30 | C.unsqueeze(0).unsqueeze(0)).mul_(GE.unsqueeze(1)).sum(3) 31 | gradX = torch.bmm(A, GE) 32 | gradC = A.sum(1).unsqueeze(2).mul(GE).mul_(-1).sum(0) 33 | 34 | return gradA, gradX, gradC 35 | 36 | def aggregate(A, X, C): 37 | r""" Aggregate operation, aggregate the residuals of inputs (:math:`X`) with repect 38 | to the codewords (:math:`C`) with assignment weights (:math:`A`). 39 | 40 | .. math:: 41 | 42 | e_{k} = \sum_{i=1}^{N} a_{ik} (x_i - d_k) 43 | 44 | Shape: 45 | - Input: :math:`A\in\mathcal{R}^{B\times N\times K}` 46 | :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}` 47 | (where :math:`B` is batch, :math:`N` is total number of features, 48 | :math:`K` is number is codewords, :math:`D` is feature dimensions.) 
49 | - Output: :math:`E\in\mathcal{R}^{B\times K\times D}` 50 | 51 | Examples: 52 | >>> B,N,K,D = 2,3,4,5 53 | >>> A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), requires_grad=True) 54 | >>> X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), requires_grad=True) 55 | >>> C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), requires_grad=True) 56 | >>> func = encoding.aggregate() 57 | >>> E = func(A, X, C) 58 | """ 59 | return Aggregate.apply(A, X, C) 60 | 61 | class ScaledL2(Function): 62 | @staticmethod 63 | def forward(ctx, X, C, S): 64 | SL = (X.unsqueeze(2).expand(X.size(0), X.size(1), C.size(0), C.size(1)) - 65 | C.unsqueeze(0).unsqueeze(0)).pow_(2).sum(3).mul_(S.view(1, 1, C.size(0))) 66 | ctx.save_for_backward(X, C, S, SL) 67 | return SL 68 | 69 | @staticmethod 70 | def backward(ctx, GSL): 71 | X, C, S, SL = ctx.saved_variables 72 | 73 | tmp = (X.unsqueeze(2).expand(X.size(0), X.size(1), C.size(0), C.size(1)) - C.unsqueeze(0).unsqueeze(0)).mul_( 74 | (2 * GSL).mul_(S.view(1, 1, C.size(0))).unsqueeze(3) 75 | ) 76 | 77 | GX = tmp.sum(2) 78 | GC = tmp.sum((0, 1)).mul_(-1) 79 | GS = SL.div(S.view(1, 1, C.size(0))).mul_(GSL).sum((0, 1)) 80 | 81 | return GX, GC, GS 82 | 83 | def scaled_l2(X, C, S): 84 | r""" scaled_l2 distance 85 | 86 | .. math:: 87 | sl_{ik} = s_k \|x_i-c_k\|^2 88 | 89 | Shape: 90 | - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` 91 | :math:`C\in\mathcal{R}^{K\times D}` :math:`S\in \mathcal{R}^K` 92 | (where :math:`B` is batch, :math:`N` is total number of features, 93 | :math:`K` is number is codewords, :math:`D` is feature dimensions.) 94 | - Output: :math:`E\in\mathcal{R}^{B\times N\times K}` 95 | """ 96 | return ScaledL2.apply(X, C, S) 97 | 98 | if __name__ == '__main__': 99 | B, N, D, K = 3, 4, 5, 6 100 | X = torch.randn((B, N, D), dtype=torch.double,requires_grad=True).cuda() 101 | C = torch.randn((K, D), dtype=torch.double,requires_grad=True).cuda() 102 | S = torch.randn((K,), dtype=torch.double,requires_grad=True).cuda() 103 | assert torch.autograd.gradcheck(scaled_l2, (X, C, S)) 104 | 105 | A = torch.randn((B, N, K), dtype=torch.double, requires_grad=True).cuda() 106 | X = torch.randn((B, N, D), dtype=torch.double, requires_grad=True).cuda() 107 | C = torch.randn((K, D), dtype=torch.double, requires_grad=True).cuda() 108 | assert torch.autograd.gradcheck(aggregate, (A, X, C)) 109 | -------------------------------------------------------------------------------- /encoding/functions/syncbn.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2018 5 | ## 6 | ## This source code is licensed under the MIT-style license found in the 7 | ## LICENSE file in the root directory of this source tree 8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 | 10 | """Synchronized Cross-GPU Batch Normalization functions""" 11 | from torch.autograd import Function 12 | 13 | __all__ = ['normalization'] 14 | 15 | 16 | class Normalization(Function): 17 | @staticmethod 18 | def forward(ctx, input, mean, inv_std, gamma, beta): 19 | ctx.save_for_backward(input, mean, inv_std, gamma, beta) 20 | 21 | return (input - mean.unsqueeze(-1)).mul_((inv_std*gamma).unsqueeze(-1)).add_(beta.unsqueeze(-1)) 22 | 23 | @staticmethod 24 | def backward(ctx, gradOutput): 25 | input, mean, inv_std, gamma, beta = ctx.saved_variables 26 | 27 
| gradInputMean = gradOutput * (inv_std*gamma).unsqueeze(-1) 28 | gradInput = gradInputMean 29 | gradMean = gradInputMean.sum((0, 2)).mul_(-1) 30 | 31 | gradInvStdGamma = (input - mean.unsqueeze(-1)).mul_(gradOutput).sum((0, 2)) 32 | gradInvStd = gradInvStdGamma * gamma 33 | gradGamma = gradInvStdGamma * inv_std 34 | 35 | gradBeta = gradOutput.sum((0, 2)) 36 | 37 | return gradInput, gradMean, gradInvStd, gradGamma, gradBeta 38 | 39 | 40 | def normalization(input, mean, inv_std, gamma, beta): 41 | r"""Applies Batch Normalization over a 3d input that is seen as a 42 | mini-batch. 43 | 44 | .. _encoding.normalization: 45 | 46 | .. math:: 47 | 48 | y = \frac{x - \mu[x]}{ \sqrt{var[x] + \epsilon}} * \gamma + \beta 49 | 50 | Shape: 51 | - Input: :math:`(N, C)` or :math:`(N, C, L)` 52 | - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input) 53 | 54 | """ 55 | return Normalization.apply(input, mean, inv_std, gamma, beta) 56 | 57 | if __name__ == '__main__': 58 | import torch 59 | 60 | input = torch.randn((3,4,5), dtype=torch.float64, requires_grad=True).cuda() 61 | mean = torch.randn((input.size(1),), dtype=torch.float64, requires_grad=True).cuda() 62 | inv_std = torch.randn((input.size(1),), dtype=torch.float64, requires_grad=True).cuda() 63 | gamma = torch.randn((input.size(1),), dtype=torch.float64, requires_grad=True).cuda() 64 | beta = torch.randn((input.size(1),), dtype=torch.float64, requires_grad=True).cuda() 65 | 66 | assert torch.autograd.gradcheck(normalization, (input, mean, inv_std, gamma, beta)) 67 | -------------------------------------------------------------------------------- /encoding/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .model_zoo import get_model 2 | from .model_store import get_model_file 3 | from .base import * 4 | from .fcn import * 5 | from .psp import * 6 | from .encnet import * 7 | from .deeplabv3 import * 8 | 9 | 10 | def get_segmentation_model(name, **kwargs): 11 | from .fcn import get_fcn 12 | models = { 13 | 'fcn': get_fcn, 14 | 'psp': get_psp, 15 | 'encnet': get_encnet, 16 | 'deeplab': get_deeplab 17 | } 18 | return models[name.lower()](**kwargs) 19 | -------------------------------------------------------------------------------- /encoding/models/base.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import math 8 | import numpy as np 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from torch.nn.parallel.data_parallel import DataParallel 15 | 16 | from ..nn import JPU, JPU_X 17 | from .. 
import dilated as resnet 18 | from ..utils import batch_pix_accuracy, batch_intersection_union 19 | 20 | up_kwargs = {'mode': 'bilinear', 'align_corners': True} 21 | 22 | __all__ = ['BaseNet', 'MultiEvalModule'] 23 | 24 | class BaseNet(nn.Module): 25 | def __init__(self, nclass, backbone, aux, se_loss, jpu=True, dilated=False, norm_layer=None, 26 | base_size=520, crop_size=480, mean=[.485, .456, .406], 27 | std=[.229, .224, .225], root='~/.encoding/models', **kwargs): 28 | super(BaseNet, self).__init__() 29 | self.nclass = nclass 30 | self.aux = aux 31 | self.se_loss = se_loss 32 | self.mean = mean 33 | self.std = std 34 | self.base_size = base_size 35 | self.crop_size = crop_size 36 | # copying modules from pretrained models 37 | if backbone == 'resnet50': 38 | self.pretrained = resnet.resnet50(pretrained=True, dilated=dilated, 39 | norm_layer=norm_layer, root=root) 40 | elif backbone == 'resnet101': 41 | self.pretrained = resnet.resnet101(pretrained=True, dilated=dilated, 42 | norm_layer=norm_layer, root=root) 43 | elif backbone == 'resnet152': 44 | self.pretrained = resnet.resnet152(pretrained=True, dilated=dilated, 45 | norm_layer=norm_layer, root=root) 46 | else: 47 | raise RuntimeError('unknown backbone: {}'.format(backbone)) 48 | # bilinear upsample options 49 | self._up_kwargs = up_kwargs 50 | self.backbone = backbone 51 | self.jpu = None 52 | if jpu == 'JPU': 53 | self.jpu = JPU([512, 1024, 2048], width=512, norm_layer=norm_layer, up_kwargs=up_kwargs) 54 | elif jpu == 'JPU_X': 55 | self.jpu = JPU_X([512, 1024, 2048], width=512, norm_layer=norm_layer, up_kwargs=up_kwargs) 56 | 57 | def base_forward(self, x): 58 | x = self.pretrained.conv1(x) 59 | x = self.pretrained.bn1(x) 60 | x = self.pretrained.relu(x) 61 | x = self.pretrained.maxpool(x) 62 | c1 = self.pretrained.layer1(x) 63 | c2 = self.pretrained.layer2(c1) 64 | c3 = self.pretrained.layer3(c2) 65 | c4 = self.pretrained.layer4(c3) 66 | 67 | if self.jpu: 68 | return self.jpu(c1, c2, c3, c4) 69 | else: 70 | return c1, c2, c3, c4 71 | 72 | def evaluate(self, x, target=None): 73 | pred = self.forward(x) 74 | if isinstance(pred, (tuple, list)): 75 | pred = pred[0] 76 | if target is None: 77 | return pred 78 | correct, labeled = batch_pix_accuracy(pred.data, target.data) 79 | inter, union = batch_intersection_union(pred.data, target.data, self.nclass) 80 | return correct, labeled, inter, union 81 | 82 | 83 | class MultiEvalModule(DataParallel): 84 | """Multi-size Segmentation Eavluator""" 85 | def __init__(self, module, nclass, device_ids=None, flip=True, 86 | scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75]): 87 | super(MultiEvalModule, self).__init__(module, device_ids) 88 | self.nclass = nclass 89 | self.base_size = module.base_size 90 | self.crop_size = module.crop_size 91 | self.scales = scales 92 | self.flip = flip 93 | print('MultiEvalModule: base_size {}, crop_size {}'. 
\ 94 | format(self.base_size, self.crop_size)) 95 | 96 | def parallel_forward(self, inputs, **kwargs): 97 | """Multi-GPU Mult-size Evaluation 98 | 99 | Args: 100 | inputs: list of Tensors 101 | """ 102 | inputs = [(input.unsqueeze(0).cuda(device),) 103 | for input, device in zip(inputs, self.device_ids)] 104 | replicas = self.replicate(self, self.device_ids[:len(inputs)]) 105 | kwargs = [] 106 | if len(inputs) < len(kwargs): 107 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 108 | elif len(kwargs) < len(inputs): 109 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 110 | outputs = self.parallel_apply(replicas, inputs, kwargs) 111 | #for out in outputs: 112 | # print('out.size()', out.size()) 113 | return outputs 114 | 115 | def forward(self, image): 116 | """Mult-size Evaluation""" 117 | # only single image is supported for evaluation 118 | batch, _, h, w = image.size() 119 | assert(batch == 1) 120 | stride_rate = 2.0/3.0 121 | crop_size = self.crop_size 122 | stride = int(crop_size * stride_rate) 123 | with torch.cuda.device_of(image): 124 | scores = image.new().resize_(batch,self.nclass,h,w).zero_().cuda() 125 | 126 | for scale in self.scales: 127 | long_size = int(math.ceil(self.base_size * scale)) 128 | if h > w: 129 | height = long_size 130 | width = int(1.0 * w * long_size / h + 0.5) 131 | short_size = width 132 | else: 133 | width = long_size 134 | height = int(1.0 * h * long_size / w + 0.5) 135 | short_size = height 136 | # resize image to current size 137 | cur_img = resize_image(image, height, width, **self.module._up_kwargs) 138 | if long_size <= crop_size: 139 | pad_img = pad_image(cur_img, self.module.mean, 140 | self.module.std, crop_size) 141 | outputs = module_inference(self.module, pad_img, self.flip) 142 | outputs = crop_image(outputs, 0, height, 0, width) 143 | else: 144 | if short_size < crop_size: 145 | # pad if needed 146 | pad_img = pad_image(cur_img, self.module.mean, 147 | self.module.std, crop_size) 148 | else: 149 | pad_img = cur_img 150 | _,_,ph,pw = pad_img.size() 151 | assert(ph >= height and pw >= width) 152 | # grid forward and normalize 153 | h_grids = int(math.ceil(1.0 * (ph-crop_size)/stride)) + 1 154 | w_grids = int(math.ceil(1.0 * (pw-crop_size)/stride)) + 1 155 | with torch.cuda.device_of(image): 156 | outputs = image.new().resize_(batch,self.nclass,ph,pw).zero_().cuda() 157 | count_norm = image.new().resize_(batch,1,ph,pw).zero_().cuda() 158 | # grid evaluation 159 | for idh in range(h_grids): 160 | for idw in range(w_grids): 161 | h0 = idh * stride 162 | w0 = idw * stride 163 | h1 = min(h0 + crop_size, ph) 164 | w1 = min(w0 + crop_size, pw) 165 | crop_img = crop_image(pad_img, h0, h1, w0, w1) 166 | # pad if needed 167 | pad_crop_img = pad_image(crop_img, self.module.mean, 168 | self.module.std, crop_size) 169 | output = module_inference(self.module, pad_crop_img, self.flip) 170 | outputs[:,:,h0:h1,w0:w1] += crop_image(output, 171 | 0, h1-h0, 0, w1-w0) 172 | count_norm[:,:,h0:h1,w0:w1] += 1 173 | assert((count_norm==0).sum()==0) 174 | outputs = outputs / count_norm 175 | outputs = outputs[:,:,:height,:width] 176 | 177 | score = resize_image(outputs, h, w, **self.module._up_kwargs) 178 | scores += score 179 | 180 | return scores 181 | 182 | 183 | def module_inference(module, image, flip=True): 184 | output = module.evaluate(image) 185 | if flip: 186 | fimg = flip_image(image) 187 | foutput = module.evaluate(fimg) 188 | output += flip_image(foutput) 189 | return output.exp() 190 | 191 | def resize_image(img, h, w, 
**up_kwargs): 192 | return F.interpolate(img, (h, w), **up_kwargs) 193 | 194 | def pad_image(img, mean, std, crop_size): 195 | b,c,h,w = img.size() 196 | assert(c==3) 197 | padh = crop_size - h if h < crop_size else 0 198 | padw = crop_size - w if w < crop_size else 0 199 | pad_values = -np.array(mean) / np.array(std) 200 | img_pad = img.new().resize_(b,c,h+padh,w+padw) 201 | for i in range(c): 202 | # note that pytorch pad params is in reversed orders 203 | img_pad[:,i,:,:] = F.pad(img[:,i,:,:], (0, padw, 0, padh), value=pad_values[i]) 204 | assert(img_pad.size(2)>=crop_size and img_pad.size(3)>=crop_size) 205 | return img_pad 206 | 207 | def crop_image(img, h0, h1, w0, w1): 208 | return img[:,:,h0:h1,w0:w1] 209 | 210 | def flip_image(img): 211 | assert(img.dim()==4) 212 | with torch.cuda.device_of(img): 213 | idx = torch.arange(img.size(3)-1, -1, -1).type_as(img).long() 214 | return img.index_select(3, idx) 215 | -------------------------------------------------------------------------------- /encoding/models/deeplabv3.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from .fcn import FCNHead 8 | from .base import BaseNet 9 | 10 | __all__ = ['DeepLabV3', 'get_deeplab'] 11 | 12 | class DeepLabV3(BaseNet): 13 | def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs): 14 | super(DeepLabV3, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs) 15 | 16 | self.head = DeepLabV3Head(2048, nclass, norm_layer, self._up_kwargs) 17 | if aux: 18 | self.auxlayer = FCNHead(1024, nclass, norm_layer) 19 | 20 | def forward(self, x): 21 | _, _, h, w = x.size() 22 | _, _, c3, c4 = self.base_forward(x) 23 | 24 | outputs = [] 25 | x = self.head(c4) 26 | x = F.interpolate(x, (h,w), **self._up_kwargs) 27 | outputs.append(x) 28 | if self.aux: 29 | auxout = self.auxlayer(c3) 30 | auxout = F.interpolate(auxout, (h,w), **self._up_kwargs) 31 | outputs.append(auxout) 32 | 33 | return tuple(outputs) 34 | 35 | 36 | class DeepLabV3Head(nn.Module): 37 | def __init__(self, in_channels, out_channels, norm_layer, up_kwargs, atrous_rates=(12, 24, 36)): 38 | super(DeepLabV3Head, self).__init__() 39 | inter_channels = in_channels // 8 40 | self.aspp = ASPP_Module(in_channels, atrous_rates, norm_layer, up_kwargs) 41 | self.block = nn.Sequential( 42 | nn.Conv2d(inter_channels, inter_channels, 3, padding=1, bias=False), 43 | norm_layer(inter_channels), 44 | nn.ReLU(True), 45 | nn.Dropout2d(0.1, False), 46 | nn.Conv2d(inter_channels, out_channels, 1)) 47 | 48 | def forward(self, x): 49 | x = self.aspp(x) 50 | x = self.block(x) 51 | return x 52 | 53 | 54 | def ASPPConv(in_channels, out_channels, atrous_rate, norm_layer): 55 | block = nn.Sequential( 56 | nn.Conv2d(in_channels, out_channels, 3, padding=atrous_rate, 57 | dilation=atrous_rate, bias=False), 58 | norm_layer(out_channels), 59 | nn.ReLU(True)) 60 | return block 61 | 62 | class AsppPooling(nn.Module): 63 | def __init__(self, in_channels, out_channels, norm_layer, up_kwargs): 64 | super(AsppPooling, self).__init__() 65 | self._up_kwargs = up_kwargs 66 | self.gap = nn.Sequential(nn.AdaptiveAvgPool2d(1), 67 | nn.Conv2d(in_channels, out_channels, 1, bias=False), 68 | norm_layer(out_channels), 69 | nn.ReLU(True)) 70 | 71 | def forward(self, x): 72 | _, _, h, w = x.size() 73 | pool = self.gap(x) 74 | 75 | return F.interpolate(pool, (h,w), 
**self._up_kwargs) 76 | 77 | class ASPP_Module(nn.Module): 78 | def __init__(self, in_channels, atrous_rates, norm_layer, up_kwargs): 79 | super(ASPP_Module, self).__init__() 80 | out_channels = in_channels // 8 81 | rate1, rate2, rate3 = tuple(atrous_rates) 82 | self.b0 = nn.Sequential( 83 | nn.Conv2d(in_channels, out_channels, 1, bias=False), 84 | norm_layer(out_channels), 85 | nn.ReLU(True)) 86 | self.b1 = ASPPConv(in_channels, out_channels, rate1, norm_layer) 87 | self.b2 = ASPPConv(in_channels, out_channels, rate2, norm_layer) 88 | self.b3 = ASPPConv(in_channels, out_channels, rate3, norm_layer) 89 | self.b4 = AsppPooling(in_channels, out_channels, norm_layer, up_kwargs) 90 | 91 | self.project = nn.Sequential( 92 | nn.Conv2d(5*out_channels, out_channels, 1, bias=False), 93 | norm_layer(out_channels), 94 | nn.ReLU(True), 95 | nn.Dropout2d(0.5, False)) 96 | 97 | def forward(self, x): 98 | feat0 = self.b0(x) 99 | feat1 = self.b1(x) 100 | feat2 = self.b2(x) 101 | feat3 = self.b3(x) 102 | feat4 = self.b4(x) 103 | 104 | y = torch.cat((feat0, feat1, feat2, feat3, feat4), 1) 105 | 106 | return self.project(y) 107 | 108 | 109 | def get_deeplab(dataset='pascal_voc', backbone='resnet50', pretrained=False, 110 | root='~/.encoding/models', **kwargs): 111 | # infer number of classes 112 | from ..datasets import datasets 113 | model = DeepLabV3(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) 114 | if pretrained: 115 | raise NotImplementedError 116 | 117 | return model 118 | -------------------------------------------------------------------------------- /encoding/models/encnet.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | import encoding 12 | 13 | from .base import BaseNet 14 | from .fcn import FCNHead 15 | 16 | __all__ = ['EncNet', 'EncModule', 'get_encnet', 'get_encnet_resnet50_pcontext', 17 | 'get_encnet_resnet101_pcontext', 'get_encnet_resnet50_ade', 18 | 'get_encnet_resnet101_ade'] 19 | 20 | class EncNet(BaseNet): 21 | def __init__(self, nclass, backbone, aux=True, se_loss=True, 22 | norm_layer=nn.BatchNorm2d, **kwargs): 23 | super(EncNet, self).__init__(nclass, backbone, aux, se_loss, 24 | norm_layer=norm_layer, **kwargs) 25 | self.head = EncHead([512, 1024, 2048], self.nclass, se_loss=se_loss, jpu=kwargs['jpu'], 26 | lateral=kwargs['lateral'], norm_layer=norm_layer, 27 | up_kwargs=self._up_kwargs) 28 | if aux: 29 | self.auxlayer = FCNHead(1024, nclass, norm_layer=norm_layer) 30 | 31 | def forward(self, x): 32 | imsize = x.size()[2:] 33 | features = self.base_forward(x) 34 | 35 | x = list(self.head(*features)) 36 | x[0] = F.interpolate(x[0], imsize, **self._up_kwargs) 37 | if self.aux: 38 | auxout = self.auxlayer(features[2]) 39 | auxout = F.interpolate(auxout, imsize, **self._up_kwargs) 40 | x.append(auxout) 41 | return tuple(x) 42 | 43 | 44 | class EncModule(nn.Module): 45 | def __init__(self, in_channels, nclass, ncodes=32, se_loss=True, norm_layer=None): 46 | super(EncModule, self).__init__() 47 | self.se_loss = se_loss 48 | self.encoding = nn.Sequential( 49 | nn.Conv2d(in_channels, in_channels, 1, bias=False), 50 | norm_layer(in_channels), 51 | nn.ReLU(inplace=True), 52 | 
encoding.nn.Encoding(D=in_channels, K=ncodes), 53 | norm_layer(ncodes), 54 | nn.ReLU(inplace=True), 55 | encoding.nn.Mean(dim=1)) 56 | self.fc = nn.Sequential( 57 | nn.Linear(in_channels, in_channels), 58 | nn.Sigmoid()) 59 | if self.se_loss: 60 | self.selayer = nn.Linear(in_channels, nclass) 61 | 62 | def forward(self, x): 63 | en = self.encoding(x) 64 | b, c, _, _ = x.size() 65 | gamma = self.fc(en) 66 | y = gamma.view(b, c, 1, 1) 67 | outputs = [F.relu_(x + x * y)] 68 | if self.se_loss: 69 | outputs.append(self.selayer(en)) 70 | return tuple(outputs) 71 | 72 | 73 | class EncHead(nn.Module): 74 | def __init__(self, in_channels, out_channels, se_loss=True, jpu=True, lateral=False, 75 | norm_layer=None, up_kwargs=None): 76 | super(EncHead, self).__init__() 77 | self.se_loss = se_loss 78 | self.lateral = lateral 79 | self.up_kwargs = up_kwargs 80 | self.conv5 = nn.Sequential(nn.Conv2d(in_channels[-1], 512, 1, bias=False), 81 | norm_layer(512), 82 | nn.ReLU(inplace=True)) if jpu else \ 83 | nn.Sequential(nn.Conv2d(in_channels[-1], 512, 3, padding=1, bias=False), 84 | norm_layer(512), 85 | nn.ReLU(inplace=True)) 86 | if lateral: 87 | self.connect = nn.ModuleList([ 88 | nn.Sequential( 89 | nn.Conv2d(in_channels[0], 512, kernel_size=1, bias=False), 90 | norm_layer(512), 91 | nn.ReLU(inplace=True)), 92 | nn.Sequential( 93 | nn.Conv2d(in_channels[1], 512, kernel_size=1, bias=False), 94 | norm_layer(512), 95 | nn.ReLU(inplace=True)), 96 | ]) 97 | self.fusion = nn.Sequential( 98 | nn.Conv2d(3*512, 512, kernel_size=3, padding=1, bias=False), 99 | norm_layer(512), 100 | nn.ReLU(inplace=True)) 101 | self.encmodule = EncModule(512, out_channels, ncodes=32, 102 | se_loss=se_loss, norm_layer=norm_layer) 103 | self.conv6 = nn.Sequential(nn.Dropout2d(0.1, False), 104 | nn.Conv2d(512, out_channels, 1)) 105 | 106 | def forward(self, *inputs): 107 | feat = self.conv5(inputs[-1]) 108 | if self.lateral: 109 | c2 = self.connect[0](inputs[1]) 110 | c3 = self.connect[1](inputs[2]) 111 | feat = self.fusion(torch.cat([feat, c2, c3], 1)) 112 | outs = list(self.encmodule(feat)) 113 | outs[0] = self.conv6(outs[0]) 114 | return tuple(outs) 115 | 116 | 117 | def get_encnet(dataset='pascal_voc', backbone='resnet50', pretrained=False, 118 | root='~/.encoding/models', **kwargs): 119 | r"""EncNet model from the paper `"Context Encoding for Semantic Segmentation" 120 | `_ 121 | 122 | Parameters 123 | ---------- 124 | dataset : str, default pascal_voc 125 | The dataset that model pretrained on. (pascal_voc, ade20k) 126 | backbone : str, default resnet50 127 | The backbone network. (resnet50, 101, 152) 128 | pretrained : bool, default False 129 | Whether to load the pretrained weights for model. 130 | root : str, default '~/.encoding/models' 131 | Location for keeping the model parameters. 
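    Notes
    -----
    A minimal construction sketch. ``EncNet`` additionally reads the ``jpu``
    and ``lateral`` keyword arguments in its constructor, so they are passed
    through here as well (the values below are only an assumption):

    >>> model = get_encnet(dataset='ade20k', backbone='resnet50',
    ...                    pretrained=False, jpu='JPU', lateral=False)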
132 | 133 | 134 | Examples 135 | -------- 136 | >>> model = get_encnet(dataset='pascal_voc', backbone='resnet50', pretrained=False) 137 | >>> print(model) 138 | """ 139 | acronyms = { 140 | 'pascal_voc': 'voc', 141 | 'ade20k': 'ade', 142 | 'pcontext': 'pcontext', 143 | } 144 | # infer number of classes 145 | from ..datasets import datasets 146 | model = EncNet(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) 147 | if pretrained: 148 | from .model_store import get_model_file 149 | model.load_state_dict(torch.load( 150 | get_model_file('encnet_%s_%s'%(backbone, acronyms[dataset]), root=root))) 151 | return model 152 | 153 | def get_encnet_resnet50_pcontext(pretrained=False, root='~/.encoding/models', **kwargs): 154 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 155 | `_ 156 | 157 | Parameters 158 | ---------- 159 | pretrained : bool, default False 160 | Whether to load the pretrained weights for model. 161 | root : str, default '~/.encoding/models' 162 | Location for keeping the model parameters. 163 | 164 | 165 | Examples 166 | -------- 167 | >>> model = get_encnet_resnet50_pcontext(pretrained=True) 168 | >>> print(model) 169 | """ 170 | return get_encnet('pcontext', 'resnet50', pretrained, root=root, aux=True, 171 | base_size=520, crop_size=480, **kwargs) 172 | 173 | def get_encnet_resnet101_pcontext(pretrained=False, root='~/.encoding/models', **kwargs): 174 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 175 | `_ 176 | 177 | Parameters 178 | ---------- 179 | pretrained : bool, default False 180 | Whether to load the pretrained weights for model. 181 | root : str, default '~/.encoding/models' 182 | Location for keeping the model parameters. 183 | 184 | 185 | Examples 186 | -------- 187 | >>> model = get_encnet_resnet101_pcontext(pretrained=True) 188 | >>> print(model) 189 | """ 190 | return get_encnet('pcontext', 'resnet101', pretrained, root=root, aux=True, 191 | base_size=520, crop_size=480, lateral=True, **kwargs) 192 | 193 | def get_encnet_resnet50_ade(pretrained=False, root='~/.encoding/models', **kwargs): 194 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 195 | `_ 196 | 197 | Parameters 198 | ---------- 199 | pretrained : bool, default False 200 | Whether to load the pretrained weights for model. 201 | root : str, default '~/.encoding/models' 202 | Location for keeping the model parameters. 203 | 204 | 205 | Examples 206 | -------- 207 | >>> model = get_encnet_resnet50_ade(pretrained=True) 208 | >>> print(model) 209 | """ 210 | return get_encnet('ade20k', 'resnet50', pretrained, root=root, aux=True, 211 | base_size=520, crop_size=480, **kwargs) 212 | 213 | def get_encnet_resnet101_ade(pretrained=False, root='~/.encoding/models', **kwargs): 214 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 215 | `_ 216 | 217 | Parameters 218 | ---------- 219 | pretrained : bool, default False 220 | Whether to load the pretrained weights for model. 221 | root : str, default '~/.encoding/models' 222 | Location for keeping the model parameters. 
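    Notes
    -----
    This variant builds a ResNet-101 backbone with ``base_size=640``,
    ``crop_size=576`` and lateral connections (``lateral=True``).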
223 | 224 | 225 | Examples 226 | -------- 227 | >>> model = get_encnet_resnet50_ade(pretrained=True) 228 | >>> print(model) 229 | """ 230 | return get_encnet('ade20k', 'resnet101', pretrained, root=root, aux=True, 231 | base_size=640, crop_size=576, lateral=True, **kwargs) 232 | 233 | def get_encnet_resnet152_ade(pretrained=False, root='~/.encoding/models', **kwargs): 234 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 235 | `_ 236 | 237 | Parameters 238 | ---------- 239 | pretrained : bool, default False 240 | Whether to load the pretrained weights for model. 241 | root : str, default '~/.encoding/models' 242 | Location for keeping the model parameters. 243 | 244 | 245 | Examples 246 | -------- 247 | >>> model = get_encnet_resnet50_ade(pretrained=True) 248 | >>> print(model) 249 | """ 250 | return get_encnet('ade20k', 'resnet152', pretrained, root=root, aux=True, 251 | base_size=520, crop_size=480, **kwargs) 252 | -------------------------------------------------------------------------------- /encoding/models/fcn.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | from __future__ import division 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | from torch.nn.functional import interpolate 12 | 13 | from .base import BaseNet 14 | 15 | __all__ = ['FCN', 'get_fcn', 'get_fcn_resnet50_pcontext', 'get_fcn_resnet50_ade'] 16 | 17 | class FCN(BaseNet): 18 | r"""Fully Convolutional Networks for Semantic Segmentation 19 | 20 | Parameters 21 | ---------- 22 | nclass : int 23 | Number of categories for the training dataset. 24 | backbone : string 25 | Pre-trained dilated backbone network type (default:'resnet50'; 'resnet50', 26 | 'resnet101' or 'resnet152'). 27 | norm_layer : object 28 | Normalization layer used in backbone network (default: :class:`mxnet.gluon.nn.BatchNorm`; 29 | 30 | 31 | Reference: 32 | 33 | Long, Jonathan, Evan Shelhamer, and Trevor Darrell. "Fully convolutional networks 34 | for semantic segmentation." 
*CVPR*, 2015 35 | 36 | Examples 37 | -------- 38 | >>> model = FCN(nclass=21, backbone='resnet50') 39 | >>> print(model) 40 | """ 41 | def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs): 42 | super(FCN, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs) 43 | self.head = FCNHead(2048, nclass, norm_layer) 44 | if aux: 45 | self.auxlayer = FCNHead(1024, nclass, norm_layer) 46 | 47 | def forward(self, x): 48 | imsize = x.size()[2:] 49 | _, _, c3, c4 = self.base_forward(x) 50 | 51 | x = self.head(c4) 52 | x = interpolate(x, imsize, **self._up_kwargs) 53 | outputs = [x] 54 | if self.aux: 55 | auxout = self.auxlayer(c3) 56 | auxout = interpolate(auxout, imsize, **self._up_kwargs) 57 | outputs.append(auxout) 58 | return tuple(outputs) 59 | 60 | 61 | class FCNHead(nn.Module): 62 | def __init__(self, in_channels, out_channels, norm_layer): 63 | super(FCNHead, self).__init__() 64 | inter_channels = in_channels // 4 65 | self.conv5 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False), 66 | norm_layer(inter_channels), 67 | nn.ReLU(), 68 | nn.Dropout2d(0.1, False), 69 | nn.Conv2d(inter_channels, out_channels, 1)) 70 | 71 | def forward(self, x): 72 | return self.conv5(x) 73 | 74 | 75 | def get_fcn(dataset='pascal_voc', backbone='resnet50', pretrained=False, 76 | root='~/.encoding/models', **kwargs): 77 | r"""FCN model from the paper `"Fully Convolutional Network for semantic segmentation" 78 | `_ 79 | Parameters 80 | ---------- 81 | dataset : str, default pascal_voc 82 | The dataset that model pretrained on. (pascal_voc, ade20k) 83 | pretrained : bool, default False 84 | Whether to load the pretrained weights for model. 85 | root : str, default '~/.encoding/models' 86 | Location for keeping the model parameters. 87 | Examples 88 | -------- 89 | >>> model = get_fcn(dataset='pascal_voc', backbone='resnet50', pretrained=False) 90 | >>> print(model) 91 | """ 92 | acronyms = { 93 | 'pascal_voc': 'voc', 94 | 'pascal_aug': 'voc', 95 | 'pcontext': 'pcontext', 96 | 'ade20k': 'ade', 97 | } 98 | # infer number of classes 99 | from ..datasets import datasets 100 | model = FCN(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) 101 | if pretrained: 102 | from .model_store import get_model_file 103 | model.load_state_dict(torch.load( 104 | get_model_file('fcn_%s_%s'%(backbone, acronyms[dataset]), root=root))) 105 | return model 106 | 107 | def get_fcn_resnet50_pcontext(pretrained=False, root='~/.encoding/models', **kwargs): 108 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 109 | `_ 110 | 111 | Parameters 112 | ---------- 113 | pretrained : bool, default False 114 | Whether to load the pretrained weights for model. 115 | root : str, default '~/.encoding/models' 116 | Location for keeping the model parameters. 117 | 118 | 119 | Examples 120 | -------- 121 | >>> model = get_fcn_resnet50_pcontext(pretrained=True) 122 | >>> print(model) 123 | """ 124 | return get_fcn('pcontext', 'resnet50', pretrained, root=root, aux=False, **kwargs) 125 | 126 | def get_fcn_resnet50_ade(pretrained=False, root='~/.encoding/models', **kwargs): 127 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 128 | `_ 129 | 130 | Parameters 131 | ---------- 132 | pretrained : bool, default False 133 | Whether to load the pretrained weights for model. 134 | root : str, default '~/.encoding/models' 135 | Location for keeping the model parameters. 
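    Notes
    -----
    Despite the paper reference above, this helper returns a plain
    :class:`FCN` built by ``get_fcn('ade20k', 'resnet50', ...)``.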
136 | 137 | 138 | Examples 139 | -------- 140 | >>> model = get_fcn_resnet50_ade(pretrained=True) 141 | >>> print(model) 142 | """ 143 | return get_fcn('ade20k', 'resnet50', pretrained, root=root, **kwargs) 144 | -------------------------------------------------------------------------------- /encoding/models/model_store.py: -------------------------------------------------------------------------------- 1 | """Model store which provides pretrained models.""" 2 | from __future__ import print_function 3 | 4 | import os 5 | import zipfile 6 | 7 | from ..utils import download, check_sha1 8 | 9 | __all__ = ['get_model_file', 'purge'] 10 | 11 | _model_sha1 = {name: checksum for checksum, name in [ 12 | ('ebb6acbbd1d1c90b7f446ae59d30bf70c74febc1', 'resnet50'), 13 | ('2a57e44de9c853fa015b172309a1ee7e2d0e4e2a', 'resnet101'), 14 | ('0d43d698c66aceaa2bc0309f55efdd7ff4b143af', 'resnet152'), 15 | ('662e979de25a389f11c65e9f1df7e06c2c356381', 'fcn_resnet50_ade'), 16 | ('eeed8e582f0fdccdba8579e7490570adc6d85c7c', 'fcn_resnet50_pcontext'), 17 | ('54f70c772505064e30efd1ddd3a14e1759faa363', 'psp_resnet50_ade'), 18 | ('075195c5237b778c718fd73ceddfa1376c18dfd0', 'deeplab_resnet50_ade'), 19 | ('5ee47ee28b480cc781a195d13b5806d5bbc616bf', 'encnet_resnet101_coco'), 20 | ('4de91d5922d4d3264f678b663f874da72e82db00', 'encnet_resnet50_pcontext'), 21 | ('9f27ea13d514d7010e59988341bcbd4140fcc33d', 'encnet_resnet101_pcontext'), 22 | ('07ac287cd77e53ea583f37454e17d30ce1509a4a', 'encnet_resnet50_ade'), 23 | ('3f54fa3b67bac7619cd9b3673f5c8227cf8f4718', 'encnet_resnet101_ade'), 24 | ]} 25 | 26 | encoding_repo_url = 'https://hangzh.s3.amazonaws.com/' 27 | _url_format = '{repo_url}encoding/models/{file_name}.zip' 28 | 29 | def short_hash(name): 30 | if name not in _model_sha1: 31 | raise ValueError('Pretrained model for {name} is not available.'.format(name=name)) 32 | return _model_sha1[name][:8] 33 | 34 | def get_model_file(name, root=os.path.join('~', '.encoding', 'models')): 35 | r"""Return location for the pretrained on local file system. 36 | 37 | This function will download from online model zoo when model cannot be found or has mismatch. 38 | The root directory will be created if it doesn't exist. 39 | 40 | Parameters 41 | ---------- 42 | name : str 43 | Name of the model. 44 | root : str, default '~/.encoding/models' 45 | Location for keeping the model parameters. 46 | 47 | Returns 48 | ------- 49 | file_path 50 | Path to the requested pretrained model file. 51 | """ 52 | file_name = '{name}-{short_hash}'.format(name=name, short_hash=short_hash(name)) 53 | root = os.path.expanduser(root) 54 | file_path = os.path.join(root, file_name+'.pth') 55 | sha1_hash = _model_sha1[name] 56 | if os.path.exists(file_path): 57 | if check_sha1(file_path, sha1_hash): 58 | return file_path 59 | else: 60 | print('Mismatch in the content of model file {} detected.' + 61 | ' Downloading again.'.format(file_path)) 62 | else: 63 | print('Model file {} is not found. 
Downloading.'.format(file_path)) 64 | 65 | if not os.path.exists(root): 66 | os.makedirs(root) 67 | 68 | zip_file_path = os.path.join(root, file_name+'.zip') 69 | repo_url = os.environ.get('ENCODING_REPO', encoding_repo_url) 70 | if repo_url[-1] != '/': 71 | repo_url = repo_url + '/' 72 | download(_url_format.format(repo_url=repo_url, file_name=file_name), 73 | path=zip_file_path, 74 | overwrite=True) 75 | with zipfile.ZipFile(zip_file_path) as zf: 76 | zf.extractall(root) 77 | os.remove(zip_file_path) 78 | 79 | if check_sha1(file_path, sha1_hash): 80 | return file_path 81 | else: 82 | raise ValueError('Downloaded file has different hash. Please try again.') 83 | 84 | def purge(root=os.path.join('~', '.encoding', 'models')): 85 | r"""Purge all pretrained model files in local file store. 86 | 87 | Parameters 88 | ---------- 89 | root : str, default '~/.encoding/models' 90 | Location for keeping the model parameters. 91 | """ 92 | root = os.path.expanduser(root) 93 | files = os.listdir(root) 94 | for f in files: 95 | if f.endswith(".pth"): 96 | os.remove(os.path.join(root, f)) 97 | 98 | def pretrained_model_list(): 99 | return list(_model_sha1.keys()) 100 | -------------------------------------------------------------------------------- /encoding/models/model_zoo.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import, unused-wildcard-import 2 | from .fcn import * 3 | from .psp import * 4 | from .encnet import * 5 | 6 | __all__ = ['get_model'] 7 | 8 | 9 | def get_model(name, **kwargs): 10 | """Returns a pre-defined model by name 11 | 12 | Parameters 13 | ---------- 14 | name : str 15 | Name of the model. 16 | pretrained : bool 17 | Whether to load the pretrained weights for model. 18 | root : str, default '~/.encoding/models' 19 | Location for keeping the model parameters. 20 | 21 | Returns 22 | ------- 23 | Module: 24 | The model. 
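
    Examples
    --------
    >>> # a minimal sketch; some entries (e.g. the EncNet variants) may also
    >>> # expect extra kwargs such as ``jpu`` or ``lateral``
    >>> model = get_model('fcn_resnet50_ade', pretrained=False)
    >>> print(model)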
25 | """ 26 | models = { 27 | 'fcn_resnet50_pcontext': get_fcn_resnet50_pcontext, 28 | 'encnet_resnet50_pcontext': get_encnet_resnet50_pcontext, 29 | 'encnet_resnet101_pcontext': get_encnet_resnet101_pcontext, 30 | 'encnet_resnet50_ade': get_encnet_resnet50_ade, 31 | 'encnet_resnet101_ade': get_encnet_resnet101_ade, 32 | 'fcn_resnet50_ade': get_fcn_resnet50_ade, 33 | 'psp_resnet50_ade': get_psp_resnet50_ade, 34 | } 35 | name = name.lower() 36 | if name not in models: 37 | raise ValueError('%s\n\t%s' % (str(name), '\n\t'.join(sorted(models.keys())))) 38 | net = models[name](**kwargs) 39 | return net 40 | -------------------------------------------------------------------------------- /encoding/models/psp.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | from __future__ import division 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | from torch.nn.functional import interpolate 12 | 13 | from .base import BaseNet 14 | from .fcn import FCNHead 15 | from ..nn import PyramidPooling 16 | 17 | class PSP(BaseNet): 18 | def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs): 19 | super(PSP, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs) 20 | self.head = PSPHead(2048, nclass, norm_layer, self._up_kwargs) 21 | if aux: 22 | self.auxlayer = FCNHead(1024, nclass, norm_layer) 23 | 24 | def forward(self, x): 25 | _, _, h, w = x.size() 26 | _, _, c3, c4 = self.base_forward(x) 27 | 28 | outputs = [] 29 | x = self.head(c4) 30 | x = interpolate(x, (h,w), **self._up_kwargs) 31 | outputs.append(x) 32 | if self.aux: 33 | auxout = self.auxlayer(c3) 34 | auxout = interpolate(auxout, (h,w), **self._up_kwargs) 35 | outputs.append(auxout) 36 | return tuple(outputs) 37 | 38 | 39 | class PSPHead(nn.Module): 40 | def __init__(self, in_channels, out_channels, norm_layer, up_kwargs): 41 | super(PSPHead, self).__init__() 42 | inter_channels = in_channels // 4 43 | self.conv5 = nn.Sequential(PyramidPooling(in_channels, norm_layer, up_kwargs), 44 | nn.Conv2d(in_channels * 2, inter_channels, 3, padding=1, bias=False), 45 | norm_layer(inter_channels), 46 | nn.ReLU(True), 47 | nn.Dropout2d(0.1, False), 48 | nn.Conv2d(inter_channels, out_channels, 1)) 49 | 50 | def forward(self, x): 51 | return self.conv5(x) 52 | 53 | def get_psp(dataset='pascal_voc', backbone='resnet50', pretrained=False, 54 | root='~/.encoding/models', **kwargs): 55 | acronyms = { 56 | 'pascal_voc': 'voc', 57 | 'pascal_aug': 'voc', 58 | 'ade20k': 'ade', 59 | } 60 | # infer number of classes 61 | from ..datasets import datasets 62 | model = PSP(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) 63 | if pretrained: 64 | from .model_store import get_model_file 65 | model.load_state_dict(torch.load( 66 | get_model_file('psp_%s_%s'%(backbone, acronyms[dataset]), root=root))) 67 | return model 68 | 69 | def get_psp_resnet50_ade(pretrained=False, root='~/.encoding/models', **kwargs): 70 | r"""PSP model from the paper `"Context Encoding for Semantic Segmentation" 71 | `_ 72 | 73 | Parameters 74 | ---------- 75 | pretrained : bool, default False 76 | Whether to load the pretrained weights for model. 
77 | root : str, default '~/.encoding/models' 78 | Location for keeping the model parameters. 79 | 80 | 81 | Examples 82 | -------- 83 | >>> model = get_psp_resnet50_ade(pretrained=True) 84 | >>> print(model) 85 | """ 86 | return get_psp('ade20k', 'resnet50', pretrained, root=root, **kwargs) 87 | -------------------------------------------------------------------------------- /encoding/nn/__init__.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding NN Modules""" 12 | from .syncbn import * 13 | from .encoding import * 14 | from .customize import * 15 | -------------------------------------------------------------------------------- /encoding/nn/comm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : comm.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | import queue 12 | import collections 13 | 14 | import threading 15 | 16 | __all__ = ['FutureResult', 'SlavePipe', 'SyncMaster'] 17 | 18 | 19 | class FutureResult(object): 20 | """A thread-safe future implementation. Used only as one-to-one pipe.""" 21 | 22 | def __init__(self): 23 | self._result = None 24 | self._lock = threading.Lock() 25 | self._cond = threading.Condition(self._lock) 26 | 27 | def put(self, result): 28 | with self._lock: 29 | assert self._result is None, 'Previous result has\'t been fetched.' 30 | self._result = result 31 | self._cond.notify() 32 | 33 | def get(self): 34 | with self._lock: 35 | if self._result is None: 36 | self._cond.wait() 37 | 38 | res = self._result 39 | self._result = None 40 | return res 41 | 42 | 43 | _MasterRegistry = collections.namedtuple('MasterRegistry', ['result']) 44 | _SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result']) 45 | 46 | 47 | class SlavePipe(_SlavePipeBase): 48 | """Pipe for master-slave communication.""" 49 | 50 | def run_slave(self, msg): 51 | self.queue.put((self.identifier, msg)) 52 | ret = self.result.get() 53 | self.queue.put(True) 54 | return ret 55 | 56 | 57 | class SyncMaster(object): 58 | """An abstract `SyncMaster` object. 59 | 60 | - During the replication, as the data parallel will trigger an callback of each module, all slave devices should 61 | call `register(id)` and obtain an `SlavePipe` to communicate with the master. 62 | - During the forward pass, master device invokes `run_master`, all messages from slave devices will be collected, 63 | and passed to a registered callback. 64 | - After receiving the messages, the master device should gather the information and determine to message passed 65 | back to each slave devices. 66 | """ 67 | 68 | def __init__(self, master_callback): 69 | """ 70 | 71 | Args: 72 | master_callback: a callback to be invoked after having collected messages from slave devices. 
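
        A rough coordination sketch (the identity callback below is only a
        hypothetical placeholder; the real callback lives in
        ``SyncBatchNorm._data_parallel_master``)::

            master = SyncMaster(master_callback=lambda msgs: msgs)
            pipe = master.register_slave(identifier=1)      # on each replica
            # slave thread:  out = pipe.run_slave(local_msg)     (blocks)
            # master thread: out = master.run_master(master_msg)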
73 | """ 74 | self._master_callback = master_callback 75 | self._queue = queue.Queue() 76 | self._registry = collections.OrderedDict() 77 | self._activated = False 78 | 79 | def register_slave(self, identifier): 80 | """ 81 | Register an slave device. 82 | 83 | Args: 84 | identifier: an identifier, usually is the device id. 85 | 86 | Returns: a `SlavePipe` object which can be used to communicate with the master device. 87 | 88 | """ 89 | if self._activated: 90 | assert self._queue.empty(), 'Queue is not clean before next initialization.' 91 | self._activated = False 92 | self._registry.clear() 93 | future = FutureResult() 94 | self._registry[identifier] = _MasterRegistry(future) 95 | return SlavePipe(identifier, self._queue, future) 96 | 97 | def run_master(self, master_msg): 98 | """ 99 | Main entry for the master device in each forward pass. 100 | The messages were first collected from each devices (including the master device), and then 101 | an callback will be invoked to compute the message to be sent back to each devices 102 | (including the master device). 103 | 104 | Args: 105 | master_msg: the message that the master want to send to itself. This will be placed as the first 106 | message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example. 107 | 108 | Returns: the message to be sent back to the master device. 109 | 110 | """ 111 | self._activated = True 112 | 113 | intermediates = [(0, master_msg)] 114 | for i in range(self.nr_slaves): 115 | intermediates.append(self._queue.get()) 116 | 117 | results = self._master_callback(intermediates) 118 | assert results[0][0] == 0, 'The first result should belongs to the master.' 119 | 120 | for i, res in results: 121 | if i == 0: 122 | continue 123 | self._registry[i].result.put(res) 124 | 125 | for i in range(self.nr_slaves): 126 | assert self._queue.get() is True 127 | 128 | return results[0][1] 129 | 130 | @property 131 | def nr_slaves(self): 132 | return len(self._registry) 133 | -------------------------------------------------------------------------------- /encoding/nn/customize.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Custermized NN Module""" 12 | import torch 13 | import torch.nn as nn 14 | 15 | from torch.nn import functional as F 16 | from torch.nn import Module, Sequential, Conv2d, ReLU, AdaptiveAvgPool2d, BCELoss, CrossEntropyLoss 17 | 18 | from torch.autograd import Variable 19 | 20 | torch_ver = torch.__version__[:3] 21 | 22 | __all__ = ['SegmentationLosses', 'PyramidPooling', 'JPU', 'JPU_X', 'Mean'] 23 | 24 | class SegmentationLosses(CrossEntropyLoss): 25 | """2D Cross Entropy Loss with Auxilary Loss""" 26 | def __init__(self, se_loss=False, se_weight=0.2, nclass=-1, 27 | aux=False, aux_weight=0.4, weight=None, 28 | size_average=True, ignore_index=-1, reduction='mean'): 29 | super(SegmentationLosses, self).__init__(weight, ignore_index=ignore_index, reduction=reduction) 30 | self.se_loss = se_loss 31 | self.aux = aux 32 | self.nclass = nclass 33 | self.se_weight = se_weight 34 | 
self.aux_weight = aux_weight 35 | self.bceloss = BCELoss(weight, reduction=reduction) 36 | 37 | def forward(self, *inputs): 38 | if not self.se_loss and not self.aux: 39 | return super(SegmentationLosses, self).forward(*inputs) 40 | elif not self.se_loss: 41 | pred1, pred2, target = tuple(inputs) 42 | loss1 = super(SegmentationLosses, self).forward(pred1, target) 43 | loss2 = super(SegmentationLosses, self).forward(pred2, target) 44 | return loss1 + self.aux_weight * loss2 45 | elif not self.aux: 46 | pred, se_pred, target = tuple(inputs) 47 | se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred) 48 | loss1 = super(SegmentationLosses, self).forward(pred, target) 49 | loss2 = self.bceloss(torch.sigmoid(se_pred), se_target) 50 | return loss1 + self.se_weight * loss2 51 | else: 52 | pred1, se_pred, pred2, target = tuple(inputs) 53 | se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred1) 54 | loss1 = super(SegmentationLosses, self).forward(pred1, target) 55 | loss2 = super(SegmentationLosses, self).forward(pred2, target) 56 | loss3 = self.bceloss(torch.sigmoid(se_pred), se_target) 57 | return loss1 + self.aux_weight * loss2 + self.se_weight * loss3 58 | 59 | @staticmethod 60 | def _get_batch_label_vector(target, nclass): 61 | # target is a 3D Variable BxHxW, output is 2D BxnClass 62 | batch = target.size(0) 63 | tvect = Variable(torch.zeros(batch, nclass)) 64 | for i in range(batch): 65 | hist = torch.histc(target[i].cpu().data.float(), 66 | bins=nclass, min=0, 67 | max=nclass-1) 68 | vect = hist>0 69 | tvect[i] = vect 70 | return tvect 71 | 72 | 73 | class Normalize(Module): 74 | r"""Performs :math:`L_p` normalization of inputs over specified dimension. 75 | 76 | Does: 77 | 78 | .. math:: 79 | v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)} 80 | 81 | for each subtensor v over dimension dim of input. Each subtensor is 82 | flattened into a vector, i.e. :math:`\lVert v \rVert_p` is not a matrix 83 | norm. 84 | 85 | With default arguments normalizes over the second dimension with Euclidean 86 | norm. 87 | 88 | Args: 89 | p (float): the exponent value in the norm formulation. Default: 2 90 | dim (int): the dimension to reduce. Default: 1 91 | """ 92 | def __init__(self, p=2, dim=1): 93 | super(Normalize, self).__init__() 94 | self.p = p 95 | self.dim = dim 96 | 97 | def forward(self, x): 98 | return F.normalize(x, self.p, self.dim, eps=1e-8) 99 | 100 | 101 | class PyramidPooling(Module): 102 | """ 103 | Reference: 104 | Zhao, Hengshuang, et al. 
*"Pyramid scene parsing network."* 105 | """ 106 | def __init__(self, in_channels, norm_layer, up_kwargs): 107 | super(PyramidPooling, self).__init__() 108 | self.pool1 = AdaptiveAvgPool2d(1) 109 | self.pool2 = AdaptiveAvgPool2d(2) 110 | self.pool3 = AdaptiveAvgPool2d(3) 111 | self.pool4 = AdaptiveAvgPool2d(6) 112 | 113 | out_channels = int(in_channels/4) 114 | self.conv1 = Sequential(Conv2d(in_channels, out_channels, 1, bias=False), 115 | norm_layer(out_channels), 116 | ReLU(True)) 117 | self.conv2 = Sequential(Conv2d(in_channels, out_channels, 1, bias=False), 118 | norm_layer(out_channels), 119 | ReLU(True)) 120 | self.conv3 = Sequential(Conv2d(in_channels, out_channels, 1, bias=False), 121 | norm_layer(out_channels), 122 | ReLU(True)) 123 | self.conv4 = Sequential(Conv2d(in_channels, out_channels, 1, bias=False), 124 | norm_layer(out_channels), 125 | ReLU(True)) 126 | # bilinear upsample options 127 | self._up_kwargs = up_kwargs 128 | 129 | def forward(self, x): 130 | _, _, h, w = x.size() 131 | feat1 = F.interpolate(self.conv1(self.pool1(x)), (h, w), **self._up_kwargs) 132 | feat2 = F.interpolate(self.conv2(self.pool2(x)), (h, w), **self._up_kwargs) 133 | feat3 = F.interpolate(self.conv3(self.pool3(x)), (h, w), **self._up_kwargs) 134 | feat4 = F.interpolate(self.conv4(self.pool4(x)), (h, w), **self._up_kwargs) 135 | return torch.cat((x, feat1, feat2, feat3, feat4), 1) 136 | 137 | 138 | class SeparableConv2d(nn.Module): 139 | def __init__(self, inplanes, planes, kernel_size=3, stride=1, padding=1, dilation=1, bias=False, norm_layer=nn.BatchNorm2d): 140 | super(SeparableConv2d, self).__init__() 141 | 142 | self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size, stride, padding, dilation, groups=inplanes, bias=bias) 143 | self.bn = norm_layer(inplanes) 144 | self.pointwise = nn.Conv2d(inplanes, planes, 1, 1, 0, 1, 1, bias=bias) 145 | 146 | def forward(self, x): 147 | x = self.conv1(x) 148 | x = self.bn(x) 149 | x = self.pointwise(x) 150 | return x 151 | 152 | 153 | class JPU(nn.Module): 154 | def __init__(self, in_channels, width=512, norm_layer=None, up_kwargs=None): 155 | super(JPU, self).__init__() 156 | self.up_kwargs = up_kwargs 157 | 158 | self.conv5 = nn.Sequential( 159 | nn.Conv2d(in_channels[-1], width, 3, padding=1, bias=False), 160 | norm_layer(width), 161 | nn.ReLU(inplace=True)) 162 | self.conv4 = nn.Sequential( 163 | nn.Conv2d(in_channels[-2], width, 3, padding=1, bias=False), 164 | norm_layer(width), 165 | nn.ReLU(inplace=True)) 166 | self.conv3 = nn.Sequential( 167 | nn.Conv2d(in_channels[-3], width, 3, padding=1, bias=False), 168 | norm_layer(width), 169 | nn.ReLU(inplace=True)) 170 | 171 | self.dilation1 = nn.Sequential(SeparableConv2d(3*width, width, kernel_size=3, padding=1, dilation=1, bias=False), 172 | norm_layer(width), 173 | nn.ReLU(inplace=True)) 174 | self.dilation2 = nn.Sequential(SeparableConv2d(3*width, width, kernel_size=3, padding=2, dilation=2, bias=False), 175 | norm_layer(width), 176 | nn.ReLU(inplace=True)) 177 | self.dilation3 = nn.Sequential(SeparableConv2d(3*width, width, kernel_size=3, padding=4, dilation=4, bias=False), 178 | norm_layer(width), 179 | nn.ReLU(inplace=True)) 180 | self.dilation4 = nn.Sequential(SeparableConv2d(3*width, width, kernel_size=3, padding=8, dilation=8, bias=False), 181 | norm_layer(width), 182 | nn.ReLU(inplace=True)) 183 | 184 | def forward(self, *inputs): 185 | feats = [self.conv5(inputs[-1]), self.conv4(inputs[-2]), self.conv3(inputs[-3])] 186 | _, _, h, w = feats[-1].size() 187 | feats[-2] = F.interpolate(feats[-2], 
(h, w), **self.up_kwargs) 188 | feats[-3] = F.interpolate(feats[-3], (h, w), **self.up_kwargs) 189 | feat = torch.cat(feats, dim=1) 190 | feat = torch.cat([self.dilation1(feat), self.dilation2(feat), self.dilation3(feat), self.dilation4(feat)], dim=1) 191 | 192 | return inputs[0], inputs[1], inputs[2], feat 193 | 194 | 195 | class JUM(nn.Module): 196 | def __init__(self, in_channels, width, dilation, norm_layer, up_kwargs): 197 | super(JUM, self).__init__() 198 | self.up_kwargs = up_kwargs 199 | 200 | self.conv_l = nn.Sequential( 201 | nn.Conv2d(in_channels[-1], width, 3, padding=1, bias=False), 202 | norm_layer(width), 203 | nn.ReLU(inplace=True)) 204 | self.conv_h = nn.Sequential( 205 | nn.Conv2d(in_channels[-2], width, 3, padding=1, bias=False), 206 | norm_layer(width), 207 | nn.ReLU(inplace=True)) 208 | 209 | norm_layer = lambda n_channels: nn.GroupNorm(32, n_channels) 210 | self.dilation1 = nn.Sequential(SeparableConv2d(2*width, width, kernel_size=3, padding=dilation, dilation=dilation, bias=False, norm_layer=norm_layer), 211 | norm_layer(width), 212 | nn.ReLU(inplace=True)) 213 | self.dilation2 = nn.Sequential(SeparableConv2d(2*width, width, kernel_size=3, padding=2*dilation, dilation=2*dilation, bias=False, norm_layer=norm_layer), 214 | norm_layer(width), 215 | nn.ReLU(inplace=True)) 216 | self.dilation3 = nn.Sequential(SeparableConv2d(2*width, width, kernel_size=3, padding=4*dilation, dilation=4*dilation, bias=False, norm_layer=norm_layer), 217 | norm_layer(width), 218 | nn.ReLU(inplace=True)) 219 | 220 | def forward(self, x_l, x_h): 221 | feats = [self.conv_l(x_l), self.conv_h(x_h)] 222 | _, _, h, w = feats[-1].size() 223 | feats[-2] = F.upsample(feats[-2], (h, w), **self.up_kwargs) 224 | feat = torch.cat(feats, dim=1) 225 | feat = torch.cat([feats[-2], self.dilation1(feat), self.dilation2(feat), self.dilation3(feat)], dim=1) 226 | 227 | return feat 228 | 229 | class JPU_X(nn.Module): 230 | def __init__(self, in_channels, width=512, norm_layer=None, up_kwargs=None): 231 | super(JPU_X, self).__init__() 232 | self.jum_1 = JUM(in_channels[:2], width//2, 1, norm_layer, up_kwargs) 233 | self.jum_2 = JUM(in_channels[1:], width, 2, norm_layer, up_kwargs) 234 | 235 | def forward(self, *inputs): 236 | feat = self.jum_1(inputs[2], inputs[1]) 237 | feat = self.jum_2(inputs[3], feat) 238 | 239 | return inputs[0], inputs[1], inputs[2], feat 240 | 241 | 242 | class Mean(Module): 243 | def __init__(self, dim, keep_dim=False): 244 | super(Mean, self).__init__() 245 | self.dim = dim 246 | self.keep_dim = keep_dim 247 | 248 | def forward(self, input): 249 | return input.mean(self.dim, self.keep_dim) 250 | -------------------------------------------------------------------------------- /encoding/nn/encoding.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Package Core NN Modules.""" 12 | import torch 13 | import torch.nn.functional as F 14 | 15 | from torch.nn import Module, Parameter 16 | 17 | from ..functions import scaled_l2, aggregate 18 | 19 | __all__ = ['Encoding'] 20 | 21 | class Encoding(Module): 22 | 
r""" 23 | Encoding Layer: a learnable residual encoder. 24 | 25 | .. image:: _static/img/cvpr17.svg 26 | :width: 50% 27 | :align: center 28 | 29 | Encoding Layer accpets 3D or 4D inputs. 30 | It considers an input featuremaps with the shape of :math:`C\times H\times W` 31 | as a set of C-dimentional input features :math:`X=\{x_1, ...x_N\}`, where N is total number 32 | of features given by :math:`H\times W`, which learns an inherent codebook 33 | :math:`D=\{d_1,...d_K\}` and a set of smoothing factor of visual centers 34 | :math:`S=\{s_1,...s_K\}`. Encoding Layer outputs the residuals with soft-assignment weights 35 | :math:`e_k=\sum_{i=1}^Ne_{ik}`, where 36 | 37 | .. math:: 38 | 39 | e_{ik} = \frac{exp(-s_k\|r_{ik}\|^2)}{\sum_{j=1}^K exp(-s_j\|r_{ij}\|^2)} r_{ik} 40 | 41 | and the residuals are given by :math:`r_{ik} = x_i - d_k`. The output encoders are 42 | :math:`E=\{e_1,...e_K\}`. 43 | 44 | Args: 45 | D: dimention of the features or feature channels 46 | K: number of codeswords 47 | 48 | Shape: 49 | - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` or 50 | :math:`\mathcal{R}^{B\times D\times H\times W}` (where :math:`B` is batch, 51 | :math:`N` is total number of features or :math:`H\times W`.) 52 | - Output: :math:`E\in\mathcal{R}^{B\times K\times D}` 53 | 54 | Attributes: 55 | codewords (Tensor): the learnable codewords of shape (:math:`K\times D`) 56 | scale (Tensor): the learnable scale factor of visual centers 57 | 58 | Reference: 59 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 60 | Amit Agrawal. “Context Encoding for Semantic Segmentation. 61 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 62 | 63 | Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." 
64 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017* 65 | 66 | Examples: 67 | >>> import encoding 68 | >>> import torch 69 | >>> import torch.nn.functional as F 70 | >>> from torch.autograd import Variable 71 | >>> B,C,H,W,K = 2,3,4,5,6 72 | >>> X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), requires_grad=True) 73 | >>> layer = encoding.Encoding(C,K).double().cuda() 74 | >>> E = layer(X) 75 | """ 76 | def __init__(self, D, K): 77 | super(Encoding, self).__init__() 78 | # init codewords and smoothing factor 79 | self.D, self.K = D, K 80 | self.codewords = Parameter(torch.Tensor(K, D), requires_grad=True) 81 | self.scale = Parameter(torch.Tensor(K), requires_grad=True) 82 | self.reset_params() 83 | 84 | def reset_params(self): 85 | std1 = 1./((self.K*self.D)**(1/2)) 86 | self.codewords.data.uniform_(-std1, std1) 87 | self.scale.data.uniform_(-1, 0) 88 | 89 | def forward(self, X): 90 | # input X is a 4D tensor 91 | assert(X.size(1) == self.D) 92 | B, D = X.size(0), self.D 93 | if X.dim() == 3: 94 | # BxDxN => BxNxD 95 | X = X.transpose(1, 2).contiguous() 96 | elif X.dim() == 4: 97 | # BxDxHxW => Bx(HW)xD 98 | X = X.view(B, D, -1).transpose(1, 2).contiguous() 99 | else: 100 | raise RuntimeError('Encoding Layer unknown input dims!') 101 | # assignment weights BxNxK 102 | A = F.softmax(scaled_l2(X, self.codewords, self.scale), dim=2) 103 | # aggregate 104 | E = aggregate(A, X, self.codewords) 105 | return E 106 | 107 | def __repr__(self): 108 | return self.__class__.__name__ + '(' \ 109 | + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \ 110 | + str(self.D) + ')' 111 | -------------------------------------------------------------------------------- /encoding/nn/syncbn.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Synchronized Cross-GPU Batch Normalization Module""" 12 | import collections 13 | import torch 14 | 15 | from torch.nn.modules.batchnorm import _BatchNorm 16 | from torch.nn.functional import batch_norm 17 | from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast 18 | 19 | from ..functions import normalization 20 | from .comm import SyncMaster 21 | 22 | 23 | __all__ = ['SyncBatchNorm', 'BatchNorm'] 24 | 25 | 26 | class SyncBatchNorm(_BatchNorm): 27 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True): 28 | super(SyncBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine) 29 | 30 | self._sync_master = SyncMaster(self._data_parallel_master) 31 | self._parallel_id = None 32 | self._slave_pipe = None 33 | 34 | def forward(self, input): 35 | if not self.training: 36 | return batch_norm( 37 | input, self.running_mean, self.running_var, self.weight, self.bias, 38 | self.training, self.momentum, self.eps) 39 | 40 | # Resize the input to (B, C, -1). 
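        # The synchronization below happens in three steps:
        #   1. each replica computes its local sum(x) and sum(x^2) over the
        #      batch and spatial dimensions,
        #   2. the master device reduces them into a global mean / inv_std
        #      (see `_data_parallel_master`, coordinated by SyncMaster),
        #   3. the broadcasted statistics are applied via `normalization`.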
41 | input_shape = input.size() 42 | input = input.view(input_shape[0], self.num_features, -1) 43 | 44 | # sum(x) and sum(x^2) 45 | N = input.size(0) * input.size(2) 46 | xsum = input.sum((0, 2)) 47 | xsqsum = input.pow(2).sum((0, 2)) 48 | 49 | # all-reduce for global sum(x) and sum(x^2) 50 | if self._parallel_id == 0: 51 | mean, inv_std = self._sync_master.run_master(_ChildMessage(xsum, xsqsum, N)) 52 | else: 53 | mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(xsum, xsqsum, N)) 54 | # forward 55 | return normalization(input, mean, inv_std, self.weight, self.bias).view(input_shape) 56 | 57 | def __data_parallel_replicate__(self, ctx, copy_id): 58 | self._parallel_id = copy_id 59 | 60 | # parallel_id == 0 means master device. 61 | if self._parallel_id == 0: 62 | ctx.sync_master = self._sync_master 63 | else: 64 | self._slave_pipe = ctx.sync_master.register_slave(copy_id) 65 | 66 | def _data_parallel_master(self, intermediates): 67 | """Reduce the sum and square-sum, compute the statistics, and broadcast it.""" 68 | 69 | # Always using same "device order" makes the ReduceAdd operation faster. 70 | # Thanks to:: Tete Xiao (http://tetexiao.com/) 71 | intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device()) 72 | 73 | to_reduce = [i[1][:2] for i in intermediates] 74 | to_reduce = [j for i in to_reduce for j in i] # flatten 75 | target_gpus = [i[1].sum.get_device() for i in intermediates] 76 | 77 | sum_size = sum([i[1].sum_size for i in intermediates]) 78 | sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce) 79 | mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size) 80 | 81 | broadcasted = Broadcast.apply(target_gpus, mean, inv_std) 82 | 83 | outputs = [] 84 | for i, rec in enumerate(intermediates): 85 | outputs.append((rec[0], _MasterMessage(*broadcasted[i*2:i*2+2]))) 86 | 87 | return outputs 88 | 89 | def _compute_mean_std(self, sum_, ssum, size): 90 | """Compute the mean and standard-deviation with sum and square-sum. This method 91 | also maintains the moving average on the master device.""" 92 | assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.' 
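        # The statistics below follow directly from the per-device sums, with n = size:
        #   mean    = sum(x) / n
        #   sumvar  = sum(x^2) - n * mean^2      (equals sum((x - mean)^2))
        #   var     = sumvar / n                 (biased, used for normalization)
        #   var_u   = sumvar / (n - 1)           (unbiased, stored in running_var)
        #   inv_std = (var + eps) ** -0.5
        # For example, x = [1, 2, 3] gives sum = 6, ssum = 14, mean = 2,
        # sumvar = 2, biased var = 2/3 and unbiased var = 1.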
93 | mean = sum_ / size 94 | sumvar = ssum - sum_ * mean 95 | unbias_var = sumvar / (size - 1) 96 | bias_var = sumvar / size 97 | 98 | self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data 99 | self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data 100 | 101 | return mean, (bias_var + self.eps) ** -0.5 102 | 103 | @classmethod 104 | def convert_sync_batchnorm(cls, module, skip_classes=()): 105 | for skip_class in skip_classes: 106 | if isinstance(module, skip_class): 107 | return module 108 | 109 | module_output = module 110 | if isinstance(module, torch.nn.modules.batchnorm._BatchNorm): 111 | module_output = SyncBatchNorm(module.num_features, 112 | module.eps, 113 | module.momentum, 114 | module.affine) 115 | if module.affine: 116 | with torch.no_grad(): 117 | module_output.weight.copy_(module.weight) 118 | module_output.bias.copy_(module.bias) 119 | # keep requires_grad unchanged 120 | module_output.weight.requires_grad = module.weight.requires_grad 121 | module_output.bias.requires_grad = module.bias.requires_grad 122 | module_output.running_mean = module.running_mean 123 | module_output.running_var = module.running_var 124 | module_output.num_batches_tracked = module.num_batches_tracked 125 | for name, child in module.named_children(): 126 | module_output.add_module(name, cls.convert_sync_batchnorm(child, skip_classes)) 127 | del module 128 | return module_output 129 | 130 | 131 | # API adapted from https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 132 | _ChildMessage = collections.namedtuple('Message', ['sum', 'ssum', 'sum_size']) 133 | _MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std']) 134 | 135 | 136 | class BatchNorm(_BatchNorm): 137 | def _check_input_dim(self, input): 138 | pass 139 | -------------------------------------------------------------------------------- /encoding/parallel.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Data Parallel""" 12 | import threading 13 | 14 | import torch 15 | import torch.cuda.comm as comm 16 | 17 | from torch.autograd import Variable, Function 18 | from torch.nn.parallel.data_parallel import DataParallel 19 | from torch.nn.parallel.parallel_apply import get_a_var 20 | from torch.nn.parallel._functions import Broadcast 21 | 22 | torch_ver = torch.__version__[:3] 23 | 24 | __all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion'] 25 | 26 | def allreduce(*inputs): 27 | """Cross GPU all reduce autograd operation for calculate mean and 28 | variance in SyncBN. 
29 | """ 30 | return AllReduce.apply(*inputs) 31 | 32 | class AllReduce(Function): 33 | @staticmethod 34 | def forward(ctx, num_inputs, *inputs): 35 | ctx.num_inputs = num_inputs 36 | ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)] 37 | inputs = [inputs[i:i + num_inputs] 38 | for i in range(0, len(inputs), num_inputs)] 39 | # sort before reduce sum 40 | inputs = sorted(inputs, key=lambda i: i[0].get_device()) 41 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 42 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 43 | return tuple([t for tensors in outputs for t in tensors]) 44 | 45 | @staticmethod 46 | def backward(ctx, *inputs): 47 | inputs = [i.data for i in inputs] 48 | inputs = [inputs[i:i + ctx.num_inputs] 49 | for i in range(0, len(inputs), ctx.num_inputs)] 50 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 51 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 52 | return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors]) 53 | 54 | 55 | class Reduce(Function): 56 | @staticmethod 57 | def forward(ctx, *inputs): 58 | ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))] 59 | inputs = sorted(inputs, key=lambda i: i.get_device()) 60 | return comm.reduce_add(inputs) 61 | 62 | @staticmethod 63 | def backward(ctx, gradOutput): 64 | return Broadcast.apply(ctx.target_gpus, gradOutput) 65 | 66 | 67 | class DataParallelModel(DataParallel): 68 | """Implements data parallelism at the module level. 69 | 70 | This container parallelizes the application of the given module by 71 | splitting the input across the specified devices by chunking in the 72 | batch dimension. 73 | In the forward pass, the module is replicated on each device, 74 | and each replica handles a portion of the input. During the backwards pass, gradients from each replica are summed into the original module. 75 | Note that the outputs are not gathered, please use compatible 76 | :class:`encoding.parallel.DataParallelCriterion`. 77 | 78 | The batch size should be larger than the number of GPUs used. It should 79 | also be an integer multiple of the number of GPUs so that each chunk is 80 | the same size (so that each GPU processes the same number of samples). 81 | 82 | Args: 83 | module: module to be parallelized 84 | device_ids: CUDA devices (default: all devices) 85 | 86 | Reference: 87 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 88 | Amit Agrawal. “Context Encoding for Semantic Segmentation. 89 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 90 | 91 | Example:: 92 | 93 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 94 | >>> y = net(x) 95 | """ 96 | def gather(self, outputs, output_device): 97 | return outputs 98 | 99 | def replicate(self, module, device_ids): 100 | modules = super(DataParallelModel, self).replicate(module, device_ids) 101 | execute_replication_callbacks(modules) 102 | return modules 103 | 104 | 105 | class DataParallelCriterion(DataParallel): 106 | """ 107 | Calculate loss in multiple-GPUs, which balance the memory usage for 108 | Semantic Segmentation. 109 | 110 | The targets are splitted across the specified devices by chunking in 111 | the batch dimension. Please use together with :class:`encoding.parallel.DataParallelModel`. 112 | 113 | Reference: 114 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 115 | Amit Agrawal. 
"Context Encoding for Semantic Segmentation." 116 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 117 | 118 | Example:: 119 | 120 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 121 | >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2]) 122 | >>> y = net(x) 123 | >>> loss = criterion(y, target) 124 | """ 125 | def forward(self, inputs, *targets, **kwargs): 126 | # the inputs are already scattered across devices 127 | # scatter the targets in the same way 128 | if not self.device_ids: 129 | return self.module(inputs, *targets, **kwargs) 130 | targets, kwargs = self.scatter(targets, kwargs, self.device_ids) 131 | if len(self.device_ids) == 1: 132 | return self.module(inputs, *targets[0], **kwargs[0]) 133 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 134 | outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs) 135 | return Reduce.apply(*outputs) / len(outputs) 136 | 137 | 138 | def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None): 139 | assert len(modules) == len(inputs) 140 | assert len(targets) == len(inputs) 141 | if kwargs_tup: 142 | assert len(modules) == len(kwargs_tup) 143 | else: 144 | kwargs_tup = ({},) * len(modules) 145 | if devices is not None: 146 | assert len(modules) == len(devices) 147 | else: 148 | devices = [None] * len(modules) 149 | 150 | lock = threading.Lock() 151 | results = {} 152 | if torch_ver != "0.3": 153 | grad_enabled = torch.is_grad_enabled() 154 | 155 | def _worker(i, module, input, target, kwargs, device=None): 156 | if torch_ver != "0.3": 157 | torch.set_grad_enabled(grad_enabled) 158 | if device is None: 159 | device = get_a_var(input).get_device() 160 | try: 161 | with torch.cuda.device(device): 162 | output = module(*(input + target), **kwargs) 163 | with lock: 164 | results[i] = output 165 | except Exception as e: 166 | with lock: 167 | results[i] = e 168 | 169 | if len(modules) > 1: 170 | threads = [threading.Thread(target=_worker, 171 | args=(i, module, input, target, 172 | kwargs, device),) 173 | for i, (module, input, target, kwargs, device) in 174 | enumerate(zip(modules, inputs, targets, kwargs_tup, devices))] 175 | 176 | for thread in threads: 177 | thread.start() 178 | for thread in threads: 179 | thread.join() 180 | else: 181 | _worker(0, modules[0], inputs[0], targets[0], kwargs_tup[0], devices[0])  # pass targets so kwargs/device land in the right arguments 182 | 183 | outputs = [] 184 | for i in range(len(inputs)): 185 | output = results[i] 186 | if isinstance(output, Exception): 187 | raise output 188 | outputs.append(output) 189 | return outputs 190 | 191 | 192 | ########################################################################### 193 | # Adapted from Synchronized-BatchNorm-PyTorch. 194 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 195 | # 196 | class CallbackContext(object): 197 | pass 198 | 199 | 200 | def execute_replication_callbacks(modules): 201 | """ 202 | Execute a replication callback `__data_parallel_replicate__` on each module created 203 | by the original replication. 204 | 205 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`. 206 | 207 | Note that, as all replicated modules are isomorphic, we assign each sub-module a context 208 | (shared among multiple copies of this module on different devices). 209 | Through this context, different copies can share some information.
210 | 211 | We guarantee that the callback on the master copy (the first copy) will be called ahead 212 | of the callbacks of any slave copies. 213 | """ 214 | master_copy = modules[0] 215 | nr_modules = len(list(master_copy.modules())) 216 | ctxs = [CallbackContext() for _ in range(nr_modules)] 217 | 218 | for i, module in enumerate(modules): 219 | for j, m in enumerate(module.modules()): 220 | if hasattr(m, '__data_parallel_replicate__'): 221 | m.__data_parallel_replicate__(ctxs[j], i) 222 | -------------------------------------------------------------------------------- /encoding/utils/__init__.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Util Tools""" 12 | from .lr_scheduler import LR_Scheduler 13 | from .metrics import SegmentationMetric, batch_intersection_union, batch_pix_accuracy 14 | from .pallete import get_mask_pallete 15 | from .files import * 16 | 17 | __all__ = ['LR_Scheduler', 'batch_pix_accuracy', 'batch_intersection_union', 18 | 'save_checkpoint', 'download', 'mkdir', 'check_sha1', 19 | 'get_mask_pallete'] 20 | -------------------------------------------------------------------------------- /encoding/utils/files.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import shutil 4 | import hashlib 5 | import requests 6 | 7 | import torch 8 | 9 | from tqdm import tqdm 10 | 11 | __all__ = ['save_checkpoint', 'download', 'mkdir', 'check_sha1'] 12 | 13 | def save_checkpoint(state, args, is_best, filename='checkpoint.pth.tar'): 14 | """Saves checkpoint to disk""" 15 | directory = "experiments/segmentation/runs/%s/%s/%s/"%(args.dataset, args.model, args.checkname) 16 | if not os.path.exists(directory): 17 | os.makedirs(directory) 18 | filename = directory + filename 19 | torch.save(state, filename) 20 | if is_best: 21 | shutil.copyfile(filename, directory + 'model_best.pth.tar') 22 | 23 | 24 | def download(url, path=None, overwrite=False, sha1_hash=None): 25 | """Download a file from the given URL 26 | Parameters 27 | ---------- 28 | url : str 29 | URL to download 30 | path : str, optional 31 | Destination path to store the downloaded file. By default, stores to the 32 | current directory with the same name as in the url. 33 | overwrite : bool, optional 34 | Whether to overwrite the destination file if it already exists. 35 | sha1_hash : str, optional 36 | Expected sha1 hash in hexadecimal digits. An existing file is ignored when the hash 37 | is specified but doesn't match. 38 | Returns 39 | ------- 40 | str 41 | The file path of the downloaded file.
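    Examples
    --------
    A minimal sketch; the URL and destination below are placeholders for
    illustration, not assets shipped with this repository.

    >>> fname = download('https://example.com/resnet50.pth',
    ...                  path='./pretrained', sha1_hash=None)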
42 | """ 43 | if path is None: 44 | fname = url.split('/')[-1] 45 | else: 46 | path = os.path.expanduser(path) 47 | if os.path.isdir(path): 48 | fname = os.path.join(path, url.split('/')[-1]) 49 | else: 50 | fname = path 51 | 52 | if overwrite or not os.path.exists(fname) or (sha1_hash and not check_sha1(fname, sha1_hash)): 53 | dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname))) 54 | if not os.path.exists(dirname): 55 | os.makedirs(dirname) 56 | 57 | print('Downloading %s from %s...'%(fname, url)) 58 | r = requests.get(url, stream=True) 59 | if r.status_code != 200: 60 | raise RuntimeError("Failed downloading url %s"%url) 61 | total_length = r.headers.get('content-length') 62 | with open(fname, 'wb') as f: 63 | if total_length is None: # no content length header 64 | for chunk in r.iter_content(chunk_size=1024): 65 | if chunk: # filter out keep-alive new chunks 66 | f.write(chunk) 67 | else: 68 | total_length = int(total_length) 69 | for chunk in tqdm(r.iter_content(chunk_size=1024), 70 | total=int(total_length / 1024. + 0.5), 71 | unit='KB', unit_scale=False, dynamic_ncols=True): 72 | f.write(chunk) 73 | 74 | if sha1_hash and not check_sha1(fname, sha1_hash): 75 | raise UserWarning('File {} is downloaded but the content hash does not match. ' \ 76 | 'The repo may be outdated or download may be incomplete. ' \ 77 | 'If the "repo_url" is overridden, consider switching to ' \ 78 | 'the default repo.'.format(fname)) 79 | 80 | return fname 81 | 82 | 83 | def check_sha1(filename, sha1_hash): 84 | """Check whether the sha1 hash of the file content matches the expected hash. 85 | Parameters 86 | ---------- 87 | filename : str 88 | Path to the file. 89 | sha1_hash : str 90 | Expected sha1 hash in hexadecimal digits. 91 | Returns 92 | ------- 93 | bool 94 | Whether the file content matches the expected hash. 
95 | """ 96 | sha1 = hashlib.sha1() 97 | with open(filename, 'rb') as f: 98 | while True: 99 | data = f.read(1048576) 100 | if not data: 101 | break 102 | sha1.update(data) 103 | 104 | return sha1.hexdigest() == sha1_hash 105 | 106 | 107 | def mkdir(path): 108 | """Make a directory; succeed silently if it already exists.""" 109 | try: 110 | os.makedirs(path) 111 | except OSError as exc: # Python >2.5 112 | if exc.errno == errno.EEXIST and os.path.isdir(path): 113 | pass 114 | else: 115 | raise 116 | -------------------------------------------------------------------------------- /encoding/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import math 12 | 13 | class LR_Scheduler(object): 14 | """Learning Rate Scheduler 15 | 16 | Step mode: ``lr = baselr * 0.1 ^ floor(epoch / lr_step)`` 17 | 18 | Cosine mode: ``lr = baselr * 0.5 * (1 + cos(pi * iter/maxiter))`` 19 | 20 | Poly mode: ``lr = baselr * (1 - iter/maxiter) ^ 0.9`` 21 | 22 | Args: 23 | args: :attr:`args.lr_scheduler` lr scheduler mode (`cos`, `poly`, `step`), 24 | :attr:`args.lr` base learning rate, :attr:`args.epochs` number of epochs, 25 | :attr:`args.lr_step` 26 | 27 | iters_per_epoch: number of iterations per epoch 28 | """ 29 | def __init__(self, mode, base_lr, num_epochs, iters_per_epoch=0, 30 | lr_step=0, warmup_epochs=0): 31 | self.mode = mode 32 | print('Using {} LR Scheduler!'.format(self.mode)) 33 | self.lr = base_lr 34 | if mode == 'step': 35 | assert lr_step 36 | self.lr_step = lr_step 37 | self.iters_per_epoch = iters_per_epoch 38 | self.N = num_epochs * iters_per_epoch 39 | self.epoch = -1 40 | self.warmup_iters = warmup_epochs * iters_per_epoch 41 | 42 | def __call__(self, optimizer, i, epoch, best_pred): 43 | T = epoch * self.iters_per_epoch + i 44 | if self.mode == 'cos': 45 | lr = 0.5 * self.lr * (1 + math.cos(1.0 * T / self.N * math.pi)) 46 | elif self.mode == 'poly': 47 | lr = self.lr * pow((1 - 1.0 * T / self.N), 0.9) 48 | elif self.mode == 'step': 49 | lr = self.lr * (0.1 ** (epoch // self.lr_step)) 50 | else: 51 | raise NotImplementedError 52 | # warm up lr schedule 53 | if self.warmup_iters > 0 and T < self.warmup_iters: 54 | lr = lr * 1.0 * T / self.warmup_iters 55 | if epoch > self.epoch: 56 | print('\n=>Epoch %i, learning rate = %.4f, \ 57 | previous best = %.4f' % (epoch, lr, best_pred)) 58 | self.epoch = epoch 59 | assert lr >= 0 60 | self._adjust_learning_rate(optimizer, lr) 61 | 62 | def _adjust_learning_rate(self, optimizer, lr): 63 | if len(optimizer.param_groups) == 1: 64 | optimizer.param_groups[0]['lr'] = lr 65 | else: 66 | # enlarge the lr at the head 67 | optimizer.param_groups[0]['lr'] = lr 68 | for i in range(1, len(optimizer.param_groups)): 69 | optimizer.param_groups[i]['lr'] = lr * 10 70 | -------------------------------------------------------------------------------- /encoding/utils/metrics.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email:
zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import threading 12 | import numpy as np 13 | 14 | import torch 15 | 16 | class SegmentationMetric(object): 17 | """Computes pixAcc and mIoU metric scores 18 | """ 19 | def __init__(self, nclass): 20 | self.nclass = nclass 21 | self.lock = threading.Lock() 22 | self.reset() 23 | 24 | def update(self, labels, preds): 25 | def evaluate_worker(self, label, pred): 26 | correct, labeled = batch_pix_accuracy( 27 | pred, label) 28 | inter, union = batch_intersection_union( 29 | pred, label, self.nclass) 30 | with self.lock: 31 | self.total_correct += correct 32 | self.total_label += labeled 33 | self.total_inter += inter 34 | self.total_union += union 35 | return 36 | 37 | if isinstance(preds, torch.Tensor): 38 | evaluate_worker(self, labels, preds) 39 | elif isinstance(preds, (list, tuple)): 40 | threads = [threading.Thread(target=evaluate_worker, 41 | args=(self, label, pred), 42 | ) 43 | for (label, pred) in zip(labels, preds)] 44 | for thread in threads: 45 | thread.start() 46 | for thread in threads: 47 | thread.join() 48 | else: 49 | raise NotImplementedError 50 | 51 | def get(self): 52 | pixAcc = 1.0 * self.total_correct / (np.spacing(1) + self.total_label) 53 | IoU = 1.0 * self.total_inter / (np.spacing(1) + self.total_union) 54 | mIoU = IoU.mean() 55 | return pixAcc, mIoU 56 | 57 | def reset(self): 58 | self.total_inter = 0 59 | self.total_union = 0 60 | self.total_correct = 0 61 | self.total_label = 0 62 | return 63 | 64 | 65 | def batch_pix_accuracy(output, target): 66 | """Batch Pixel Accuracy 67 | Args: 68 | output: model output, 4D score tensor 69 | target: label, 3D tensor 70 | """ 71 | _, predict = torch.max(output, 1) 72 | 73 | predict = predict.cpu().numpy().astype('int64') + 1 74 | target = target.cpu().numpy().astype('int64') + 1 75 | 76 | pixel_labeled = np.sum(target > 0) 77 | pixel_correct = np.sum((predict == target)*(target > 0)) 78 | assert pixel_correct <= pixel_labeled, \ 79 | "Correct area should not be larger than Labeled area" 80 | return pixel_correct, pixel_labeled 81 | 82 | 83 | def batch_intersection_union(output, target, nclass): 84 | """Batch Intersection over Union 85 | Args: 86 | output: model output, 4D score tensor 87 | target: label, 3D tensor 88 | nclass: number of categories (int) 89 | """ 90 | _, predict = torch.max(output, 1) 91 | mini = 1 92 | maxi = nclass 93 | nbins = nclass 94 | predict = predict.cpu().numpy().astype('int64') + 1 95 | target = target.cpu().numpy().astype('int64') + 1 96 | 97 | predict = predict * (target > 0).astype(predict.dtype) 98 | intersection = predict * (predict == target) 99 | # areas of intersection and union 100 | area_inter, _ = np.histogram(intersection, bins=nbins, range=(mini, maxi)) 101 | area_pred, _ = np.histogram(predict, bins=nbins, range=(mini, maxi)) 102 | area_lab, _ = np.histogram(target, bins=nbins, range=(mini, maxi)) 103 | area_union = area_pred + area_lab - area_inter 104 | assert (area_inter <= area_union).all(), \ 105 | "Intersection area should not be larger than Union area" 106 | return area_inter, area_union 107 | -------------------------------------------------------------------------------- /encoding/utils/pallete.py: -------------------------------------------------------------------------------- 1 |
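# Note on the colormaps defined in this file: _get_voc_pallete builds the
# standard PASCAL VOC palette by bit-interleaving each class index into the
# R, G and B channels, e.g. class 1 -> (128, 0, 0), class 2 -> (0, 128, 0),
# class 3 -> (128, 128, 0); adepallete and citypallete are flat
# [R, G, B, R, G, B, ...] lists consumed by PIL's Image.putpalette.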
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | from PIL import Image 12 | 13 | def get_mask_pallete(npimg, dataset='detail'): 14 | """Get image color pallete for visualizing masks""" 15 | # recovery boundary 16 | dataset = dataset.lower() 17 | if dataset == 'pascal_voc': 18 | npimg[npimg==21] = 255 19 | # put colormap 20 | out_img = Image.fromarray(npimg.squeeze().astype('uint8')) 21 | if dataset == 'ade20k': 22 | out_img.putpalette(adepallete) 23 | elif dataset == 'cityscapes': 24 | out_img.putpalette(citypallete) 25 | elif dataset in ('pcontext', 'pascal_voc', 'pascal_aug'): 26 | out_img.putpalette(vocpallete) 27 | return out_img 28 | 29 | def _get_voc_pallete(num_cls): 30 | n = num_cls 31 | pallete = [0]*(n*3) 32 | for j in range(0,n): 33 | lab = j 34 | pallete[j*3+0] = 0 35 | pallete[j*3+1] = 0 36 | pallete[j*3+2] = 0 37 | i = 0 38 | while (lab > 0): 39 | pallete[j*3+0] |= (((lab >> 0) & 1) << (7-i)) 40 | pallete[j*3+1] |= (((lab >> 1) & 1) << (7-i)) 41 | pallete[j*3+2] |= (((lab >> 2) & 1) << (7-i)) 42 | i = i + 1 43 | lab >>= 3 44 | return pallete 45 | 46 | vocpallete = _get_voc_pallete(256) 47 | 48 | adepallete = [0,0,0,120,120,120,180,120,120,6,230,230,80,50,50,4,200,3,120,120,80,140,140,140,204,5,255,230,230,230,4,250,7,224,5,255,235,255,7,150,5,61,120,120,70,8,255,51,255,6,82,143,255,140,204,255,4,255,51,7,204,70,3,0,102,200,61,230,250,255,6,51,11,102,255,255,7,71,255,9,224,9,7,230,220,220,220,255,9,92,112,9,255,8,255,214,7,255,224,255,184,6,10,255,71,255,41,10,7,255,255,224,255,8,102,8,255,255,61,6,255,194,7,255,122,8,0,255,20,255,8,41,255,5,153,6,51,255,235,12,255,160,150,20,0,163,255,140,140,140,250,10,15,20,255,0,31,255,0,255,31,0,255,224,0,153,255,0,0,0,255,255,71,0,0,235,255,0,173,255,31,0,255,11,200,200,255,82,0,0,255,245,0,61,255,0,255,112,0,255,133,255,0,0,255,163,0,255,102,0,194,255,0,0,143,255,51,255,0,0,82,255,0,255,41,0,255,173,10,0,255,173,255,0,0,255,153,255,92,0,255,0,255,255,0,245,255,0,102,255,173,0,255,0,20,255,184,184,0,31,255,0,255,61,0,71,255,255,0,204,0,255,194,0,255,82,0,10,255,0,112,255,51,0,255,0,194,255,0,122,255,0,255,163,255,153,0,0,255,10,255,112,0,143,255,0,82,0,255,163,255,0,255,235,0,8,184,170,133,0,255,0,255,92,184,0,255,255,0,31,0,184,255,0,214,255,255,0,112,92,255,0,0,224,255,112,224,255,70,184,160,163,0,255,153,0,255,71,255,0,255,0,163,255,204,0,255,0,143,0,255,235,133,255,0,255,0,235,245,0,255,255,0,122,255,245,0,10,190,212,214,255,0,0,204,255,20,0,255,255,255,0,0,153,255,0,41,255,0,255,204,41,0,255,41,255,0,173,0,255,0,245,255,71,0,255,122,0,255,0,255,184,0,92,255,184,255,0,0,133,255,255,214,0,25,194,194,102,255,0,92,0,255] 49 | 50 | citypallete = [ 51 | 
128,64,128,244,35,232,70,70,70,102,102,156,190,153,153,153,153,153,250,170,30,220,220,0,107,142,35,152,251,152,70,130,180,220,20,60,255,0,0,0,0,142,0,0,70,0,60,100,0,80,100,0,0,230,119,11,32,128,192,0,0,64,128,128,64,128,0,192,128,128,192,128,64,64,0,192,64,0,64,192,0,192,192,0,64,64,128,192,64,128,64,192,128,192,192,128,0,0,64,128,0,64,0,128,64,128,128,64,0,0,192,128,0,192,0,128,192,128,128,192,64,0,64,192,0,64,64,128,64,192,128,64,64,0,192,192,0,192,64,128,192,192,128,192,0,64,64,128,64,64,0,192,64,128,192,64,0,64,192,128,64,192,0,192,192,128,192,192,64,64,64,192,64,64,64,192,64,192,192,64,64,64,192,192,64,192,64,192,192,192,192,192,32,0,0,160,0,0,32,128,0,160,128,0,32,0,128,160,0,128,32,128,128,160,128,128,96,0,0,224,0,0,96,128,0,224,128,0,96,0,128,224,0,128,96,128,128,224,128,128,32,64,0,160,64,0,32,192,0,160,192,0,32,64,128,160,64,128,32,192,128,160,192,128,96,64,0,224,64,0,96,192,0,224,192,0,96,64,128,224,64,128,96,192,128,224,192,128,32,0,64,160,0,64,32,128,64,160,128,64,32,0,192,160,0,192,32,128,192,160,128,192,96,0,64,224,0,64,96,128,64,224,128,64,96,0,192,224,0,192,96,128,192,224,128,192,32,64,64,160,64,64,32,192,64,160,192,64,32,64,192,160,64,192,32,192,192,160,192,192,96,64,64,224,64,64,96,192,64,224,192,64,96,64,192,224,64,192,96,192,192,224,192,192,0,32,0,128,32,0,0,160,0,128,160,0,0,32,128,128,32,128,0,160,128,128,160,128,64,32,0,192,32,0,64,160,0,192,160,0,64,32,128,192,32,128,64,160,128,192,160,128,0,96,0,128,96,0,0,224,0,128,224,0,0,96,128,128,96,128,0,224,128,128,224,128,64,96,0,192,96,0,64,224,0,192,224,0,64,96,128,192,96,128,64,224,128,192,224,128,0,32,64,128,32,64,0,160,64,128,160,64,0,32,192,128,32,192,0,160,192,128,160,192,64,32,64,192,32,64,64,160,64,192,160,64,64,32,192,192,32,192,64,160,192,192,160,192,0,96,64,128,96,64,0,224,64,128,224,64,0,96,192,128,96,192,0,224,192,128,224,192,64,96,64,192,96,64,64,224,64,192,224,64,64,96,192,192,96,192,64,224,192,192,224,192,32,32,0,160,32,0,32,160,0,160,160,0,32,32,128,160,32,128,32,160,128,160,160,128,96,32,0,224,32,0,96,160,0,224,160,0,96,32,128,224,32,128,96,160,128,224,160,128,32,96,0,160,96,0,32,224,0,160,224,0,32,96,128,160,96,128,32,224,128,160,224,128,96,96,0,224,96,0,96,224,0,224,224,0,96,96,128,224,96,128,96,224,128,224,224,128,32,32,64,160,32,64,32,160,64,160,160,64,32,32,192,160,32,192,32,160,192,160,160,192,96,32,64,224,32,64,96,160,64,224,160,64,96,32,192,224,32,192,96,160,192,224,160,192,32,96,64,160,96,64,32,224,64,160,224,64,32,96,192,160,96,192,32,224,192,160,224,192,96,96,64,224,96,64,96,224,64,224,224,64,96,96,192,224,96,192,96,224,192,0,0,0] 52 | -------------------------------------------------------------------------------- /experiments/segmentation/option.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | import argparse 7 | 8 | import torch 9 | 10 | class Options(): 11 | def __init__(self): 12 | parser = argparse.ArgumentParser(description='PyTorch \ 13 | Segmentation') 14 | # model and dataset 15 | parser.add_argument('--model', type=str, default='encnet', 16 | help='model name (default: encnet)') 17 | parser.add_argument('--backbone', type=str, default='resnet50', 18 | help='backbone name (default: resnet50)') 19 | parser.add_argument('--jpu', type=str, default=None, 20 | help='JPU name') 21 | 
parser.add_argument('--dilated', action='store_true', default= 22 | False, help='dilation') 23 | parser.add_argument('--lateral', action='store_true', default= 24 | False, help='employ FPN') 25 | parser.add_argument('--dataset', type=str, default='ade20k', 26 | help='dataset name (default: pascal12)') 27 | parser.add_argument('--workers', type=int, default=16, 28 | metavar='N', help='dataloader threads') 29 | parser.add_argument('--base-size', type=int, default=520, 30 | help='base image size') 31 | parser.add_argument('--crop-size', type=int, default=480, 32 | help='crop image size') 33 | parser.add_argument('--train-split', type=str, default='train', 34 | help='dataset train split (default: train)') 35 | # training hyper params 36 | parser.add_argument('--aux', action='store_true', default= False, 37 | help='Auxilary Loss') 38 | parser.add_argument('--aux-weight', type=float, default=0.2, 39 | help='Auxilary loss weight (default: 0.2)') 40 | parser.add_argument('--se-loss', action='store_true', default= False, 41 | help='Semantic Encoding Loss SE-loss') 42 | parser.add_argument('--se-weight', type=float, default=0.2, 43 | help='SE-loss weight (default: 0.2)') 44 | parser.add_argument('--epochs', type=int, default=None, metavar='N', 45 | help='number of epochs to train (default: auto)') 46 | parser.add_argument('--start_epoch', type=int, default=0, 47 | metavar='N', help='start epochs (default:0)') 48 | parser.add_argument('--batch-size', type=int, default=None, 49 | metavar='N', help='input batch size for \ 50 | training (default: auto)') 51 | parser.add_argument('--test-batch-size', type=int, default=None, 52 | metavar='N', help='input batch size for \ 53 | testing (default: same as batch size)') 54 | # optimizer params 55 | parser.add_argument('--lr', type=float, default=None, metavar='LR', 56 | help='learning rate (default: auto)') 57 | parser.add_argument('--lr-scheduler', type=str, default='poly', 58 | help='learning rate scheduler (default: poly)') 59 | parser.add_argument('--momentum', type=float, default=0.9, 60 | metavar='M', help='momentum (default: 0.9)') 61 | parser.add_argument('--weight-decay', type=float, default=1e-4, 62 | metavar='M', help='w-decay (default: 1e-4)') 63 | # cuda, seed and logging 64 | parser.add_argument('--no-cuda', action='store_true', default= 65 | False, help='disables CUDA training') 66 | parser.add_argument('--seed', type=int, default=1, metavar='S', 67 | help='random seed (default: 1)') 68 | # checking point 69 | parser.add_argument('--resume', type=str, default=None, 70 | help='put the path to resuming file if needed') 71 | parser.add_argument('--checkname', type=str, default='default', 72 | help='set the checkpoint name') 73 | parser.add_argument('--model-zoo', type=str, default=None, 74 | help='evaluating on model zoo model') 75 | # finetuning pre-trained models 76 | parser.add_argument('--ft', action='store_true', default= False, 77 | help='finetuning on a different dataset') 78 | # evaluation option 79 | parser.add_argument('--split', default='val') 80 | parser.add_argument('--mode', default='testval') 81 | parser.add_argument('--ms', action='store_true', default=False, 82 | help='multi scale & flip') 83 | parser.add_argument('--no-val', action='store_true', default= False, 84 | help='skip validation during training') 85 | parser.add_argument('--save-folder', type=str, default='experiments/segmentation/results', 86 | help = 'path to save images') 87 | 88 | # the parser 89 | self.parser = parser 90 | 91 | def parse(self): 92 | args = 
self.parser.parse_args() 93 | args.cuda = not args.no_cuda and torch.cuda.is_available() 94 | # default settings for epochs, batch_size and lr 95 | if args.epochs is None: 96 | epoches = { 97 | 'coco': 30, 98 | 'citys': 240, 99 | 'pascal_voc': 50, 100 | 'pascal_aug': 50, 101 | 'pcontext': 80, 102 | 'ade20k': 120, 103 | } 104 | args.epochs = epoches[args.dataset.lower()] 105 | if args.batch_size is None: 106 | args.batch_size = 16 107 | if args.test_batch_size is None: 108 | args.test_batch_size = args.batch_size 109 | if args.lr is None: 110 | lrs = { 111 | 'coco': 0.01, 112 | 'citys': 0.01, 113 | 'pascal_voc': 0.0001, 114 | 'pascal_aug': 0.001, 115 | 'pcontext': 0.001, 116 | 'ade20k': 0.01, 117 | } 118 | args.lr = lrs[args.dataset.lower()] / 16 * args.batch_size 119 | print(args) 120 | return args 121 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/deeplab_res50_pcontext.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset pcontext \ 5 | --model deeplab --jpu [JPU|JPU_X] --aux --aux-weight 0.4 \ 6 | --backbone resnet50 --checkname deeplab_res50_pcontext 7 | 8 | #test [single-scale] 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 10 | --model deeplab --jpu [JPU|JPU_X] --aux \ 11 | --backbone resnet50 --resume {MODEL} --split val --mode testval 12 | 13 | #test [multi-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 15 | --model deeplab --jpu [JPU|JPU_X] --aux \ 16 | --backbone resnet50 --resume {MODEL} --split val --mode testval --ms 17 | 18 | #predict [single-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 20 | --model deeplab --jpu [JPU|JPU_X] --aux \ 21 | --backbone resnet50 --resume {MODEL} --split val --mode test 22 | 23 | #predict [multi-scale] 24 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 25 | --model deeplab --jpu [JPU|JPU_X] --aux \ 26 | --backbone resnet50 --resume {MODEL} --split val --mode test --ms 27 | 28 | #fps 29 | CUDA_VISIBLE_DEVICES=0 python -m experiments.segmentation.test_fps_params --dataset pcontext \ 30 | --model deeplab --jpu [JPU|JPU_X] --aux \ 31 | --backbone resnet50 32 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/encnet_res101_ade20k_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset ade20k \ 5 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 6 | --backbone resnet101 --checkname encnet_res101_ade20k_train 7 | 8 | #test [single-scale] 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 10 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 11 | --backbone resnet101 --resume {MODEL} --split val --mode testval 12 | 13 | #test [multi-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 15 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 16 | --backbone resnet101 --resume {MODEL} --split val --mode testval --ms 17 | 18 | #predict [single-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test 
--dataset ade20k \ 20 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 21 | --backbone resnet101 --resume {MODEL} --split val --mode test 22 | 23 | #predict [multi-scale] 24 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 25 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 26 | --backbone resnet101 --resume {MODEL} --split val --mode test --ms 27 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/encnet_res101_ade20k_trainval.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset ade20k \ 5 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 6 | --backbone resnet101 --checkname encnet_res101_ade20k_train 7 | 8 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset ade20k \ 9 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 10 | --backbone resnet101 --checkname encnet_res101_ade20k_trainval \ 11 | --train-split trainval --lr 0.001 --epochs 20 --ft --resume {MODEL_PATH} 12 | 13 | #predict [single-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 15 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 16 | --backbone resnet101 --resume {MODEL} --split test --mode test 17 | 18 | #predict [multi-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 20 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 21 | --backbone resnet101 --resume {MODEL} --split test --mode test --ms 22 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/encnet_res101_pcontext.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset pcontext \ 5 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 6 | --backbone resnet101 --checkname encnet_res101_pcontext 7 | 8 | #test [single-scale] 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 10 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 11 | --backbone resnet101 --resume {MODEL} --split val --mode testval 12 | 13 | #test [multi-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 15 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 16 | --backbone resnet101 --resume {MODEL} --split val --mode testval --ms 17 | 18 | #predict [single-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 20 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 21 | --backbone resnet101 --resume {MODEL} --split val --mode test 22 | 23 | #predict [multi-scale] 24 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 25 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 26 | --backbone resnet101 --resume {MODEL} --split val --mode test --ms 27 | 28 | #fps 29 | CUDA_VISIBLE_DEVICES=0 python -m experiments.segmentation.test_fps_params --dataset pcontext \ 30 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 31 | --backbone resnet101 32 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/encnet_res50_ade20k_train.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset ade20k \ 5 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 6 | --backbone resnet50 --checkname encnet_res50_ade20k_train 7 | 8 | #test [single-scale] 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 10 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 11 | --backbone resnet50 --resume {MODEL} --split val --mode testval 12 | 13 | #test [multi-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 15 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 16 | --backbone resnet50 --resume {MODEL} --split val --mode testval --ms 17 | 18 | #predict [single-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 20 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 21 | --backbone resnet50 --resume {MODEL} --split val --mode test 22 | 23 | #predict [multi-scale] 24 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 25 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 26 | --backbone resnet50 --resume {MODEL} --split val --mode test --ms 27 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/encnet_res50_ade20k_trainval.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset ade20k \ 5 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 6 | --backbone resnet50 --checkname encnet_res50_ade20k_train 7 | 8 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset ade20k \ 9 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 10 | --backbone resnet50 --checkname encnet_res50_ade20k_trainval \ 11 | --train-split trainval --lr 0.001 --epochs 20 --ft --resume {MODEL_PATH} 12 | 13 | #predict [single-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 15 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 16 | --backbone resnet50 --resume {MODEL} --split test --mode test 17 | 18 | #predict [multi-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 20 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 21 | --backbone resnet50 --resume {MODEL} --split test --mode test --ms 22 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/encnet_res50_pcontext.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset pcontext \ 5 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 6 | --backbone resnet50 --checkname encnet_res50_pcontext 7 | 8 | #test [single-scale] 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 10 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 11 | --backbone resnet50 --resume {MODEL} --split val --mode testval 12 | 13 | #test [multi-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 15 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 16 | --backbone resnet50 --resume 
{MODEL} --split val --mode testval --ms 17 | 18 | #predict [single-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 20 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 21 | --backbone resnet50 --resume {MODEL} --split val --mode test 22 | 23 | #predict [multi-scale] 24 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 25 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 26 | --backbone resnet50 --resume {MODEL} --split val --mode test --ms 27 | 28 | #fps 29 | CUDA_VISIBLE_DEVICES=0 python -m experiments.segmentation.test_fps_params --dataset pcontext \ 30 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 31 | --backbone resnet50 32 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/psp_res50_pcontext.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset pcontext \ 5 | --model psp --jpu [JPU|JPU_X] --aux --aux-weight 0.4 \ 6 | --backbone resnet50 --checkname psp_res50_pcontext 7 | 8 | #test [single-scale] 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 10 | --model psp --jpu [JPU|JPU_X] --aux \ 11 | --backbone resnet50 --resume {MODEL} --split val --mode testval 12 | 13 | #test [multi-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 15 | --model psp --jpu [JPU|JPU_X] --aux \ 16 | --backbone resnet50 --resume {MODEL} --split val --mode testval --ms 17 | 18 | #predict [single-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 20 | --model psp --jpu [JPU|JPU_X] --aux \ 21 | --backbone resnet50 --resume {MODEL} --split val --mode test 22 | 23 | #predict [multi-scale] 24 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 25 | --model psp --jpu [JPU|JPU_X] --aux \ 26 | --backbone resnet50 --resume {MODEL} --split val --mode test --ms 27 | 28 | #fps 29 | CUDA_VISIBLE_DEVICES=0 python -m experiments.segmentation.test_fps_params --dataset pcontext \ 30 | --model psp --jpu [JPU|JPU_X] --aux \ 31 | --backbone resnet50 32 | -------------------------------------------------------------------------------- /experiments/segmentation/test.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import os 8 | 9 | import torch 10 | import torchvision.transforms as transform 11 | 12 | import encoding.utils as utils 13 | 14 | from tqdm import tqdm 15 | 16 | from torch.utils import data 17 | 18 | from encoding.nn import BatchNorm 19 | from encoding.datasets import get_segmentation_dataset, test_batchify_fn 20 | from encoding.models import get_model, get_segmentation_model, MultiEvalModule 21 | 22 | from .option import Options 23 | 24 | 25 | def test(args): 26 | # output folder 27 | outdir = args.save_folder 28 | if not os.path.exists(outdir): 29 | os.makedirs(outdir) 30 | # data transforms 31 | input_transform = transform.Compose([ 32 | transform.ToTensor(), 33 | transform.Normalize([.485, .456, .406], [.229, .224, .225])]) 
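    # These are the standard ImageNet channel means and standard deviations;
    # they should match the statistics used to pretrain the backbone, and the
    # same transform is applied at training time in train.py.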
34 | # dataset 35 | testset = get_segmentation_dataset(args.dataset, split=args.split, mode=args.mode, 36 | transform=input_transform) 37 | # dataloader 38 | loader_kwargs = {'num_workers': args.workers, 'pin_memory': True} \ 39 | if args.cuda else {} 40 | test_data = data.DataLoader(testset, batch_size=args.test_batch_size, 41 | drop_last=False, shuffle=False, 42 | collate_fn=test_batchify_fn, **loader_kwargs) 43 | # model 44 | if args.model_zoo is not None: 45 | model = get_model(args.model_zoo, pretrained=True) 46 | else: 47 | model = get_segmentation_model(args.model, dataset = args.dataset, 48 | backbone = args.backbone, dilated = args.dilated, 49 | lateral = args.lateral, jpu = args.jpu, aux = args.aux, 50 | se_loss = args.se_loss, norm_layer = BatchNorm, 51 | base_size = args.base_size, crop_size = args.crop_size) 52 | # resuming checkpoint 53 | if args.resume is None or not os.path.isfile(args.resume): 54 | raise RuntimeError("=> no checkpoint found at '{}'" .format(args.resume)) 55 | checkpoint = torch.load(args.resume) 56 | # strict=False, so that it is compatible with old pytorch saved models 57 | model.load_state_dict(checkpoint['state_dict']) 58 | print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) 59 | 60 | print(model) 61 | scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25] if args.dataset == 'citys' else \ 62 | [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 63 | if not args.ms: 64 | scales = [1.0] 65 | evaluator = MultiEvalModule(model, testset.num_class, scales=scales, flip=args.ms).cuda() 66 | evaluator.eval() 67 | metric = utils.SegmentationMetric(testset.num_class) 68 | 69 | tbar = tqdm(test_data) 70 | for i, (image, dst) in enumerate(tbar): 71 | if 'val' in args.mode: 72 | with torch.no_grad(): 73 | predicts = evaluator.parallel_forward(image) 74 | metric.update(dst, predicts) 75 | pixAcc, mIoU = metric.get() 76 | tbar.set_description( 'pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU)) 77 | else: 78 | with torch.no_grad(): 79 | outputs = evaluator.parallel_forward(image) 80 | predicts = [testset.make_pred(torch.max(output, 1)[1].cpu().numpy()) 81 | for output in outputs] 82 | for predict, impath in zip(predicts, dst): 83 | mask = utils.get_mask_pallete(predict, args.dataset) 84 | outname = os.path.splitext(impath)[0] + '.png' 85 | mask.save(os.path.join(outdir, outname)) 86 | 87 | if __name__ == "__main__": 88 | args = Options().parse() 89 | torch.manual_seed(args.seed) 90 | args.test_batch_size = torch.cuda.device_count() 91 | test(args) 92 | -------------------------------------------------------------------------------- /experiments/segmentation/test_fps_params.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import torch 4 | import encoding 5 | 6 | from encoding.nn import BatchNorm 7 | from .option import Options 8 | 9 | 10 | if __name__ == "__main__": 11 | args = Options().parse() 12 | model = encoding.models.get_segmentation_model(args.model, dataset = args.dataset, 13 | backbone = args.backbone, dilated = args.dilated, 14 | lateral = args.lateral, jpu = args.jpu, aux = args.aux, 15 | se_loss = args.se_loss, norm_layer = BatchNorm) 16 | 17 | num_parameters = sum([l.nelement() for l in model.pretrained.parameters()]) 18 | print(num_parameters) 19 | num_parameters = sum([l.nelement() for l in model.head.parameters()]) 20 | print(num_parameters) 21 | 22 | model.cuda() 23 | model.eval() 24 | x = torch.Tensor(1, 3, 512, 512).cuda() 25 | 26 | N = 10 27 | with torch.no_grad(): 28 | for _ in 
range(N): 29 | out = model(x) 30 | 31 | result = [] 32 | for _ in range(10): 33 | st = time.time() 34 | for _ in range(N): 35 | out = model(x) 36 | result.append(N/(time.time()-st)) 37 | 38 | import numpy as np 39 | print(np.mean(result), np.std(result)) 40 | -------------------------------------------------------------------------------- /experiments/segmentation/test_single_image.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import os 8 | 9 | import torch 10 | import torchvision.transforms as transform 11 | 12 | import encoding.utils as utils 13 | 14 | from PIL import Image 15 | 16 | from encoding.nn import BatchNorm 17 | from encoding.datasets import datasets 18 | from encoding.models import get_model, get_segmentation_model, MultiEvalModule 19 | 20 | from .option import Options 21 | 22 | 23 | def test(args): 24 | # data transforms 25 | input_transform = transform.Compose([ 26 | transform.ToTensor(), 27 | transform.Normalize([.485, .456, .406], [.229, .224, .225])]) 28 | # model 29 | if args.model_zoo is not None: 30 | model = get_model(args.model_zoo, pretrained=True) 31 | else: 32 | model = get_segmentation_model(args.model, dataset = args.dataset, 33 | backbone = args.backbone, dilated = args.dilated, 34 | lateral = args.lateral, jpu = args.jpu, aux = args.aux, 35 | se_loss = args.se_loss, norm_layer = BatchNorm, 36 | base_size = args.base_size, crop_size = args.crop_size) 37 | # resuming checkpoint 38 | if args.resume is None or not os.path.isfile(args.resume): 39 | raise RuntimeError("=> no checkpoint found at '{}'" .format(args.resume)) 40 | checkpoint = torch.load(args.resume) 41 | # strict=False, so that it is compatible with old pytorch saved models 42 | model.load_state_dict(checkpoint['state_dict'], strict=False) 43 | print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) 44 | 45 | print(model) 46 | scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25] if args.dataset == 'citys' else \ 47 | [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 48 | if not args.ms: 49 | scales = [1.0] 50 | num_classes = datasets[args.dataset.lower()].NUM_CLASS 51 | evaluator = MultiEvalModule(model, num_classes, scales=scales, flip=args.ms).cuda() 52 | evaluator.eval() 53 | 54 | img = input_transform(Image.open(args.input_path).convert('RGB')).unsqueeze(0) 55 | 56 | with torch.no_grad(): 57 | output = evaluator.parallel_forward(img)[0] 58 | predict = torch.max(output, 1)[1].cpu().numpy() 59 | mask = utils.get_mask_pallete(predict, args.dataset) 60 | mask.save(args.save_path) 61 | 62 | 63 | if __name__ == "__main__": 64 | option = Options() 65 | option.parser.add_argument('--input-path', type=str, required=True, help='path to read input image') 66 | option.parser.add_argument('--save-path', type=str, required=True, help='path to save output image') 67 | args = option.parse() 68 | 69 | torch.manual_seed(args.seed) 70 | 71 | test(args) 72 | -------------------------------------------------------------------------------- /experiments/segmentation/train.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright 
(c) 2017 5 | ########################################################################### 6 | 7 | import os 8 | import numpy as np 9 | from tqdm import tqdm 10 | 11 | import torch 12 | from torch.utils import data 13 | import torchvision.transforms as transform 14 | from torch.nn.parallel.scatter_gather import gather 15 | 16 | import encoding.utils as utils 17 | from encoding.nn import SegmentationLosses, SyncBatchNorm 18 | from encoding.parallel import DataParallelModel, DataParallelCriterion 19 | from encoding.datasets import get_segmentation_dataset 20 | from encoding.models import get_segmentation_model 21 | 22 | from .option import Options 23 | 24 | torch_ver = torch.__version__[:3] 25 | if torch_ver == '0.3': 26 | from torch.autograd import Variable 27 | 28 | class Trainer(): 29 | def __init__(self, args): 30 | self.args = args 31 | # data transforms 32 | input_transform = transform.Compose([ 33 | transform.ToTensor(), 34 | transform.Normalize([.485, .456, .406], [.229, .224, .225])]) 35 | # dataset 36 | data_kwargs = {'transform': input_transform, 'base_size': args.base_size, 37 | 'crop_size': args.crop_size} 38 | trainset = get_segmentation_dataset(args.dataset, split=args.train_split, mode='train', 39 | **data_kwargs) 40 | testset = get_segmentation_dataset(args.dataset, split='val', mode ='val', 41 | **data_kwargs) 42 | # dataloader 43 | kwargs = {'num_workers': args.workers, 'pin_memory': True} \ 44 | if args.cuda else {} 45 | self.trainloader = data.DataLoader(trainset, batch_size=args.batch_size, 46 | drop_last=True, shuffle=True, **kwargs) 47 | self.valloader = data.DataLoader(testset, batch_size=args.batch_size, 48 | drop_last=False, shuffle=False, **kwargs) 49 | self.nclass = trainset.num_class 50 | # model 51 | model = get_segmentation_model(args.model, dataset = args.dataset, 52 | backbone = args.backbone, dilated = args.dilated, 53 | lateral = args.lateral, jpu = args.jpu, aux = args.aux, 54 | se_loss = args.se_loss, norm_layer = SyncBatchNorm, 55 | base_size = args.base_size, crop_size = args.crop_size) 56 | print(model) 57 | # optimizer using different LR 58 | params_list = [{'params': model.pretrained.parameters(), 'lr': args.lr},] 59 | if hasattr(model, 'jpu'): 60 | params_list.append({'params': model.jpu.parameters(), 'lr': args.lr*10}) 61 | if hasattr(model, 'head'): 62 | params_list.append({'params': model.head.parameters(), 'lr': args.lr*10}) 63 | if hasattr(model, 'auxlayer'): 64 | params_list.append({'params': model.auxlayer.parameters(), 'lr': args.lr*10}) 65 | optimizer = torch.optim.SGD(params_list, lr=args.lr, 66 | momentum=args.momentum, weight_decay=args.weight_decay) 67 | # criterions 68 | self.criterion = SegmentationLosses(se_loss=args.se_loss, aux=args.aux, 69 | nclass=self.nclass, 70 | se_weight=args.se_weight, 71 | aux_weight=args.aux_weight) 72 | self.model, self.optimizer = model, optimizer 73 | # using cuda 74 | if args.cuda: 75 | self.model = DataParallelModel(self.model).cuda() 76 | self.criterion = DataParallelCriterion(self.criterion).cuda() 77 | # resuming checkpoint 78 | self.best_pred = 0.0 79 | if args.resume is not None: 80 | if not os.path.isfile(args.resume): 81 | raise RuntimeError("=> no checkpoint found at '{}'" .format(args.resume)) 82 | checkpoint = torch.load(args.resume) 83 | args.start_epoch = checkpoint['epoch'] 84 | if args.cuda: 85 | self.model.module.load_state_dict(checkpoint['state_dict']) 86 | else: 87 | self.model.load_state_dict(checkpoint['state_dict']) 88 | if not args.ft: 89 | 
self.optimizer.load_state_dict(checkpoint['optimizer']) 90 | self.best_pred = checkpoint['best_pred'] 91 | print("=> loaded checkpoint '{}' (epoch {})" 92 | .format(args.resume, checkpoint['epoch'])) 93 | # clear start epoch if fine-tuning 94 | if args.ft: 95 | args.start_epoch = 0 96 | # lr scheduler 97 | self.scheduler = utils.LR_Scheduler(args.lr_scheduler, args.lr, 98 | args.epochs, len(self.trainloader)) 99 | 100 | def training(self, epoch): 101 | train_loss = 0.0 102 | self.model.train() 103 | tbar = tqdm(self.trainloader) 104 | for i, (image, target) in enumerate(tbar): 105 | self.scheduler(self.optimizer, i, epoch, self.best_pred) 106 | self.optimizer.zero_grad() 107 | if torch_ver == "0.3": 108 | image = Variable(image) 109 | target = Variable(target) 110 | outputs = self.model(image) 111 | loss = self.criterion(outputs, target) 112 | loss.backward() 113 | self.optimizer.step() 114 | train_loss += loss.item() 115 | tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1))) 116 | 117 | if self.args.no_val: 118 | # save checkpoint every epoch 119 | is_best = False 120 | utils.save_checkpoint({ 121 | 'epoch': epoch + 1, 122 | 'state_dict': self.model.module.state_dict(), 123 | 'optimizer': self.optimizer.state_dict(), 124 | 'best_pred': self.best_pred, 125 | }, self.args, is_best, filename='checkpoint_{}.pth.tar'.format(epoch)) 126 | 127 | 128 | def validation(self, epoch): 129 | # Fast test during the training 130 | def eval_batch(model, image, target): 131 | outputs = model(image) 132 | outputs = gather(outputs, 0, dim=0) 133 | pred = outputs[0] 134 | target = target.cuda() 135 | correct, labeled = utils.batch_pix_accuracy(pred.data, target) 136 | inter, union = utils.batch_intersection_union(pred.data, target, self.nclass) 137 | return correct, labeled, inter, union 138 | 139 | is_best = False 140 | self.model.eval() 141 | total_inter, total_union, total_correct, total_label = 0, 0, 0, 0 142 | tbar = tqdm(self.valloader, desc='\r') 143 | for i, (image, target) in enumerate(tbar): 144 | if torch_ver == "0.3": 145 | image = Variable(image, volatile=True) 146 | correct, labeled, inter, union = eval_batch(self.model, image, target) 147 | else: 148 | with torch.no_grad(): 149 | correct, labeled, inter, union = eval_batch(self.model, image, target) 150 | 151 | total_correct += correct 152 | total_label += labeled 153 | total_inter += inter 154 | total_union += union 155 | pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label) 156 | IoU = 1.0 * total_inter / (np.spacing(1) + total_union) 157 | mIoU = IoU.mean() 158 | tbar.set_description( 159 | 'pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU)) 160 | 161 | new_pred = (pixAcc + mIoU)/2 162 | if new_pred > self.best_pred: 163 | is_best = True 164 | self.best_pred = new_pred 165 | utils.save_checkpoint({ 166 | 'epoch': epoch + 1, 167 | 'state_dict': self.model.module.state_dict(), 168 | 'optimizer': self.optimizer.state_dict(), 169 | 'best_pred': new_pred, 170 | }, self.args, is_best) 171 | 172 | 173 | if __name__ == "__main__": 174 | args = Options().parse() 175 | torch.manual_seed(args.seed) 176 | trainer = Trainer(args) 177 | print('Starting Epoch:', trainer.args.start_epoch) 178 | print('Total Epochs:', trainer.args.epochs) 179 | for epoch in range(trainer.args.start_epoch, trainer.args.epochs): 180 | trainer.training(epoch) 181 | if not trainer.args.no_val: 182 | trainer.validation(epoch) 183 | -------------------------------------------------------------------------------- /images/Framework.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/Framework.png -------------------------------------------------------------------------------- /images/JPU.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/JPU.png -------------------------------------------------------------------------------- /images/encnet_2009_001858.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/encnet_2009_001858.png -------------------------------------------------------------------------------- /images/encnet_ADE_val_00001086.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/encnet_ADE_val_00001086.png -------------------------------------------------------------------------------- /images/gt_2009_001858.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/gt_2009_001858.png -------------------------------------------------------------------------------- /images/gt_ADE_val_00001086.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/gt_ADE_val_00001086.png -------------------------------------------------------------------------------- /images/img_2009_001858.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/img_2009_001858.jpg -------------------------------------------------------------------------------- /images/img_ADE_val_00001086.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/img_ADE_val_00001086.jpg -------------------------------------------------------------------------------- /images/ours_2009_001858.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/ours_2009_001858.png -------------------------------------------------------------------------------- /images/ours_ADE_val_00001086.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/ours_ADE_val_00001086.png -------------------------------------------------------------------------------- /scripts/prepare_ade20k.py: -------------------------------------------------------------------------------- 1 | """Prepare ADE20K dataset""" 2 | import os 3 | import zipfile 4 | import argparse 5 | 6 | from encoding.utils import download, mkdir 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Initialize ADE20K dataset.', 13 | epilog='Example: python 
prepare_ade20k.py', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk') 16 | args = parser.parse_args() 17 | return args 18 | 19 | def download_ade(path, overwrite=False): 20 | _AUG_DOWNLOAD_URLS = [ 21 | ('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip', '219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'), 22 | ('http://data.csail.mit.edu/places/ADEchallenge/release_test.zip', 'e05747892219d10e9243933371a497e905a4860c'),] 23 | download_dir = os.path.join(path, 'downloads') 24 | mkdir(download_dir) 25 | for url, checksum in _AUG_DOWNLOAD_URLS: 26 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 27 | # extract 28 | with zipfile.ZipFile(filename,"r") as zip_ref: 29 | zip_ref.extractall(path=path) 30 | 31 | 32 | if __name__ == '__main__': 33 | args = parse_args() 34 | mkdir(os.path.expanduser('~/.encoding/data')) 35 | if args.download_dir is not None: 36 | if os.path.isdir(_TARGET_DIR): 37 | os.remove(_TARGET_DIR) 38 | # make symlink 39 | os.symlink(args.download_dir, _TARGET_DIR) 40 | else: 41 | download_ade(_TARGET_DIR, overwrite=False) 42 | -------------------------------------------------------------------------------- /scripts/prepare_cityscapes.py: -------------------------------------------------------------------------------- 1 | """Prepare Cityscapes dataset""" 2 | import os 3 | import zipfile 4 | import argparse 5 | 6 | from encoding.utils import mkdir, check_sha1 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Initialize Cityscapes dataset.', 13 | epilog='Example: python prepare_cityscapes.py', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk') 16 | args = parser.parse_args() 17 | return args 18 | 19 | def download_city(path): 20 | _CITY_DOWNLOAD_URLS = [ 21 | ('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'), 22 | ('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404')] 23 | download_dir = os.path.join(path, 'downloads') 24 | mkdir(download_dir) 25 | for filename, checksum in _CITY_DOWNLOAD_URLS: 26 | if not check_sha1(filename, checksum): 27 | raise UserWarning('File {} is downloaded but the content hash does not match. ' \ 28 | 'The repo may be outdated or download may be incomplete. 
' \ 29 | 'If the "repo_url" is overridden, consider switching to ' \ 30 | 'the default repo.'.format(filename)) 31 | # extract 32 | with zipfile.ZipFile(filename,"r") as zip_ref: 33 | zip_ref.extractall(path=path) 34 | print("Extracted", filename) 35 | 36 | if __name__ == '__main__': 37 | args = parse_args() 38 | mkdir(os.path.expanduser('~/.encoding/data')) 39 | mkdir(os.path.expanduser('~/.encoding/data/cityscapes')) 40 | if args.download_dir is not None: 41 | if os.path.isdir(_TARGET_DIR): 42 | os.remove(_TARGET_DIR) 43 | # make symlink 44 | os.symlink(args.download_dir, _TARGET_DIR) 45 | else: 46 | download_city(_TARGET_DIR) 47 | -------------------------------------------------------------------------------- /scripts/prepare_coco.py: -------------------------------------------------------------------------------- 1 | """Prepare MS COCO datasets""" 2 | import os 3 | import shutil 4 | import zipfile 5 | import argparse 6 | 7 | from encoding.utils import download, mkdir 8 | 9 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Initialize MS COCO dataset.', 14 | epilog='Example: python mscoco.py --download-dir ~/mscoco', 15 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk') 17 | args = parser.parse_args() 18 | return args 19 | 20 | def download_coco(path, overwrite=False): 21 | _DOWNLOAD_URLS = [ 22 | ('http://images.cocodataset.org/zips/train2017.zip', 23 | '10ad623668ab00c62c096f0ed636d6aff41faca5'), 24 | ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', 25 | '8551ee4bb5860311e79dace7e79cb91e432e78b3'), 26 | ('http://images.cocodataset.org/zips/val2017.zip', 27 | '4950dc9d00dbe1c933ee0170f5797584351d2a41') 28 | ] 29 | mkdir(path) 30 | for url, checksum in _DOWNLOAD_URLS: 31 | filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) 32 | # extract 33 | with zipfile.ZipFile(filename) as zf: 34 | zf.extractall(path=path) 35 | 36 | def install_coco_api(): 37 | repo_url = "https://github.com/cocodataset/cocoapi" 38 | os.system("git clone " + repo_url) 39 | os.system("cd cocoapi/PythonAPI/ && python setup.py install") 40 | shutil.rmtree('cocoapi') 41 | try: 42 | import pycocotools 43 | except Exception: 44 | print("Installing COCO API failed, please install it manually %s"%(repo_url)) 45 | 46 | 47 | if __name__ == '__main__': 48 | args = parse_args() 49 | mkdir(os.path.expanduser('~/.encoding/data')) 50 | if args.download_dir is not None: 51 | if os.path.isdir(_TARGET_DIR): 52 | os.remove(_TARGET_DIR) 53 | # make symlink 54 | os.symlink(args.download_dir, _TARGET_DIR) 55 | else: 56 | download_coco(_TARGET_DIR, overwrite=False) 57 | install_coco_api() 58 | -------------------------------------------------------------------------------- /scripts/prepare_pascal.py: -------------------------------------------------------------------------------- 1 | """Prepare PASCAL VOC datasets""" 2 | import os 3 | import shutil 4 | import tarfile 5 | import argparse 6 | 7 | from encoding.utils import download, mkdir 8 | 9 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Initialize PASCAL VOC dataset.', 14 | epilog='Example: python prepare_pascal.py', 15 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | parser.add_argument('--download-dir', type=str, default=None, 
help='dataset directory on disk') 17 | parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') 18 | parser.add_argument('--overwrite', action='store_true', help='overwrite downloaded files if set, in case they are corrupted') 19 | args = parser.parse_args() 20 | return args 21 | 22 | def download_voc(path, overwrite=False): 23 | _DOWNLOAD_URLS = [ 24 | ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', 25 | '4e443f8a2eca6b1dac8a6c57641b67dd40621a49')] 26 | download_dir = os.path.join(path, 'downloads') 27 | mkdir(download_dir) 28 | for url, checksum in _DOWNLOAD_URLS: 29 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 30 | # extract 31 | with tarfile.open(filename) as tar: 32 | tar.extractall(path=path) 33 | 34 | def download_aug(path, overwrite=False): 35 | _AUG_DOWNLOAD_URLS = [ 36 | ('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz', '7129e0a480c2d6afb02b517bb18ac54283bfaa35')] 37 | download_dir = os.path.join(path, 'downloads') 38 | mkdir(download_dir) 39 | for url, checksum in _AUG_DOWNLOAD_URLS: 40 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 41 | # extract 42 | with tarfile.open(filename) as tar: 43 | tar.extractall(path=path) 44 | shutil.move(os.path.join(path, 'benchmark_RELEASE'), 45 | os.path.join(path, 'VOCaug')) 46 | filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt'] 47 | # generate trainval.txt 48 | with open(os.path.join(path, 'VOCaug/dataset/trainval.txt'), 'w') as outfile: 49 | for fname in filenames: 50 | fname = os.path.join(path, fname) 51 | with open(fname) as infile: 52 | for line in infile: 53 | outfile.write(line) 54 | 55 | 56 | if __name__ == '__main__': 57 | args = parse_args() 58 | mkdir(os.path.expanduser('~/.encoding/data')) 59 | if args.download_dir is not None: 60 | if os.path.isdir(_TARGET_DIR): 61 | os.remove(_TARGET_DIR) 62 | os.symlink(args.download_dir, _TARGET_DIR) 63 | else: 64 | download_voc(_TARGET_DIR, overwrite=False) 65 | download_aug(_TARGET_DIR, overwrite=False) 66 | -------------------------------------------------------------------------------- /scripts/prepare_pcontext.py: -------------------------------------------------------------------------------- 1 | """Prepare PASCAL Context dataset""" 2 | import os 3 | import shutil 4 | import tarfile 5 | import argparse 6 | 7 | from encoding.utils import download, mkdir 8 | 9 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 10 | PASD_URL="https://codalabuser.blob.core.windows.net/public/%s" 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser( 14 | description='Initialize PASCAL Context dataset.', 15 | epilog='Example: python prepare_pcontext.py', 16 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 17 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk') 18 | args = parser.parse_args() 19 | return args 20 | 21 | def download_ade(path, overwrite=False): 22 | _AUG_DOWNLOAD_URLS = [ 23 | ('http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar', 24 | 'bf9985e9f2b064752bf6bd654d89f017c76c395a'), 25 | ('https://codalabuser.blob.core.windows.net/public/trainval_merged.json', 26 | '169325d9f7e9047537fedca7b04de4dddf10b881'), 27 | # You can skip these if the network is slow; the dataset will generate them automatically. 
28 | ('https://hangzh.s3.amazonaws.com/encoding/data/pcontext/train.pth', 29 | '4bfb49e8c1cefe352df876c9b5434e655c9c1d07'), 30 | ('https://hangzh.s3.amazonaws.com/encoding/data/pcontext/val.pth', 31 | 'ebedc94247ec616c57b9a2df15091784826a7b0c'), 32 | ] 33 | download_dir = os.path.join(path, 'downloads') 34 | mkdir(download_dir) 35 | for url, checksum in _AUG_DOWNLOAD_URLS: 36 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 37 | # extract 38 | if os.path.splitext(filename)[1] == '.tar': 39 | with tarfile.open(filename) as tar: 40 | tar.extractall(path=path) 41 | else: 42 | shutil.move(filename, os.path.join(path, 'VOCdevkit/VOC2010/'+os.path.basename(filename))) 43 | 44 | def install_pcontext_api(): 45 | repo_url = "https://github.com/zhanghang1989/detail-api" 46 | os.system("git clone " + repo_url) 47 | os.system("cd detail-api/PythonAPI/ && python setup.py install") 48 | shutil.rmtree('detail-api') 49 | try: 50 | import detail 51 | except Exception: 52 | print("Installing PASCAL Context API failed, please install it manually %s"%(repo_url)) 53 | 54 | 55 | if __name__ == '__main__': 56 | args = parse_args() 57 | mkdir(os.path.expanduser('~/.encoding/data')) 58 | if args.download_dir is not None: 59 | if os.path.isdir(_TARGET_DIR): 60 | os.remove(_TARGET_DIR) 61 | # make symlink 62 | os.symlink(args.download_dir, _TARGET_DIR) 63 | else: 64 | download_ade(_TARGET_DIR, overwrite=False) 65 | install_pcontext_api() 66 | --------------------------------------------------------------------------------
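For reference, a minimal sketch of how the preparation scripts above feed the segmentation trainer, assuming the repository root as the working directory. The exact flag spellings live in experiments/segmentation/option.py and are wrapped by the shell scripts under experiments/segmentation/scripts/, so the training arguments below are illustrative assumptions rather than the canonical invocation.

```bash
# Download PASCAL Context into ~/.encoding/data (VOC2010 images + trainval_merged.json);
# alternatively: python scripts/prepare_pcontext.py with the encoding package installed.
python -m scripts.prepare_pcontext

# Train EncNet with the JPU on PASCAL Context (assumed flags; the bundled
# encnet_res50_pcontext.sh script wraps a similar command).
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train \
    --dataset pcontext --model encnet --jpu JPU --backbone resnet50 \
    --aux --se-loss
```

Evaluation and single-image inference follow the same pattern via experiments.segmentation.test and experiments.segmentation.test_single_image.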