├── .editorconfig ├── .gitignore ├── LICENSE ├── README.md ├── encoding ├── __init__.py ├── datasets │ ├── __init__.py │ ├── ade20k.py │ ├── base.py │ ├── cityscapes.py │ ├── coco.py │ ├── pascal_aug.py │ ├── pascal_voc.py │ └── pcontext.py ├── dilated │ ├── __init__.py │ └── resnet.py ├── functions │ ├── __init__.py │ ├── encoding.py │ └── syncbn.py ├── models │ ├── __init__.py │ ├── base.py │ ├── deeplabv3.py │ ├── encnet.py │ ├── fcn.py │ ├── model_store.py │ ├── model_zoo.py │ └── psp.py ├── nn │ ├── __init__.py │ ├── comm.py │ ├── customize.py │ ├── encoding.py │ └── syncbn.py ├── parallel.py └── utils │ ├── __init__.py │ ├── files.py │ ├── lr_scheduler.py │ ├── metrics.py │ └── pallete.py ├── experiments └── segmentation │ ├── option.py │ ├── scripts │ ├── deeplab_res50_pcontext.sh │ ├── encnet_res101_ade20k_train.sh │ ├── encnet_res101_ade20k_trainval.sh │ ├── encnet_res101_pcontext.sh │ ├── encnet_res50_ade20k_train.sh │ ├── encnet_res50_ade20k_trainval.sh │ ├── encnet_res50_pcontext.sh │ └── psp_res50_pcontext.sh │ ├── test.py │ ├── test_fps_params.py │ ├── test_single_image.py │ └── train.py ├── images ├── Framework.png ├── JPU.png ├── encnet_2009_001858.png ├── encnet_ADE_val_00001086.png ├── gt_2009_001858.png ├── gt_ADE_val_00001086.png ├── img_2009_001858.jpg ├── img_ADE_val_00001086.jpg ├── ours_2009_001858.png └── ours_ADE_val_00001086.png └── scripts ├── prepare_ade20k.py ├── prepare_cityscapes.py ├── prepare_coco.py ├── prepare_pascal.py └── prepare_pcontext.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.swp 3 | *.pyc 4 | 5 | version.py 6 | 7 | runs/ 8 | data/ 9 | build/ 10 | results/ 11 | 12 | .idea -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017- Hang Zhang. All rights reserved. 4 | Copyright (c) 2018- Amazon.com, Inc. or its affiliates. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | 1. Redistributions of source code must retain the above copyright 14 | notice, this list of conditions and the following disclaimer. 15 | 16 | 2. Redistributions in binary form must reproduce the above copyright 17 | notice, this list of conditions and the following disclaimer in the 18 | documentation and/or other materials provided with the distribution. 19 | 20 | 3. Neither the name of Amazon Inc nor the names of the contributors may be 21 | used to endorse or promote products derived from this software without 22 | specific prior written permission. 
23 | 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 29 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 | SOFTWARE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation 2 | [[Project]](http://wuhuikai.me/FastFCNProject/) [[Paper]](http://wuhuikai.me/FastFCNProject/fast_fcn.pdf) [[arXiv]](https://arxiv.org/abs/1903.11816) [[Home]](http://wuhuikai.me) 3 | 4 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/fastfcn-rethinking-dilated-convolution-in-the/semantic-segmentation-pascal-context)](https://paperswithcode.com/sota/semantic-segmentation-pascal-context?p=fastfcn-rethinking-dilated-convolution-in-the) 5 | 6 | Official implementation of **FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation**. 7 | A **Faster**, **Stronger** and **Lighter** framework for semantic segmentation, achieving state-of-the-art performance with more than **3x** acceleration. 8 | ``` 9 | @inproceedings{wu2019fastfcn, 10 | title = {FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation}, 11 | author = {Wu, Huikai and Zhang, Junge and Huang, Kaiqi and Liang, Kongming and Yu, Yizhou}, 12 | booktitle = {arXiv preprint arXiv:1903.11816}, 13 | year = {2019} 14 | } 15 | ``` 16 | Contact: Hui-Kai Wu (huikaiwu@icloud.com) 17 | 18 | ## Update 19 | **2020-04-15: Now supports inference on a single image!!!** 20 | ```bash 21 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test_single_image --dataset [pcontext|ade20k] \ 22 | --model [encnet|deeplab|psp] --jpu [JPU|JPU_X] \ 23 | --backbone [resnet50|resnet101] [--ms] --resume {MODEL} --input-path {INPUT} --save-path {OUTPUT} 24 | ``` 25 | 26 | **2020-04-15: New joint upsampling module is now available!!!** 27 | - `--jpu [JPU|JPU_X]`: JPU is the original module in the arXiv paper; JPU_X is a pyramid version of JPU. 28 | 29 | **2020-02-20: `FastFCN` can now run on every `OS` with `PyTorch>=1.1.0` and `Python==3.*.*`** 30 | - Replaced all `C/C++` extensions with pure Python extensions. 31 | 32 | ## Version 33 | 1. Original code, producing the results reported in the arXiv paper. [[branch:v1.0.0]](https://github.com/wuhuikai/FastFCN/tree/v1.0.0) 34 | 2. Pure PyTorch code, with `torch.nn.DistributedDataParallel` and `torch.nn.SyncBatchNorm`. [[branch:latest]](https://github.com/wuhuikai/FastFCN/tree/latest) 35 | 3. Pure Python code. [[branch:master]](https://github.com/wuhuikai/FastFCN) 36 | 37 | ## Overview 38 | ### Framework 39 | ![](images/Framework.png) 40 | ### Joint Pyramid Upsampling (JPU) 41 | ![](images/JPU.png) 42 | 43 | ## Install 44 | 1. [PyTorch >= 1.1.0](https://pytorch.org/get-started/locally) (Note: The code is tested in an environment with `python=3.6, cuda=9.0`) 45 | 2. Download **FastFCN** 46 | ``` 47 | git clone https://github.com/wuhuikai/FastFCN.git 48 | cd FastFCN 49 | ``` 50 | 3. 
Install Requirements 51 | ``` 52 | nose 53 | tqdm 54 | scipy 55 | cython 56 | requests 57 | ``` 58 | 59 | ## Train and Test 60 | ### PContext 61 | ``` 62 | python -m scripts.prepare_pcontext 63 | ``` 64 | | Method | Backbone | mIoU | FPS | Model | Scripts | 65 | |:----|:----|:---:|:---:|:---:|:---:| 66 | | EncNet | ResNet-50 | 49.91 | 18.77 | | | 67 | | EncNet+JPU (ours) | ResNet-50 | **51.05** | **37.56** | [GoogleDrive](https://drive.google.com/open?id=1Hy_GWVnTyJBNv4Hejwh5LKa8S_ph27y0) | [bash](experiments/segmentation/scripts/encnet_res50_pcontext.sh) | 68 | | PSP | ResNet-50 | 50.58 | 18.08 | | | 69 | | PSP+JPU (ours) | ResNet-50 | **50.89** | **28.48** | [GoogleDrive](https://drive.google.com/open?id=1fJItp7B7uz6s69fmquqtm18A72EJE5jm) | [bash](experiments/segmentation/scripts/psp_res50_pcontext.sh) | 70 | | DeepLabV3 | ResNet-50 | 49.19 | 15.99 | | | 71 | | DeepLabV3+JPU (ours) | ResNet-50 | **50.07** | **20.67** | [GoogleDrive](https://drive.google.com/open?id=11s20bUkPrZXXmFqYpwC_h1G57CB8g2u9) | [bash](experiments/segmentation/scripts/deeplab_res50_pcontext.sh) | 72 | | EncNet | ResNet-101 | 52.60 (MS) | 10.51 | | | 73 | | EncNet+JPU (ours) | ResNet-101 | **54.03 (MS)** | **32.02** | [GoogleDrive](https://drive.google.com/open?id=1GOIma8cXTKfTa2qSIcDO8EmctyoDzHuV) | [bash](experiments/segmentation/scripts/encnet_res101_pcontext.sh) | 74 | 75 | ### ADE20K 76 | ``` 77 | python -m scripts.prepare_ade20k 78 | ``` 79 | #### Training Set 80 | | Method | Backbone | mIoU (MS) | Model | Scripts | 81 | |:----|:----|:---:|:---:|:---:| 82 | | EncNet | ResNet-50 | 41.11 | | | 83 | | EncNet+JPU (ours) | ResNet-50 | **42.75** | [GoogleDrive](https://drive.google.com/open?id=1EdHDjNDtPmVgSD7RYjeyXy7SSYpTzYyN) | [bash](experiments/segmentation/scripts/encnet_res50_ade20k_train.sh) | 84 | | EncNet | ResNet-101 | 44.65 | | | 85 | | EncNet+JPU (ours) | ResNet-101 | 44.34 | [GoogleDrive](https://drive.google.com/open?id=1WFkbf8OWJmLGnOz5M_IxIZtiHKn2_bEp) | [bash](experiments/segmentation/scripts/encnet_res101_ade20k_train.sh) | 86 | #### Training Set + Val Set 87 | | Method | Backbone | FinalScore (MS) | Model | Scripts | 88 | |:----|:----|:---:|:---:|:---:| 89 | | EncNet+JPU (ours) | ResNet-50 | | [GoogleDrive](https://drive.google.com/open?id=10u8ISncp0NukwQb0K94GsH_AHgT6hgxc) | [bash](experiments/segmentation/scripts/encnet_res50_ade20k_trainval.sh) | 90 | | EncNet | ResNet-101 | 55.67 | | | 91 | | EncNet+JPU (ours) | ResNet-101 | **55.84** | [GoogleDrive](https://drive.google.com/open?id=15gdJeKFy7OXhAr6mQNYvu25LiPwFfQ-Z) | [bash](experiments/segmentation/scripts/encnet_res101_ade20k_trainval.sh) | 92 | 93 | **Note:** EncNet (ResNet-101) is trained with `crop_size=576`, while EncNet+JPU (ResNet-101) is trained with `crop_size=480` for fitting 4 images into a 12G GPU. 94 | 95 | ## Visual Results 96 | |Dataset|Input|GT|EncNet|Ours| 97 | |:----|:---:|:---:|:---:|:---:| 98 | |PContext|![](images/img_2009_001858.jpg)|![](images/gt_2009_001858.png)|![](images/encnet_2009_001858.png)|![](images/ours_2009_001858.png)| 99 | |ADE20K|![](images/img_ADE_val_00001086.jpg)|![](images/gt_ADE_val_00001086.png)|![](images/encnet_ADE_val_00001086.png)|![](images/ours_ADE_val_00001086.png)| 100 | 101 | ### [More Visual Results](http://wuhuikai.me/FastFCNProject/#visual) 102 | 103 | ## Acknowledgement 104 | Code borrows heavily from [PyTorch-Encoding](https://github.com/zhanghang1989/PyTorch-Encoding). 
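## Programmatic Usage (Sketch)
The scripts above are the supported entry points. For quick experiments, the `encoding` package can also be used directly from Python. Below is a minimal, illustrative sketch — not an official example — that loads a prepared ADE20K training split with the same normalization the models use. It assumes `torchvision` is installed, that `python -m scripts.prepare_ade20k` has been run, and the batch size and printed shapes are only indicative.
```python
import torch
import torchvision.transforms as transforms

from encoding.datasets import get_segmentation_dataset

# Normalization matches the defaults in encoding/models/base.py
input_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
])

# mode='train' applies the synchronized random flip/scale/crop/blur augmentation
trainset = get_segmentation_dataset('ade20k', split='train', mode='train',
                                    transform=input_transform)
loader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True)

images, masks = next(iter(loader))
print(images.shape, masks.shape)  # e.g. [4, 3, 480, 480] and [4, 480, 480]
```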
105 | -------------------------------------------------------------------------------- /encoding/__init__.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """An optimized PyTorch package with CUDA backend.""" 12 | from . import nn, functions, dilated, parallel, utils, models, datasets 13 | -------------------------------------------------------------------------------- /encoding/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import * 2 | from .coco import COCOSegmentation 3 | from .ade20k import ADE20KSegmentation 4 | from .pascal_voc import VOCSegmentation 5 | from .pascal_aug import VOCAugSegmentation 6 | from .pcontext import ContextSegmentation 7 | from .cityscapes import CitySegmentation 8 | 9 | datasets = { 10 | 'coco': COCOSegmentation, 11 | 'ade20k': ADE20KSegmentation, 12 | 'pascal_voc': VOCSegmentation, 13 | 'pascal_aug': VOCAugSegmentation, 14 | 'pcontext': ContextSegmentation, 15 | 'citys': CitySegmentation, 16 | } 17 | 18 | def get_segmentation_dataset(name, **kwargs): 19 | return datasets[name.lower()](**kwargs) 20 | -------------------------------------------------------------------------------- /encoding/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import os 8 | import numpy as np 9 | 10 | import torch 11 | 12 | from PIL import Image 13 | from .base import BaseDataset 14 | 15 | class ADE20KSegmentation(BaseDataset): 16 | BASE_DIR = 'ADEChallengeData2016' 17 | NUM_CLASS = 150 18 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 19 | mode=None, transform=None, target_transform=None, **kwargs): 20 | super(ADE20KSegmentation, self).__init__( 21 | root, split, mode, transform, target_transform, **kwargs) 22 | # assert exists and prepare dataset automatically 23 | root = os.path.join(root, self.BASE_DIR) 24 | assert os.path.exists(root), "Please setup the dataset using" + \ 25 | "encoding/scripts/prepare_ade20k.py" 26 | self.images, self.masks = _get_ade20k_pairs(root, split) 27 | if split != 'test': 28 | assert (len(self.images) == len(self.masks)) 29 | if len(self.images) == 0: 30 | raise(RuntimeError("Found 0 images in subfolders of: \ 31 | " + root + "\n")) 32 | 33 | def __getitem__(self, index): 34 | img = Image.open(self.images[index]).convert('RGB') 35 | if self.mode == 'test': 36 | if self.transform is not None: 37 | img = self.transform(img) 38 | return img, os.path.basename(self.images[index]) 39 | mask = Image.open(self.masks[index]) 40 | # synchrosized transform 41 | if self.mode == 'train': 42 | img, mask = self._sync_transform(img, mask) 43 | elif self.mode == 'val': 44 | img, mask = self._val_sync_transform(img, mask) 45 | else: 46 | assert self.mode == 'testval' 47 | mask = 
self._mask_transform(mask) 48 | # general resize, normalize and toTensor 49 | if self.transform is not None: 50 | img = self.transform(img) 51 | if self.target_transform is not None: 52 | mask = self.target_transform(mask) 53 | return img, mask 54 | 55 | def _mask_transform(self, mask): 56 | target = np.array(mask).astype('int64') - 1 57 | return torch.from_numpy(target) 58 | 59 | def __len__(self): 60 | return len(self.images) 61 | 62 | @property 63 | def pred_offset(self): 64 | return 1 65 | 66 | 67 | def _get_ade20k_pairs(folder, split='train'): 68 | def get_path_pairs(img_folder, mask_folder): 69 | img_paths = [] 70 | mask_paths = [] 71 | for filename in os.listdir(img_folder): 72 | basename, _ = os.path.splitext(filename) 73 | if filename.endswith(".jpg"): 74 | imgpath = os.path.join(img_folder, filename) 75 | maskname = basename + '.png' 76 | maskpath = os.path.join(mask_folder, maskname) 77 | if os.path.isfile(maskpath): 78 | img_paths.append(imgpath) 79 | mask_paths.append(maskpath) 80 | else: 81 | print('cannot find the mask:', maskpath) 82 | return img_paths, mask_paths 83 | 84 | if split == 'train': 85 | img_folder = os.path.join(folder, 'images/training') 86 | mask_folder = os.path.join(folder, 'annotations/training') 87 | img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) 88 | print('len(img_paths):', len(img_paths)) 89 | assert len(img_paths) == 20210 90 | elif split == 'val': 91 | img_folder = os.path.join(folder, 'images/validation') 92 | mask_folder = os.path.join(folder, 'annotations/validation') 93 | img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) 94 | assert len(img_paths) == 2000 95 | elif split == 'test': 96 | folder = os.path.join(folder, '../release_test') 97 | with open(os.path.join(folder, 'list.txt')) as f: 98 | img_paths = [os.path.join(folder, 'testing', line.strip()) for line in f] 99 | assert len(img_paths) == 3352 100 | return img_paths, None 101 | else: 102 | assert split == 'trainval' 103 | train_img_folder = os.path.join(folder, 'images/training') 104 | train_mask_folder = os.path.join(folder, 'annotations/training') 105 | val_img_folder = os.path.join(folder, 'images/validation') 106 | val_mask_folder = os.path.join(folder, 'annotations/validation') 107 | train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder) 108 | val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder) 109 | img_paths = train_img_paths + val_img_paths 110 | mask_paths = train_mask_paths + val_mask_paths 111 | assert len(img_paths) == 22210 112 | return img_paths, mask_paths 113 | -------------------------------------------------------------------------------- /encoding/datasets/base.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import random 8 | import numpy as np 9 | 10 | import torch 11 | import torch.utils.data as data 12 | 13 | from PIL import Image, ImageOps, ImageFilter 14 | 15 | __all__ = ['BaseDataset', 'test_batchify_fn'] 16 | 17 | class BaseDataset(data.Dataset): 18 | def __init__(self, root, split, mode=None, transform=None, 19 | target_transform=None, base_size=520, crop_size=480): 20 | self.root = root 21 | self.transform = transform 22 | self.target_transform = target_transform 23 | self.split = 
split 24 | self.mode = mode if mode is not None else split 25 | self.base_size = base_size 26 | self.crop_size = crop_size 27 | if self.mode == 'train': 28 | print('BaseDataset: base_size {}, crop_size {}'. \ 29 | format(base_size, crop_size)) 30 | 31 | def __getitem__(self, index): 32 | raise NotImplemented 33 | 34 | @property 35 | def num_class(self): 36 | return self.NUM_CLASS 37 | 38 | @property 39 | def pred_offset(self): 40 | raise NotImplemented 41 | 42 | def make_pred(self, x): 43 | return x + self.pred_offset 44 | 45 | def _val_sync_transform(self, img, mask): 46 | outsize = self.crop_size 47 | short_size = outsize 48 | w, h = img.size 49 | if w > h: 50 | oh = short_size 51 | ow = int(1.0 * w * oh / h) 52 | else: 53 | ow = short_size 54 | oh = int(1.0 * h * ow / w) 55 | img = img.resize((ow, oh), Image.BILINEAR) 56 | mask = mask.resize((ow, oh), Image.NEAREST) 57 | # center crop 58 | w, h = img.size 59 | x1 = int(round((w - outsize) / 2.)) 60 | y1 = int(round((h - outsize) / 2.)) 61 | img = img.crop((x1, y1, x1+outsize, y1+outsize)) 62 | mask = mask.crop((x1, y1, x1+outsize, y1+outsize)) 63 | # final transform 64 | return img, self._mask_transform(mask) 65 | 66 | def _sync_transform(self, img, mask): 67 | # random mirror 68 | if random.random() < 0.5: 69 | img = img.transpose(Image.FLIP_LEFT_RIGHT) 70 | mask = mask.transpose(Image.FLIP_LEFT_RIGHT) 71 | crop_size = self.crop_size 72 | # random scale (short edge from 480 to 720) 73 | short_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0)) 74 | w, h = img.size 75 | if h > w: 76 | ow = short_size 77 | oh = int(1.0 * h * ow / w) 78 | else: 79 | oh = short_size 80 | ow = int(1.0 * w * oh / h) 81 | img = img.resize((ow, oh), Image.BILINEAR) 82 | mask = mask.resize((ow, oh), Image.NEAREST) 83 | # pad crop 84 | if short_size < crop_size: 85 | padh = crop_size - oh if oh < crop_size else 0 86 | padw = crop_size - ow if ow < crop_size else 0 87 | img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0) 88 | mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0) 89 | # random crop crop_size 90 | w, h = img.size 91 | x1 = random.randint(0, w - crop_size) 92 | y1 = random.randint(0, h - crop_size) 93 | img = img.crop((x1, y1, x1+crop_size, y1+crop_size)) 94 | mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size)) 95 | # gaussian blur as in PSP 96 | if random.random() < 0.5: 97 | img = img.filter(ImageFilter.GaussianBlur( 98 | radius=random.random())) 99 | # final transform 100 | return img, self._mask_transform(mask) 101 | 102 | def _mask_transform(self, mask): 103 | return torch.from_numpy(np.array(mask)).long() 104 | 105 | 106 | def test_batchify_fn(data): 107 | error_msg = "batch must contain tensors, tuples or lists; found {}" 108 | if isinstance(data[0], (str, torch.Tensor)): 109 | return list(data) 110 | elif isinstance(data[0], (tuple, list)): 111 | data = zip(*data) 112 | return [test_batchify_fn(i) for i in data] 113 | raise TypeError((error_msg.format(type(data[0])))) 114 | -------------------------------------------------------------------------------- /encoding/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2018 5 | ########################################################################### 6 | 7 | import os 8 | import random 9 | import numpy as np 10 | 11 | import torch 12 | 13 
| from tqdm import tqdm 14 | from PIL import Image, ImageOps, ImageFilter 15 | 16 | from .base import BaseDataset 17 | 18 | class CitySegmentation(BaseDataset): 19 | NUM_CLASS = 19 20 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 21 | mode=None, transform=None, target_transform=None, **kwargs): 22 | super(CitySegmentation, self).__init__( 23 | root, split, mode, transform, target_transform, **kwargs) 24 | self.images, self.mask_paths = get_city_pairs(self.root, self.split) 25 | assert (len(self.images) == len(self.mask_paths)) 26 | if len(self.images) == 0: 27 | raise RuntimeError("Found 0 images in subfolders of: \ 28 | " + self.root + "\n") 29 | self._indices = np.array(range(-1, 19)) 30 | self._classes = np.array([0, 7, 8, 11, 12, 13, 17, 19, 20, 21, 22, 31 | 23, 24, 25, 26, 27, 28, 31, 32, 33]) 32 | self._key = np.array([-1, -1, -1, -1, -1, -1, 33 | -1, -1, 0, 1, -1, -1, 34 | 2, 3, 4, -1, -1, -1, 35 | 5, -1, 6, 7, 8, 9, 36 | 10, 11, 12, 13, 14, 15, 37 | -1, -1, 16, 17, 18]) 38 | self._mapping = np.array(range(-1, len(self._key)-1)).astype('int32') 39 | 40 | def _class_to_index(self, mask): 41 | # assert the values 42 | values = np.unique(mask) 43 | for i in range(len(values)): 44 | assert(values[i] in self._mapping) 45 | index = np.digitize(mask.ravel(), self._mapping, right=True) 46 | return self._key[index].reshape(mask.shape) 47 | 48 | def _preprocess(self, mask_file): 49 | if os.path.exists(mask_file): 50 | masks = torch.load(mask_file) 51 | return masks 52 | masks = [] 53 | print("Preprocessing mask, this will take a while." + \ 54 | "But don't worry, it only run once for each split.") 55 | tbar = tqdm(self.mask_paths) 56 | for fname in tbar: 57 | tbar.set_description("Preprocessing masks {}".format(fname)) 58 | mask = Image.fromarray(self._class_to_index( 59 | np.array(Image.open(fname))).astype('int8')) 60 | masks.append(mask) 61 | torch.save(masks, mask_file) 62 | return masks 63 | 64 | def __getitem__(self, index): 65 | img = Image.open(self.images[index]).convert('RGB') 66 | if self.mode == 'test': 67 | if self.transform is not None: 68 | img = self.transform(img) 69 | return img, os.path.basename(self.images[index]) 70 | mask = Image.open(self.mask_paths[index]) 71 | # synchrosized transform 72 | if self.mode == 'train': 73 | img, mask = self._sync_transform(img, mask) 74 | elif self.mode == 'val': 75 | img, mask = self._val_sync_transform(img, mask) 76 | else: 77 | assert self.mode == 'testval' 78 | mask = self._mask_transform(mask) 79 | # general resize, normalize and toTensor 80 | if self.transform is not None: 81 | img = self.transform(img) 82 | if self.target_transform is not None: 83 | mask = self.target_transform(mask) 84 | return img, mask 85 | 86 | def _sync_transform(self, img, mask): 87 | # random mirror 88 | if random.random() < 0.5: 89 | img = img.transpose(Image.FLIP_LEFT_RIGHT) 90 | mask = mask.transpose(Image.FLIP_LEFT_RIGHT) 91 | crop_size = self.crop_size 92 | # random scale (short edge from 480 to 720) 93 | short_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0)) 94 | w, h = img.size 95 | if h > w: 96 | ow = short_size 97 | oh = int(1.0 * h * ow / w) 98 | else: 99 | oh = short_size 100 | ow = int(1.0 * w * oh / h) 101 | img = img.resize((ow, oh), Image.BILINEAR) 102 | mask = mask.resize((ow, oh), Image.NEAREST) 103 | # random rotate -10~10, mask using NN rotate 104 | deg = random.uniform(-10, 10) 105 | img = img.rotate(deg, resample=Image.BILINEAR) 106 | mask = mask.rotate(deg, resample=Image.NEAREST) 
107 | # pad crop 108 | if short_size < crop_size: 109 | padh = crop_size - oh if oh < crop_size else 0 110 | padw = crop_size - ow if ow < crop_size else 0 111 | img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0) 112 | mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0) 113 | # random crop crop_size 114 | w, h = img.size 115 | x1 = random.randint(0, w - crop_size) 116 | y1 = random.randint(0, h - crop_size) 117 | img = img.crop((x1, y1, x1+crop_size, y1+crop_size)) 118 | mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size)) 119 | # gaussian blur as in PSP 120 | if random.random() < 0.5: 121 | img = img.filter(ImageFilter.GaussianBlur( 122 | radius=random.random())) 123 | # final transform 124 | return img, self._mask_transform(mask) 125 | 126 | def _mask_transform(self, mask): 127 | target = self._class_to_index(np.array(mask).astype('int32')) 128 | return torch.from_numpy(target).long() 129 | 130 | def __len__(self): 131 | return len(self.images) 132 | 133 | def make_pred(self, mask): 134 | values = np.unique(mask) 135 | for i in range(len(values)): 136 | assert(values[i] in self._indices) 137 | index = np.digitize(mask.ravel(), self._indices, right=True) 138 | return self._classes[index].reshape(mask.shape) 139 | 140 | 141 | def get_city_pairs(folder, split='train'): 142 | def get_path_pairs(img_folder, mask_folder): 143 | img_paths = [] 144 | mask_paths = [] 145 | for root, directories, files in os.walk(img_folder): 146 | for filename in files: 147 | if filename.endswith(".png"): 148 | imgpath = os.path.join(root, filename) 149 | foldername = os.path.basename(os.path.dirname(imgpath)) 150 | maskname = filename.replace('leftImg8bit','gtFine_labelIds') 151 | maskpath = os.path.join(mask_folder, foldername, maskname) 152 | if os.path.isfile(imgpath) and os.path.isfile(maskpath): 153 | img_paths.append(imgpath) 154 | mask_paths.append(maskpath) 155 | else: 156 | print('cannot find the mask or image:', imgpath, maskpath) 157 | print('Found {} images in the folder {}'.format(len(img_paths), img_folder)) 158 | return img_paths, mask_paths 159 | 160 | if split == 'train' or split == 'val' or split == 'test': 161 | img_folder = os.path.join(folder, 'leftImg8bit/' + split) 162 | mask_folder = os.path.join(folder, 'gtFine/'+ split) 163 | img_paths, mask_paths = get_path_pairs(img_folder, mask_folder) 164 | return img_paths, mask_paths 165 | else: 166 | assert split == 'trainval' 167 | print('trainval set') 168 | train_img_folder = os.path.join(folder, 'leftImg8bit/train') 169 | train_mask_folder = os.path.join(folder, 'gtFine/train') 170 | val_img_folder = os.path.join(folder, 'leftImg8bit/val') 171 | val_mask_folder = os.path.join(folder, 'gtFine/val') 172 | train_img_paths, train_mask_paths = get_path_pairs(train_img_folder, train_mask_folder) 173 | val_img_paths, val_mask_paths = get_path_pairs(val_img_folder, val_mask_folder) 174 | img_paths = train_img_paths + val_img_paths 175 | mask_paths = train_mask_paths + val_mask_paths 176 | return img_paths, mask_paths 177 | -------------------------------------------------------------------------------- /encoding/datasets/coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | import torch 5 | 6 | from PIL import Image 7 | from tqdm import trange 8 | 9 | from .base import BaseDataset 10 | 11 | class COCOSegmentation(BaseDataset): 12 | NUM_CLASS = 21 13 | CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 14 | 1, 64, 20, 63, 7, 72] 15 | 
def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 16 | mode=None, transform=None, target_transform=None, **kwargs): 17 | super(COCOSegmentation, self).__init__( 18 | root, split, mode, transform, target_transform, **kwargs) 19 | from pycocotools.coco import COCO 20 | from pycocotools import mask 21 | if split == 'train': 22 | print('train set') 23 | ann_file = os.path.join(root, 'annotations/instances_train2017.json') 24 | ids_file = os.path.join(root, 'annotations/train_ids.pth') 25 | self.root = os.path.join(root, 'train2017') 26 | else: 27 | print('val set') 28 | ann_file = os.path.join(root, 'annotations/instances_val2017.json') 29 | ids_file = os.path.join(root, 'annotations/val_ids.pth') 30 | self.root = os.path.join(root, 'val2017') 31 | self.coco = COCO(ann_file) 32 | self.coco_mask = mask 33 | if os.path.exists(ids_file): 34 | self.ids = torch.load(ids_file) 35 | else: 36 | ids = list(self.coco.imgs.keys()) 37 | self.ids = self._preprocess(ids, ids_file) 38 | self.transform = transform 39 | self.target_transform = target_transform 40 | 41 | def __getitem__(self, index): 42 | coco = self.coco 43 | img_id = self.ids[index] 44 | img_metadata = coco.loadImgs(img_id)[0] 45 | path = img_metadata['file_name'] 46 | img = Image.open(os.path.join(self.root, path)).convert('RGB') 47 | cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id)) 48 | mask = Image.fromarray(self._gen_seg_mask( 49 | cocotarget, img_metadata['height'], img_metadata['width'])) 50 | # synchrosized transform 51 | if self.mode == 'train': 52 | img, mask = self._sync_transform(img, mask) 53 | elif self.mode == 'val': 54 | img, mask = self._val_sync_transform(img, mask) 55 | else: 56 | assert self.mode == 'testval' 57 | mask = self._mask_transform(mask) 58 | # general resize, normalize and toTensor 59 | if self.transform is not None: 60 | img = self.transform(img) 61 | if self.target_transform is not None: 62 | mask = self.target_transform(mask) 63 | return img, mask 64 | 65 | def __len__(self): 66 | return len(self.ids) 67 | 68 | def _gen_seg_mask(self, target, h, w): 69 | mask = np.zeros((h, w), dtype=np.uint8) 70 | coco_mask = self.coco_mask 71 | for instance in target: 72 | rle = coco_mask.frPyObjects(instance['segmentation'], h, w) 73 | m = coco_mask.decode(rle) 74 | cat = instance['category_id'] 75 | if cat in self.CAT_LIST: 76 | c = self.CAT_LIST.index(cat) 77 | else: 78 | continue 79 | if len(m.shape) < 3: 80 | mask[:, :] += (mask == 0) * (m * c) 81 | else: 82 | mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8) 83 | return mask 84 | 85 | def _preprocess(self, ids, ids_file): 86 | print("Preprocessing mask, this will take a while." 
+ \ 87 | "But don't worry, it only run once for each split.") 88 | tbar = trange(len(ids)) 89 | new_ids = [] 90 | for i in tbar: 91 | img_id = ids[i] 92 | cocotarget = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id)) 93 | img_metadata = self.coco.loadImgs(img_id)[0] 94 | mask = self._gen_seg_mask(cocotarget, img_metadata['height'], 95 | img_metadata['width']) 96 | # more than 1k pixels 97 | if (mask > 0).sum() > 1000: 98 | new_ids.append(img_id) 99 | tbar.set_description('Doing: {}/{}, got {} qualified images'.\ 100 | format(i, len(ids), len(new_ids))) 101 | print('Found number of qualified images: ', len(new_ids)) 102 | torch.save(new_ids, ids_file) 103 | return new_ids 104 | """ 105 | NUM_CHANNEL = 91 106 | [] background 107 | [5] airplane 108 | [2] bicycle 109 | [16] bird 110 | [9] boat 111 | [44] bottle 112 | [6] bus 113 | [3] car 114 | [17] cat 115 | [62] chair 116 | [21] cow 117 | [67] dining table 118 | [18] dog 119 | [19] horse 120 | [4] motorcycle 121 | [1] person 122 | [64] potted plant 123 | [20] sheep 124 | [63] couch 125 | [7] train 126 | [72] tv 127 | """ 128 | -------------------------------------------------------------------------------- /encoding/datasets/pascal_aug.py: -------------------------------------------------------------------------------- 1 | import os 2 | import scipy.io 3 | 4 | from PIL import Image 5 | 6 | from .base import BaseDataset 7 | 8 | class VOCAugSegmentation(BaseDataset): 9 | voc = [ 10 | 'background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle', 11 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 12 | 'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train', 13 | 'tv' 14 | ] 15 | NUM_CLASS = 21 16 | TRAIN_BASE_DIR = 'VOCaug/dataset/' 17 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 18 | mode=None, transform=None, target_transform=None, **kwargs): 19 | super(VOCAugSegmentation, self).__init__(root, split, mode, transform, 20 | target_transform, **kwargs) 21 | # train/val/test splits are pre-cut 22 | _voc_root = os.path.join(root, self.TRAIN_BASE_DIR) 23 | _mask_dir = os.path.join(_voc_root, 'cls') 24 | _image_dir = os.path.join(_voc_root, 'img') 25 | if self.split == 'train': 26 | _split_f = os.path.join(_voc_root, 'trainval.txt') 27 | elif self.split == 'val': 28 | _split_f = os.path.join(_voc_root, 'val.txt') 29 | else: 30 | raise RuntimeError('Unknown dataset split.') 31 | self.images = [] 32 | self.masks = [] 33 | with open(os.path.join(_split_f), "r") as lines: 34 | for line in lines: 35 | _image = os.path.join(_image_dir, line.rstrip('\n')+".jpg") 36 | assert os.path.isfile(_image) 37 | self.images.append(_image) 38 | if self.mode != 'test': 39 | _mask = os.path.join(_mask_dir, line.rstrip('\n')+".mat") 40 | assert os.path.isfile(_mask) 41 | self.masks.append(_mask) 42 | 43 | assert (len(self.images) == len(self.masks)) 44 | 45 | def __getitem__(self, index): 46 | _img = Image.open(self.images[index]).convert('RGB') 47 | if self.mode == 'test': 48 | if self.transform is not None: 49 | _img = self.transform(_img) 50 | return _img, os.path.basename(self.images[index]) 51 | _target = self._load_mat(self.masks[index]) 52 | # synchrosized transform 53 | if self.mode == 'train': 54 | _img, _target = self._sync_transform( _img, _target) 55 | elif self.mode == 'val': 56 | _img, _target = self._val_sync_transform( _img, _target) 57 | # general resize, normalize and toTensor 58 | if self.transform is not None: 59 | _img = self.transform(_img) 60 | if self.target_transform is not 
None: 61 | _target = self.target_transform(_target) 62 | return _img, _target 63 | 64 | def _load_mat(self, filename): 65 | mat = scipy.io.loadmat(filename, mat_dtype=True, squeeze_me=True, 66 | struct_as_record=False) 67 | mask = mat['GTcls'].Segmentation 68 | return Image.fromarray(mask) 69 | 70 | def __len__(self): 71 | return len(self.images) 72 | -------------------------------------------------------------------------------- /encoding/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | import torch 5 | 6 | from PIL import Image 7 | from tqdm import tqdm 8 | 9 | from .base import BaseDataset 10 | 11 | class VOCSegmentation(BaseDataset): 12 | CLASSES = [ 13 | 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 14 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 15 | 'motorbike', 'person', 'potted-plant', 'sheep', 'sofa', 'train', 16 | 'tv/monitor', 'ambigious' 17 | ] 18 | NUM_CLASS = 21 19 | BASE_DIR = 'VOCdevkit/VOC2012' 20 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 21 | mode=None, transform=None, target_transform=None, **kwargs): 22 | super(VOCSegmentation, self).__init__(root, split, mode, transform, 23 | target_transform, **kwargs) 24 | _voc_root = os.path.join(self.root, self.BASE_DIR) 25 | _mask_dir = os.path.join(_voc_root, 'SegmentationClass') 26 | _image_dir = os.path.join(_voc_root, 'JPEGImages') 27 | # train/val/test splits are pre-cut 28 | _splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation') 29 | if self.split == 'train': 30 | _split_f = os.path.join(_splits_dir, 'trainval.txt') 31 | elif self.split == 'val': 32 | _split_f = os.path.join(_splits_dir, 'val.txt') 33 | elif self.split == 'test': 34 | _split_f = os.path.join(_splits_dir, 'test.txt') 35 | else: 36 | raise RuntimeError('Unknown dataset split.') 37 | self.images = [] 38 | self.masks = [] 39 | with open(os.path.join(_split_f), "r") as lines: 40 | for line in tqdm(lines): 41 | _image = os.path.join(_image_dir, line.rstrip('\n')+".jpg") 42 | assert os.path.isfile(_image) 43 | self.images.append(_image) 44 | if self.mode != 'test': 45 | _mask = os.path.join(_mask_dir, line.rstrip('\n')+".png") 46 | assert os.path.isfile(_mask) 47 | self.masks.append(_mask) 48 | 49 | if self.mode != 'test': 50 | assert (len(self.images) == len(self.masks)) 51 | 52 | def __getitem__(self, index): 53 | img = Image.open(self.images[index]).convert('RGB') 54 | if self.mode == 'test': 55 | if self.transform is not None: 56 | img = self.transform(img) 57 | return img, os.path.basename(self.images[index]) 58 | target = Image.open(self.masks[index]) 59 | # synchrosized transform 60 | if self.mode == 'train': 61 | img, target = self._sync_transform( img, target) 62 | elif self.mode == 'val': 63 | img, target = self._val_sync_transform( img, target) 64 | else: 65 | assert self.mode == 'testval' 66 | target = self._mask_transform(target) 67 | # general resize, normalize and toTensor 68 | if self.transform is not None: 69 | img = self.transform(img) 70 | if self.target_transform is not None: 71 | target = self.target_transform(target) 72 | return img, target 73 | 74 | def _mask_transform(self, mask): 75 | target = np.array(mask).astype('int32') 76 | target[target == 255] = -1 77 | return torch.from_numpy(target).long() 78 | 79 | def __len__(self): 80 | return len(self.images) 81 | 82 | @property 83 | def pred_offset(self): 84 | return 0 85 | 
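A note on the label conventions used by the dataset classes above: `VOCSegmentation` keeps class indices unchanged (`pred_offset` is 0) and maps the ignore label 255 to -1, while `ADE20KSegmentation` and `ContextSegmentation` (next file) subtract 1 in `_mask_transform` and expose `pred_offset = 1`, so `BaseDataset.make_pred` can shift predictions back into the original label space. A small illustrative sketch of that round trip, using made-up mask values:
```python
import numpy as np
import torch

# ADE20K-style mask: 0 = unlabeled, 1..150 = classes (toy values)
raw = np.array([[0, 1, 150]])
target = torch.from_numpy(raw.astype('int64') - 1)  # as in ADE20KSegmentation._mask_transform
print(target.tolist())      # [[-1, 0, 149]]; -1 is typically treated as an ignore index by the loss

pred = target.clamp(min=0)  # stand-in for the network's argmax class indices
print((pred + 1).tolist())  # [[1, 1, 150]]; make_pred adds pred_offset to restore dataset labels
```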
-------------------------------------------------------------------------------- /encoding/datasets/pcontext.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | 8 | import os 9 | import numpy as np 10 | 11 | import torch 12 | 13 | from PIL import Image 14 | from tqdm import trange 15 | 16 | from .base import BaseDataset 17 | 18 | class ContextSegmentation(BaseDataset): 19 | BASE_DIR = 'VOCdevkit/VOC2010' 20 | NUM_CLASS = 59 21 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 22 | mode=None, transform=None, target_transform=None, **kwargs): 23 | super(ContextSegmentation, self).__init__( 24 | root, split, mode, transform, target_transform, **kwargs) 25 | from detail import Detail 26 | #from detail import mask 27 | root = os.path.join(root, self.BASE_DIR) 28 | annFile = os.path.join(root, 'trainval_merged.json') 29 | imgDir = os.path.join(root, 'JPEGImages') 30 | # training mode 31 | self.detail = Detail(annFile, imgDir, split) 32 | self.transform = transform 33 | self.target_transform = target_transform 34 | self.ids = self.detail.getImgs() 35 | # generate masks 36 | self._mapping = np.sort(np.array([ 37 | 0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 38 | 23, 397, 25, 284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 39 | 427, 44, 45, 46, 308, 59, 440, 445, 31, 232, 65, 354, 424, 40 | 68, 326, 72, 458, 34, 207, 80, 355, 85, 347, 220, 349, 360, 41 | 98, 187, 104, 105, 366, 189, 368, 113, 115])) 42 | self._key = np.array(range(len(self._mapping))).astype('uint8') 43 | mask_file = os.path.join(root, self.split+'.pth') 44 | print('mask_file:', mask_file) 45 | if os.path.exists(mask_file): 46 | self.masks = torch.load(mask_file) 47 | else: 48 | self.masks = self._preprocess(mask_file) 49 | 50 | def _class_to_index(self, mask): 51 | # assert the values 52 | values = np.unique(mask) 53 | for i in range(len(values)): 54 | assert(values[i] in self._mapping) 55 | index = np.digitize(mask.ravel(), self._mapping, right=True) 56 | return self._key[index].reshape(mask.shape) 57 | 58 | def _preprocess(self, mask_file): 59 | masks = {} 60 | tbar = trange(len(self.ids)) 61 | print("Preprocessing mask, this will take a while." 
+ \ 62 | "But don't worry, it only run once for each split.") 63 | for i in tbar: 64 | img_id = self.ids[i] 65 | mask = Image.fromarray(self._class_to_index( 66 | self.detail.getMask(img_id))) 67 | masks[img_id['image_id']] = mask 68 | tbar.set_description("Preprocessing masks {}".format(img_id['image_id'])) 69 | torch.save(masks, mask_file) 70 | return masks 71 | 72 | def __getitem__(self, index): 73 | img_id = self.ids[index] 74 | path = img_id['file_name'] 75 | iid = img_id['image_id'] 76 | img = Image.open(os.path.join(self.detail.img_folder, path)).convert('RGB') 77 | if self.mode == 'test': 78 | if self.transform is not None: 79 | img = self.transform(img) 80 | return img, os.path.basename(path) 81 | # convert mask to 60 categories 82 | mask = self.masks[iid] 83 | # synchrosized transform 84 | if self.mode == 'train': 85 | img, mask = self._sync_transform(img, mask) 86 | elif self.mode == 'val': 87 | img, mask = self._val_sync_transform(img, mask) 88 | else: 89 | assert self.mode == 'testval' 90 | mask = self._mask_transform(mask) 91 | # general resize, normalize and toTensor 92 | if self.transform is not None: 93 | img = self.transform(img) 94 | if self.target_transform is not None: 95 | mask = self.target_transform(mask) 96 | return img, mask 97 | 98 | def _mask_transform(self, mask): 99 | target = np.array(mask).astype('int32') - 1 100 | return torch.from_numpy(target).long() 101 | 102 | def __len__(self): 103 | return len(self.ids) 104 | 105 | @property 106 | def pred_offset(self): 107 | return 1 108 | -------------------------------------------------------------------------------- /encoding/dilated/__init__.py: -------------------------------------------------------------------------------- 1 | """Dilated ResNet and DenseNet""" 2 | from .resnet import * 3 | -------------------------------------------------------------------------------- /encoding/dilated/resnet.py: -------------------------------------------------------------------------------- 1 | """Dilated ResNet""" 2 | import math 3 | import torch 4 | import torch.nn as nn 5 | import torch.utils.model_zoo as model_zoo 6 | 7 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 8 | 'resnet152', 'BasicBlock', 'Bottleneck'] 9 | 10 | model_urls = { 11 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 12 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 13 | } 14 | 15 | 16 | def conv3x3(in_planes, out_planes, stride=1): 17 | "3x3 convolution with padding" 18 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 19 | padding=1, bias=False) 20 | 21 | 22 | class BasicBlock(nn.Module): 23 | """ResNet BasicBlock 24 | """ 25 | expansion = 1 26 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, previous_dilation=1, 27 | norm_layer=None): 28 | super(BasicBlock, self).__init__() 29 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, 30 | padding=dilation, dilation=dilation, bias=False) 31 | self.bn1 = norm_layer(planes) 32 | self.relu = nn.ReLU(inplace=True) 33 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, 34 | padding=previous_dilation, dilation=previous_dilation, bias=False) 35 | self.bn2 = norm_layer(planes) 36 | self.downsample = downsample 37 | self.stride = stride 38 | 39 | def forward(self, x): 40 | residual = x 41 | 42 | out = self.conv1(x) 43 | out = self.bn1(out) 44 | out = self.relu(out) 45 | 46 | out = self.conv2(out) 47 | out = self.bn2(out) 48 | 49 | if self.downsample is 
not None: 50 | residual = self.downsample(x) 51 | 52 | out += residual 53 | out = self.relu(out) 54 | 55 | return out 56 | 57 | 58 | class Bottleneck(nn.Module): 59 | """ResNet Bottleneck 60 | """ 61 | # pylint: disable=unused-argument 62 | expansion = 4 63 | def __init__(self, inplanes, planes, stride=1, dilation=1, 64 | downsample=None, previous_dilation=1, norm_layer=None): 65 | super(Bottleneck, self).__init__() 66 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 67 | self.bn1 = norm_layer(planes) 68 | self.conv2 = nn.Conv2d( 69 | planes, planes, kernel_size=3, stride=stride, 70 | padding=dilation, dilation=dilation, bias=False) 71 | self.bn2 = norm_layer(planes) 72 | self.conv3 = nn.Conv2d( 73 | planes, planes * 4, kernel_size=1, bias=False) 74 | self.bn3 = norm_layer(planes * 4) 75 | self.relu = nn.ReLU(inplace=True) 76 | self.downsample = downsample 77 | self.dilation = dilation 78 | self.stride = stride 79 | 80 | def _sum_each(self, x, y): 81 | assert(len(x) == len(y)) 82 | z = [] 83 | for i in range(len(x)): 84 | z.append(x[i]+y[i]) 85 | return z 86 | 87 | def forward(self, x): 88 | residual = x 89 | 90 | out = self.conv1(x) 91 | out = self.bn1(out) 92 | out = self.relu(out) 93 | 94 | out = self.conv2(out) 95 | out = self.bn2(out) 96 | out = self.relu(out) 97 | 98 | out = self.conv3(out) 99 | out = self.bn3(out) 100 | 101 | if self.downsample is not None: 102 | residual = self.downsample(x) 103 | 104 | out += residual 105 | out = self.relu(out) 106 | 107 | return out 108 | 109 | 110 | class ResNet(nn.Module): 111 | """Dilated Pre-trained ResNet Model, which preduces the stride of 8 featuremaps at conv5. 112 | 113 | Parameters 114 | ---------- 115 | block : Block 116 | Class for the residual block. Options are BasicBlockV1, BottleneckV1. 117 | layers : list of int 118 | Numbers of layers in each block 119 | classes : int, default 1000 120 | Number of classification classes. 121 | dilated : bool, default False 122 | Applying dilation strategy to pretrained ResNet yielding a stride-8 model, 123 | typically used in Semantic Segmentation. 124 | norm_layer : object 125 | Normalization layer used in backbone network (default: :class:`mxnet.gluon.nn.BatchNorm`; 126 | for Synchronized Cross-GPU BachNormalization). 127 | 128 | Reference: 129 | 130 | - He, Kaiming, et al. "Deep residual learning for image recognition." Proceedings of the IEEE conference on computer vision and pattern recognition. 2016. 131 | 132 | - Yu, Fisher, and Vladlen Koltun. "Multi-scale context aggregation by dilated convolutions." 
133 | """ 134 | # pylint: disable=unused-variable 135 | def __init__(self, block, layers, num_classes=1000, dilated=True, 136 | deep_base=True, norm_layer=nn.BatchNorm2d, output_size=8): 137 | self.inplanes = 128 if deep_base else 64 138 | super(ResNet, self).__init__() 139 | if deep_base: 140 | self.conv1 = nn.Sequential( 141 | nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False), 142 | norm_layer(64), 143 | nn.ReLU(inplace=True), 144 | nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False), 145 | norm_layer(64), 146 | nn.ReLU(inplace=True), 147 | nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False), 148 | ) 149 | else: 150 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 151 | bias=False) 152 | self.bn1 = norm_layer(self.inplanes) 153 | self.relu = nn.ReLU(inplace=True) 154 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 155 | self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer) 156 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer) 157 | 158 | dilation_rate = 2 159 | if dilated and output_size <= 8: 160 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, 161 | dilation=dilation_rate, norm_layer=norm_layer) 162 | dilation_rate *= 2 163 | else: 164 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 165 | norm_layer=norm_layer) 166 | 167 | if dilated and output_size <= 16: 168 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, 169 | dilation=dilation_rate, norm_layer=norm_layer) 170 | else: 171 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 172 | norm_layer=norm_layer) 173 | 174 | self.avgpool = nn.AvgPool2d(7, stride=1) 175 | self.fc = nn.Linear(512 * block.expansion, num_classes) 176 | 177 | for m in self.modules(): 178 | if isinstance(m, nn.Conv2d): 179 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 180 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 181 | elif isinstance(m, norm_layer): 182 | m.weight.data.fill_(1) 183 | m.bias.data.zero_() 184 | 185 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=None): 186 | downsample = None 187 | if stride != 1 or self.inplanes != planes * block.expansion: 188 | downsample = nn.Sequential( 189 | nn.Conv2d(self.inplanes, planes * block.expansion, 190 | kernel_size=1, stride=stride, bias=False), 191 | norm_layer(planes * block.expansion), 192 | ) 193 | 194 | layers = [] 195 | if dilation == 1 or dilation == 2: 196 | layers.append(block(self.inplanes, planes, stride, dilation=1, 197 | downsample=downsample, previous_dilation=dilation, norm_layer=norm_layer)) 198 | elif dilation == 4: 199 | layers.append(block(self.inplanes, planes, stride, dilation=2, 200 | downsample=downsample, previous_dilation=dilation, norm_layer=norm_layer)) 201 | else: 202 | raise RuntimeError("=> unknown dilation size: {}".format(dilation)) 203 | 204 | self.inplanes = planes * block.expansion 205 | for i in range(1, blocks): 206 | layers.append(block(self.inplanes, planes, dilation=dilation, previous_dilation=dilation, 207 | norm_layer=norm_layer)) 208 | 209 | return nn.Sequential(*layers) 210 | 211 | def forward(self, x): 212 | x = self.conv1(x) 213 | x = self.bn1(x) 214 | x = self.relu(x) 215 | x = self.maxpool(x) 216 | 217 | x = self.layer1(x) 218 | x = self.layer2(x) 219 | x = self.layer3(x) 220 | x = self.layer4(x) 221 | 222 | x = self.avgpool(x) 223 | x = x.view(x.size(0), -1) 224 | x = self.fc(x) 225 | 226 | return x 227 | 228 | 229 | def resnet18(pretrained=False, **kwargs): 230 | """Constructs a ResNet-18 model. 231 | 232 | Args: 233 | pretrained (bool): If True, returns a model pre-trained on ImageNet 234 | """ 235 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 236 | if pretrained: 237 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 238 | return model 239 | 240 | 241 | def resnet34(pretrained=False, **kwargs): 242 | """Constructs a ResNet-34 model. 243 | 244 | Args: 245 | pretrained (bool): If True, returns a model pre-trained on ImageNet 246 | """ 247 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 248 | if pretrained: 249 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 250 | return model 251 | 252 | 253 | def resnet50(pretrained=False, root='~/.encoding/models', **kwargs): 254 | """Constructs a ResNet-50 model. 255 | 256 | Args: 257 | pretrained (bool): If True, returns a model pre-trained on ImageNet 258 | """ 259 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 260 | if pretrained: 261 | from ..models.model_store import get_model_file 262 | model.load_state_dict(torch.load( 263 | get_model_file('resnet50', root=root)), strict=False) 264 | return model 265 | 266 | 267 | def resnet101(pretrained=False, root='~/.encoding/models', **kwargs): 268 | """Constructs a ResNet-101 model. 269 | 270 | Args: 271 | pretrained (bool): If True, returns a model pre-trained on ImageNet 272 | """ 273 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 274 | if pretrained: 275 | from ..models.model_store import get_model_file 276 | model.load_state_dict(torch.load( 277 | get_model_file('resnet101', root=root)), strict=False) 278 | return model 279 | 280 | 281 | def resnet152(pretrained=False, root='~/.encoding/models', **kwargs): 282 | """Constructs a ResNet-152 model. 
283 | 284 | Args: 285 | pretrained (bool): If True, returns a model pre-trained on ImageNet 286 | """ 287 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 288 | if pretrained: 289 | from ..models.model_store import get_model_file 290 | model.load_state_dict(torch.load( 291 | get_model_file('resnet152', root=root)), strict=False) 292 | return model 293 | -------------------------------------------------------------------------------- /encoding/functions/__init__.py: -------------------------------------------------------------------------------- 1 | """Encoding Autograd Fuctions""" 2 | from .syncbn import * 3 | from .encoding import * 4 | -------------------------------------------------------------------------------- /encoding/functions/encoding.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2018 5 | ## 6 | ## This source code is licensed under the MIT-style license found in the 7 | ## LICENSE file in the root directory of this source tree 8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 | 10 | """Functions for Encoding Layer""" 11 | import torch 12 | 13 | from torch.autograd import Function 14 | 15 | __all__ = ['aggregate', 'scaled_l2'] 16 | 17 | class Aggregate(Function): 18 | @staticmethod 19 | def forward(ctx, A, X, C): 20 | ctx.save_for_backward(A, X, C) 21 | 22 | return (X.unsqueeze(2).expand(X.size(0), X.size(1), C.size(0), C.size(1)) - 23 | C.unsqueeze(0).unsqueeze(0)).mul_(A.unsqueeze(3)).sum(1) 24 | 25 | @staticmethod 26 | def backward(ctx, GE): 27 | A, X, C = ctx.saved_variables 28 | 29 | gradA = (X.unsqueeze(2).expand(X.size(0), X.size(1), C.size(0), C.size(1)) - 30 | C.unsqueeze(0).unsqueeze(0)).mul_(GE.unsqueeze(1)).sum(3) 31 | gradX = torch.bmm(A, GE) 32 | gradC = A.sum(1).unsqueeze(2).mul(GE).mul_(-1).sum(0) 33 | 34 | return gradA, gradX, gradC 35 | 36 | def aggregate(A, X, C): 37 | r""" Aggregate operation, aggregate the residuals of inputs (:math:`X`) with repect 38 | to the codewords (:math:`C`) with assignment weights (:math:`A`). 39 | 40 | .. math:: 41 | 42 | e_{k} = \sum_{i=1}^{N} a_{ik} (x_i - d_k) 43 | 44 | Shape: 45 | - Input: :math:`A\in\mathcal{R}^{B\times N\times K}` 46 | :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}` 47 | (where :math:`B` is batch, :math:`N` is total number of features, 48 | :math:`K` is number is codewords, :math:`D` is feature dimensions.) 
49 | - Output: :math:`E\in\mathcal{R}^{B\times K\times D}` 50 | 51 | Examples: 52 | >>> B,N,K,D = 2,3,4,5 53 | >>> A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), requires_grad=True) 54 | >>> X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), requires_grad=True) 55 | >>> C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), requires_grad=True) 56 | >>> func = encoding.aggregate() 57 | >>> E = func(A, X, C) 58 | """ 59 | return Aggregate.apply(A, X, C) 60 | 61 | class ScaledL2(Function): 62 | @staticmethod 63 | def forward(ctx, X, C, S): 64 | SL = (X.unsqueeze(2).expand(X.size(0), X.size(1), C.size(0), C.size(1)) - 65 | C.unsqueeze(0).unsqueeze(0)).pow_(2).sum(3).mul_(S.view(1, 1, C.size(0))) 66 | ctx.save_for_backward(X, C, S, SL) 67 | return SL 68 | 69 | @staticmethod 70 | def backward(ctx, GSL): 71 | X, C, S, SL = ctx.saved_variables 72 | 73 | tmp = (X.unsqueeze(2).expand(X.size(0), X.size(1), C.size(0), C.size(1)) - C.unsqueeze(0).unsqueeze(0)).mul_( 74 | (2 * GSL).mul_(S.view(1, 1, C.size(0))).unsqueeze(3) 75 | ) 76 | 77 | GX = tmp.sum(2) 78 | GC = tmp.sum((0, 1)).mul_(-1) 79 | GS = SL.div(S.view(1, 1, C.size(0))).mul_(GSL).sum((0, 1)) 80 | 81 | return GX, GC, GS 82 | 83 | def scaled_l2(X, C, S): 84 | r""" scaled_l2 distance 85 | 86 | .. math:: 87 | sl_{ik} = s_k \|x_i-c_k\|^2 88 | 89 | Shape: 90 | - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` 91 | :math:`C\in\mathcal{R}^{K\times D}` :math:`S\in \mathcal{R}^K` 92 | (where :math:`B` is batch, :math:`N` is total number of features, 93 | :math:`K` is number is codewords, :math:`D` is feature dimensions.) 94 | - Output: :math:`E\in\mathcal{R}^{B\times N\times K}` 95 | """ 96 | return ScaledL2.apply(X, C, S) 97 | 98 | if __name__ == '__main__': 99 | B, N, D, K = 3, 4, 5, 6 100 | X = torch.randn((B, N, D), dtype=torch.double,requires_grad=True).cuda() 101 | C = torch.randn((K, D), dtype=torch.double,requires_grad=True).cuda() 102 | S = torch.randn((K,), dtype=torch.double,requires_grad=True).cuda() 103 | assert torch.autograd.gradcheck(scaled_l2, (X, C, S)) 104 | 105 | A = torch.randn((B, N, K), dtype=torch.double, requires_grad=True).cuda() 106 | X = torch.randn((B, N, D), dtype=torch.double, requires_grad=True).cuda() 107 | C = torch.randn((K, D), dtype=torch.double, requires_grad=True).cuda() 108 | assert torch.autograd.gradcheck(aggregate, (A, X, C)) 109 | -------------------------------------------------------------------------------- /encoding/functions/syncbn.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2018 5 | ## 6 | ## This source code is licensed under the MIT-style license found in the 7 | ## LICENSE file in the root directory of this source tree 8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 | 10 | """Synchronized Cross-GPU Batch Normalization functions""" 11 | from torch.autograd import Function 12 | 13 | __all__ = ['normalization'] 14 | 15 | 16 | class Normalization(Function): 17 | @staticmethod 18 | def forward(ctx, input, mean, inv_std, gamma, beta): 19 | ctx.save_for_backward(input, mean, inv_std, gamma, beta) 20 | 21 | return (input - mean.unsqueeze(-1)).mul_((inv_std*gamma).unsqueeze(-1)).add_(beta.unsqueeze(-1)) 22 | 23 | @staticmethod 24 | def backward(ctx, gradOutput): 25 | input, mean, inv_std, gamma, beta = ctx.saved_variables 26 | 27 
| gradInputMean = gradOutput * (inv_std*gamma).unsqueeze(-1) 28 | gradInput = gradInputMean 29 | gradMean = gradInputMean.sum((0, 2)).mul_(-1) 30 | 31 | gradInvStdGamma = (input - mean.unsqueeze(-1)).mul_(gradOutput).sum((0, 2)) 32 | gradInvStd = gradInvStdGamma * gamma 33 | gradGamma = gradInvStdGamma * inv_std 34 | 35 | gradBeta = gradOutput.sum((0, 2)) 36 | 37 | return gradInput, gradMean, gradInvStd, gradGamma, gradBeta 38 | 39 | 40 | def normalization(input, mean, inv_std, gamma, beta): 41 | r"""Applies Batch Normalization over a 3d input that is seen as a 42 | mini-batch. 43 | 44 | .. _encoding.normalization: 45 | 46 | .. math:: 47 | 48 | y = \frac{x - \mu[x]}{ \sqrt{var[x] + \epsilon}} * \gamma + \beta 49 | 50 | Shape: 51 | - Input: :math:`(N, C)` or :math:`(N, C, L)` 52 | - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input) 53 | 54 | """ 55 | return Normalization.apply(input, mean, inv_std, gamma, beta) 56 | 57 | if __name__ == '__main__': 58 | import torch 59 | 60 | input = torch.randn((3,4,5), dtype=torch.float64, requires_grad=True).cuda() 61 | mean = torch.randn((input.size(1),), dtype=torch.float64, requires_grad=True).cuda() 62 | inv_std = torch.randn((input.size(1),), dtype=torch.float64, requires_grad=True).cuda() 63 | gamma = torch.randn((input.size(1),), dtype=torch.float64, requires_grad=True).cuda() 64 | beta = torch.randn((input.size(1),), dtype=torch.float64, requires_grad=True).cuda() 65 | 66 | assert torch.autograd.gradcheck(normalization, (input, mean, inv_std, gamma, beta)) 67 | -------------------------------------------------------------------------------- /encoding/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .model_zoo import get_model 2 | from .model_store import get_model_file 3 | from .base import * 4 | from .fcn import * 5 | from .psp import * 6 | from .encnet import * 7 | from .deeplabv3 import * 8 | 9 | 10 | def get_segmentation_model(name, **kwargs): 11 | from .fcn import get_fcn 12 | models = { 13 | 'fcn': get_fcn, 14 | 'psp': get_psp, 15 | 'encnet': get_encnet, 16 | 'deeplab': get_deeplab 17 | } 18 | return models[name.lower()](**kwargs) 19 | -------------------------------------------------------------------------------- /encoding/models/base.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import math 8 | import numpy as np 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | from torch.nn.parallel.data_parallel import DataParallel 15 | 16 | from ..nn import JPU, JPU_X 17 | from .. 
import dilated as resnet 18 | from ..utils import batch_pix_accuracy, batch_intersection_union 19 | 20 | up_kwargs = {'mode': 'bilinear', 'align_corners': True} 21 | 22 | __all__ = ['BaseNet', 'MultiEvalModule'] 23 | 24 | class BaseNet(nn.Module): 25 | def __init__(self, nclass, backbone, aux, se_loss, jpu=True, dilated=False, norm_layer=None, 26 | base_size=520, crop_size=480, mean=[.485, .456, .406], 27 | std=[.229, .224, .225], root='~/.encoding/models', **kwargs): 28 | super(BaseNet, self).__init__() 29 | self.nclass = nclass 30 | self.aux = aux 31 | self.se_loss = se_loss 32 | self.mean = mean 33 | self.std = std 34 | self.base_size = base_size 35 | self.crop_size = crop_size 36 | # copying modules from pretrained models 37 | if backbone == 'resnet50': 38 | self.pretrained = resnet.resnet50(pretrained=True, dilated=dilated, 39 | norm_layer=norm_layer, root=root) 40 | elif backbone == 'resnet101': 41 | self.pretrained = resnet.resnet101(pretrained=True, dilated=dilated, 42 | norm_layer=norm_layer, root=root) 43 | elif backbone == 'resnet152': 44 | self.pretrained = resnet.resnet152(pretrained=True, dilated=dilated, 45 | norm_layer=norm_layer, root=root) 46 | else: 47 | raise RuntimeError('unknown backbone: {}'.format(backbone)) 48 | # bilinear upsample options 49 | self._up_kwargs = up_kwargs 50 | self.backbone = backbone 51 | self.jpu = None 52 | if jpu == 'JPU': 53 | self.jpu = JPU([512, 1024, 2048], width=512, norm_layer=norm_layer, up_kwargs=up_kwargs) 54 | elif jpu == 'JPU_X': 55 | self.jpu = JPU_X([512, 1024, 2048], width=512, norm_layer=norm_layer, up_kwargs=up_kwargs) 56 | 57 | def base_forward(self, x): 58 | x = self.pretrained.conv1(x) 59 | x = self.pretrained.bn1(x) 60 | x = self.pretrained.relu(x) 61 | x = self.pretrained.maxpool(x) 62 | c1 = self.pretrained.layer1(x) 63 | c2 = self.pretrained.layer2(c1) 64 | c3 = self.pretrained.layer3(c2) 65 | c4 = self.pretrained.layer4(c3) 66 | 67 | if self.jpu: 68 | return self.jpu(c1, c2, c3, c4) 69 | else: 70 | return c1, c2, c3, c4 71 | 72 | def evaluate(self, x, target=None): 73 | pred = self.forward(x) 74 | if isinstance(pred, (tuple, list)): 75 | pred = pred[0] 76 | if target is None: 77 | return pred 78 | correct, labeled = batch_pix_accuracy(pred.data, target.data) 79 | inter, union = batch_intersection_union(pred.data, target.data, self.nclass) 80 | return correct, labeled, inter, union 81 | 82 | 83 | class MultiEvalModule(DataParallel): 84 | """Multi-size Segmentation Eavluator""" 85 | def __init__(self, module, nclass, device_ids=None, flip=True, 86 | scales=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75]): 87 | super(MultiEvalModule, self).__init__(module, device_ids) 88 | self.nclass = nclass 89 | self.base_size = module.base_size 90 | self.crop_size = module.crop_size 91 | self.scales = scales 92 | self.flip = flip 93 | print('MultiEvalModule: base_size {}, crop_size {}'. 
\ 94 | format(self.base_size, self.crop_size)) 95 | 96 | def parallel_forward(self, inputs, **kwargs): 97 | """Multi-GPU Mult-size Evaluation 98 | 99 | Args: 100 | inputs: list of Tensors 101 | """ 102 | inputs = [(input.unsqueeze(0).cuda(device),) 103 | for input, device in zip(inputs, self.device_ids)] 104 | replicas = self.replicate(self, self.device_ids[:len(inputs)]) 105 | kwargs = [] 106 | if len(inputs) < len(kwargs): 107 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 108 | elif len(kwargs) < len(inputs): 109 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 110 | outputs = self.parallel_apply(replicas, inputs, kwargs) 111 | #for out in outputs: 112 | # print('out.size()', out.size()) 113 | return outputs 114 | 115 | def forward(self, image): 116 | """Mult-size Evaluation""" 117 | # only single image is supported for evaluation 118 | batch, _, h, w = image.size() 119 | assert(batch == 1) 120 | stride_rate = 2.0/3.0 121 | crop_size = self.crop_size 122 | stride = int(crop_size * stride_rate) 123 | with torch.cuda.device_of(image): 124 | scores = image.new().resize_(batch,self.nclass,h,w).zero_().cuda() 125 | 126 | for scale in self.scales: 127 | long_size = int(math.ceil(self.base_size * scale)) 128 | if h > w: 129 | height = long_size 130 | width = int(1.0 * w * long_size / h + 0.5) 131 | short_size = width 132 | else: 133 | width = long_size 134 | height = int(1.0 * h * long_size / w + 0.5) 135 | short_size = height 136 | # resize image to current size 137 | cur_img = resize_image(image, height, width, **self.module._up_kwargs) 138 | if long_size <= crop_size: 139 | pad_img = pad_image(cur_img, self.module.mean, 140 | self.module.std, crop_size) 141 | outputs = module_inference(self.module, pad_img, self.flip) 142 | outputs = crop_image(outputs, 0, height, 0, width) 143 | else: 144 | if short_size < crop_size: 145 | # pad if needed 146 | pad_img = pad_image(cur_img, self.module.mean, 147 | self.module.std, crop_size) 148 | else: 149 | pad_img = cur_img 150 | _,_,ph,pw = pad_img.size() 151 | assert(ph >= height and pw >= width) 152 | # grid forward and normalize 153 | h_grids = int(math.ceil(1.0 * (ph-crop_size)/stride)) + 1 154 | w_grids = int(math.ceil(1.0 * (pw-crop_size)/stride)) + 1 155 | with torch.cuda.device_of(image): 156 | outputs = image.new().resize_(batch,self.nclass,ph,pw).zero_().cuda() 157 | count_norm = image.new().resize_(batch,1,ph,pw).zero_().cuda() 158 | # grid evaluation 159 | for idh in range(h_grids): 160 | for idw in range(w_grids): 161 | h0 = idh * stride 162 | w0 = idw * stride 163 | h1 = min(h0 + crop_size, ph) 164 | w1 = min(w0 + crop_size, pw) 165 | crop_img = crop_image(pad_img, h0, h1, w0, w1) 166 | # pad if needed 167 | pad_crop_img = pad_image(crop_img, self.module.mean, 168 | self.module.std, crop_size) 169 | output = module_inference(self.module, pad_crop_img, self.flip) 170 | outputs[:,:,h0:h1,w0:w1] += crop_image(output, 171 | 0, h1-h0, 0, w1-w0) 172 | count_norm[:,:,h0:h1,w0:w1] += 1 173 | assert((count_norm==0).sum()==0) 174 | outputs = outputs / count_norm 175 | outputs = outputs[:,:,:height,:width] 176 | 177 | score = resize_image(outputs, h, w, **self.module._up_kwargs) 178 | scores += score 179 | 180 | return scores 181 | 182 | 183 | def module_inference(module, image, flip=True): 184 | output = module.evaluate(image) 185 | if flip: 186 | fimg = flip_image(image) 187 | foutput = module.evaluate(fimg) 188 | output += flip_image(foutput) 189 | return output.exp() 190 | 191 | def resize_image(img, h, w, 
**up_kwargs): 192 | return F.interpolate(img, (h, w), **up_kwargs) 193 | 194 | def pad_image(img, mean, std, crop_size): 195 | b,c,h,w = img.size() 196 | assert(c==3) 197 | padh = crop_size - h if h < crop_size else 0 198 | padw = crop_size - w if w < crop_size else 0 199 | pad_values = -np.array(mean) / np.array(std) 200 | img_pad = img.new().resize_(b,c,h+padh,w+padw) 201 | for i in range(c): 202 | # note that pytorch pad params is in reversed orders 203 | img_pad[:,i,:,:] = F.pad(img[:,i,:,:], (0, padw, 0, padh), value=pad_values[i]) 204 | assert(img_pad.size(2)>=crop_size and img_pad.size(3)>=crop_size) 205 | return img_pad 206 | 207 | def crop_image(img, h0, h1, w0, w1): 208 | return img[:,:,h0:h1,w0:w1] 209 | 210 | def flip_image(img): 211 | assert(img.dim()==4) 212 | with torch.cuda.device_of(img): 213 | idx = torch.arange(img.size(3)-1, -1, -1).type_as(img).long() 214 | return img.index_select(3, idx) 215 | -------------------------------------------------------------------------------- /encoding/models/deeplabv3.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | from .fcn import FCNHead 8 | from .base import BaseNet 9 | 10 | __all__ = ['DeepLabV3', 'get_deeplab'] 11 | 12 | class DeepLabV3(BaseNet): 13 | def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs): 14 | super(DeepLabV3, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs) 15 | 16 | self.head = DeepLabV3Head(2048, nclass, norm_layer, self._up_kwargs) 17 | if aux: 18 | self.auxlayer = FCNHead(1024, nclass, norm_layer) 19 | 20 | def forward(self, x): 21 | _, _, h, w = x.size() 22 | _, _, c3, c4 = self.base_forward(x) 23 | 24 | outputs = [] 25 | x = self.head(c4) 26 | x = F.interpolate(x, (h,w), **self._up_kwargs) 27 | outputs.append(x) 28 | if self.aux: 29 | auxout = self.auxlayer(c3) 30 | auxout = F.interpolate(auxout, (h,w), **self._up_kwargs) 31 | outputs.append(auxout) 32 | 33 | return tuple(outputs) 34 | 35 | 36 | class DeepLabV3Head(nn.Module): 37 | def __init__(self, in_channels, out_channels, norm_layer, up_kwargs, atrous_rates=(12, 24, 36)): 38 | super(DeepLabV3Head, self).__init__() 39 | inter_channels = in_channels // 8 40 | self.aspp = ASPP_Module(in_channels, atrous_rates, norm_layer, up_kwargs) 41 | self.block = nn.Sequential( 42 | nn.Conv2d(inter_channels, inter_channels, 3, padding=1, bias=False), 43 | norm_layer(inter_channels), 44 | nn.ReLU(True), 45 | nn.Dropout2d(0.1, False), 46 | nn.Conv2d(inter_channels, out_channels, 1)) 47 | 48 | def forward(self, x): 49 | x = self.aspp(x) 50 | x = self.block(x) 51 | return x 52 | 53 | 54 | def ASPPConv(in_channels, out_channels, atrous_rate, norm_layer): 55 | block = nn.Sequential( 56 | nn.Conv2d(in_channels, out_channels, 3, padding=atrous_rate, 57 | dilation=atrous_rate, bias=False), 58 | norm_layer(out_channels), 59 | nn.ReLU(True)) 60 | return block 61 | 62 | class AsppPooling(nn.Module): 63 | def __init__(self, in_channels, out_channels, norm_layer, up_kwargs): 64 | super(AsppPooling, self).__init__() 65 | self._up_kwargs = up_kwargs 66 | self.gap = nn.Sequential(nn.AdaptiveAvgPool2d(1), 67 | nn.Conv2d(in_channels, out_channels, 1, bias=False), 68 | norm_layer(out_channels), 69 | nn.ReLU(True)) 70 | 71 | def forward(self, x): 72 | _, _, h, w = x.size() 73 | pool = self.gap(x) 74 | 75 | return F.interpolate(pool, (h,w), 
**self._up_kwargs) 76 | 77 | class ASPP_Module(nn.Module): 78 | def __init__(self, in_channels, atrous_rates, norm_layer, up_kwargs): 79 | super(ASPP_Module, self).__init__() 80 | out_channels = in_channels // 8 81 | rate1, rate2, rate3 = tuple(atrous_rates) 82 | self.b0 = nn.Sequential( 83 | nn.Conv2d(in_channels, out_channels, 1, bias=False), 84 | norm_layer(out_channels), 85 | nn.ReLU(True)) 86 | self.b1 = ASPPConv(in_channels, out_channels, rate1, norm_layer) 87 | self.b2 = ASPPConv(in_channels, out_channels, rate2, norm_layer) 88 | self.b3 = ASPPConv(in_channels, out_channels, rate3, norm_layer) 89 | self.b4 = AsppPooling(in_channels, out_channels, norm_layer, up_kwargs) 90 | 91 | self.project = nn.Sequential( 92 | nn.Conv2d(5*out_channels, out_channels, 1, bias=False), 93 | norm_layer(out_channels), 94 | nn.ReLU(True), 95 | nn.Dropout2d(0.5, False)) 96 | 97 | def forward(self, x): 98 | feat0 = self.b0(x) 99 | feat1 = self.b1(x) 100 | feat2 = self.b2(x) 101 | feat3 = self.b3(x) 102 | feat4 = self.b4(x) 103 | 104 | y = torch.cat((feat0, feat1, feat2, feat3, feat4), 1) 105 | 106 | return self.project(y) 107 | 108 | 109 | def get_deeplab(dataset='pascal_voc', backbone='resnet50', pretrained=False, 110 | root='~/.encoding/models', **kwargs): 111 | # infer number of classes 112 | from ..datasets import datasets 113 | model = DeepLabV3(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) 114 | if pretrained: 115 | raise NotImplementedError 116 | 117 | return model 118 | -------------------------------------------------------------------------------- /encoding/models/encnet.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | import encoding 12 | 13 | from .base import BaseNet 14 | from .fcn import FCNHead 15 | 16 | __all__ = ['EncNet', 'EncModule', 'get_encnet', 'get_encnet_resnet50_pcontext', 17 | 'get_encnet_resnet101_pcontext', 'get_encnet_resnet50_ade', 18 | 'get_encnet_resnet101_ade'] 19 | 20 | class EncNet(BaseNet): 21 | def __init__(self, nclass, backbone, aux=True, se_loss=True, 22 | norm_layer=nn.BatchNorm2d, **kwargs): 23 | super(EncNet, self).__init__(nclass, backbone, aux, se_loss, 24 | norm_layer=norm_layer, **kwargs) 25 | self.head = EncHead([512, 1024, 2048], self.nclass, se_loss=se_loss, jpu=kwargs['jpu'], 26 | lateral=kwargs['lateral'], norm_layer=norm_layer, 27 | up_kwargs=self._up_kwargs) 28 | if aux: 29 | self.auxlayer = FCNHead(1024, nclass, norm_layer=norm_layer) 30 | 31 | def forward(self, x): 32 | imsize = x.size()[2:] 33 | features = self.base_forward(x) 34 | 35 | x = list(self.head(*features)) 36 | x[0] = F.interpolate(x[0], imsize, **self._up_kwargs) 37 | if self.aux: 38 | auxout = self.auxlayer(features[2]) 39 | auxout = F.interpolate(auxout, imsize, **self._up_kwargs) 40 | x.append(auxout) 41 | return tuple(x) 42 | 43 | 44 | class EncModule(nn.Module): 45 | def __init__(self, in_channels, nclass, ncodes=32, se_loss=True, norm_layer=None): 46 | super(EncModule, self).__init__() 47 | self.se_loss = se_loss 48 | self.encoding = nn.Sequential( 49 | nn.Conv2d(in_channels, in_channels, 1, bias=False), 50 | norm_layer(in_channels), 51 | nn.ReLU(inplace=True), 52 | 
encoding.nn.Encoding(D=in_channels, K=ncodes), 53 | norm_layer(ncodes), 54 | nn.ReLU(inplace=True), 55 | encoding.nn.Mean(dim=1)) 56 | self.fc = nn.Sequential( 57 | nn.Linear(in_channels, in_channels), 58 | nn.Sigmoid()) 59 | if self.se_loss: 60 | self.selayer = nn.Linear(in_channels, nclass) 61 | 62 | def forward(self, x): 63 | en = self.encoding(x) 64 | b, c, _, _ = x.size() 65 | gamma = self.fc(en) 66 | y = gamma.view(b, c, 1, 1) 67 | outputs = [F.relu_(x + x * y)] 68 | if self.se_loss: 69 | outputs.append(self.selayer(en)) 70 | return tuple(outputs) 71 | 72 | 73 | class EncHead(nn.Module): 74 | def __init__(self, in_channels, out_channels, se_loss=True, jpu=True, lateral=False, 75 | norm_layer=None, up_kwargs=None): 76 | super(EncHead, self).__init__() 77 | self.se_loss = se_loss 78 | self.lateral = lateral 79 | self.up_kwargs = up_kwargs 80 | self.conv5 = nn.Sequential(nn.Conv2d(in_channels[-1], 512, 1, bias=False), 81 | norm_layer(512), 82 | nn.ReLU(inplace=True)) if jpu else \ 83 | nn.Sequential(nn.Conv2d(in_channels[-1], 512, 3, padding=1, bias=False), 84 | norm_layer(512), 85 | nn.ReLU(inplace=True)) 86 | if lateral: 87 | self.connect = nn.ModuleList([ 88 | nn.Sequential( 89 | nn.Conv2d(in_channels[0], 512, kernel_size=1, bias=False), 90 | norm_layer(512), 91 | nn.ReLU(inplace=True)), 92 | nn.Sequential( 93 | nn.Conv2d(in_channels[1], 512, kernel_size=1, bias=False), 94 | norm_layer(512), 95 | nn.ReLU(inplace=True)), 96 | ]) 97 | self.fusion = nn.Sequential( 98 | nn.Conv2d(3*512, 512, kernel_size=3, padding=1, bias=False), 99 | norm_layer(512), 100 | nn.ReLU(inplace=True)) 101 | self.encmodule = EncModule(512, out_channels, ncodes=32, 102 | se_loss=se_loss, norm_layer=norm_layer) 103 | self.conv6 = nn.Sequential(nn.Dropout2d(0.1, False), 104 | nn.Conv2d(512, out_channels, 1)) 105 | 106 | def forward(self, *inputs): 107 | feat = self.conv5(inputs[-1]) 108 | if self.lateral: 109 | c2 = self.connect[0](inputs[1]) 110 | c3 = self.connect[1](inputs[2]) 111 | feat = self.fusion(torch.cat([feat, c2, c3], 1)) 112 | outs = list(self.encmodule(feat)) 113 | outs[0] = self.conv6(outs[0]) 114 | return tuple(outs) 115 | 116 | 117 | def get_encnet(dataset='pascal_voc', backbone='resnet50', pretrained=False, 118 | root='~/.encoding/models', **kwargs): 119 | r"""EncNet model from the paper `"Context Encoding for Semantic Segmentation" 120 | `_ 121 | 122 | Parameters 123 | ---------- 124 | dataset : str, default pascal_voc 125 | The dataset that model pretrained on. (pascal_voc, ade20k) 126 | backbone : str, default resnet50 127 | The backbone network. (resnet50, 101, 152) 128 | pretrained : bool, default False 129 | Whether to load the pretrained weights for model. 130 | root : str, default '~/.encoding/models' 131 | Location for keeping the model parameters. 
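    Notes
    -----
    A minimal construction sketch. ``EncNet`` additionally reads the ``jpu``
    and ``lateral`` keyword arguments in its constructor, so they are passed
    through here as well (the values below are only an assumption):

    >>> model = get_encnet(dataset='ade20k', backbone='resnet50',
    ...                    pretrained=False, jpu='JPU', lateral=False)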
132 | 133 | 134 | Examples 135 | -------- 136 | >>> model = get_encnet(dataset='pascal_voc', backbone='resnet50', pretrained=False) 137 | >>> print(model) 138 | """ 139 | acronyms = { 140 | 'pascal_voc': 'voc', 141 | 'ade20k': 'ade', 142 | 'pcontext': 'pcontext', 143 | } 144 | # infer number of classes 145 | from ..datasets import datasets 146 | model = EncNet(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) 147 | if pretrained: 148 | from .model_store import get_model_file 149 | model.load_state_dict(torch.load( 150 | get_model_file('encnet_%s_%s'%(backbone, acronyms[dataset]), root=root))) 151 | return model 152 | 153 | def get_encnet_resnet50_pcontext(pretrained=False, root='~/.encoding/models', **kwargs): 154 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 155 | `_ 156 | 157 | Parameters 158 | ---------- 159 | pretrained : bool, default False 160 | Whether to load the pretrained weights for model. 161 | root : str, default '~/.encoding/models' 162 | Location for keeping the model parameters. 163 | 164 | 165 | Examples 166 | -------- 167 | >>> model = get_encnet_resnet50_pcontext(pretrained=True) 168 | >>> print(model) 169 | """ 170 | return get_encnet('pcontext', 'resnet50', pretrained, root=root, aux=True, 171 | base_size=520, crop_size=480, **kwargs) 172 | 173 | def get_encnet_resnet101_pcontext(pretrained=False, root='~/.encoding/models', **kwargs): 174 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 175 | `_ 176 | 177 | Parameters 178 | ---------- 179 | pretrained : bool, default False 180 | Whether to load the pretrained weights for model. 181 | root : str, default '~/.encoding/models' 182 | Location for keeping the model parameters. 183 | 184 | 185 | Examples 186 | -------- 187 | >>> model = get_encnet_resnet101_pcontext(pretrained=True) 188 | >>> print(model) 189 | """ 190 | return get_encnet('pcontext', 'resnet101', pretrained, root=root, aux=True, 191 | base_size=520, crop_size=480, lateral=True, **kwargs) 192 | 193 | def get_encnet_resnet50_ade(pretrained=False, root='~/.encoding/models', **kwargs): 194 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 195 | `_ 196 | 197 | Parameters 198 | ---------- 199 | pretrained : bool, default False 200 | Whether to load the pretrained weights for model. 201 | root : str, default '~/.encoding/models' 202 | Location for keeping the model parameters. 203 | 204 | 205 | Examples 206 | -------- 207 | >>> model = get_encnet_resnet50_ade(pretrained=True) 208 | >>> print(model) 209 | """ 210 | return get_encnet('ade20k', 'resnet50', pretrained, root=root, aux=True, 211 | base_size=520, crop_size=480, **kwargs) 212 | 213 | def get_encnet_resnet101_ade(pretrained=False, root='~/.encoding/models', **kwargs): 214 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 215 | `_ 216 | 217 | Parameters 218 | ---------- 219 | pretrained : bool, default False 220 | Whether to load the pretrained weights for model. 221 | root : str, default '~/.encoding/models' 222 | Location for keeping the model parameters. 
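    Notes
    -----
    This variant builds a ResNet-101 backbone with ``base_size=640``,
    ``crop_size=576`` and lateral connections (``lateral=True``).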
223 | 224 | 225 | Examples 226 | -------- 227 | >>> model = get_encnet_resnet50_ade(pretrained=True) 228 | >>> print(model) 229 | """ 230 | return get_encnet('ade20k', 'resnet101', pretrained, root=root, aux=True, 231 | base_size=640, crop_size=576, lateral=True, **kwargs) 232 | 233 | def get_encnet_resnet152_ade(pretrained=False, root='~/.encoding/models', **kwargs): 234 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 235 | `_ 236 | 237 | Parameters 238 | ---------- 239 | pretrained : bool, default False 240 | Whether to load the pretrained weights for model. 241 | root : str, default '~/.encoding/models' 242 | Location for keeping the model parameters. 243 | 244 | 245 | Examples 246 | -------- 247 | >>> model = get_encnet_resnet50_ade(pretrained=True) 248 | >>> print(model) 249 | """ 250 | return get_encnet('ade20k', 'resnet152', pretrained, root=root, aux=True, 251 | base_size=520, crop_size=480, **kwargs) 252 | -------------------------------------------------------------------------------- /encoding/models/fcn.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | from __future__ import division 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | from torch.nn.functional import interpolate 12 | 13 | from .base import BaseNet 14 | 15 | __all__ = ['FCN', 'get_fcn', 'get_fcn_resnet50_pcontext', 'get_fcn_resnet50_ade'] 16 | 17 | class FCN(BaseNet): 18 | r"""Fully Convolutional Networks for Semantic Segmentation 19 | 20 | Parameters 21 | ---------- 22 | nclass : int 23 | Number of categories for the training dataset. 24 | backbone : string 25 | Pre-trained dilated backbone network type (default:'resnet50'; 'resnet50', 26 | 'resnet101' or 'resnet152'). 27 | norm_layer : object 28 | Normalization layer used in backbone network (default: :class:`mxnet.gluon.nn.BatchNorm`; 29 | 30 | 31 | Reference: 32 | 33 | Long, Jonathan, Evan Shelhamer, and Trevor Darrell. "Fully convolutional networks 34 | for semantic segmentation." 
*CVPR*, 2015 35 | 36 | Examples 37 | -------- 38 | >>> model = FCN(nclass=21, backbone='resnet50') 39 | >>> print(model) 40 | """ 41 | def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs): 42 | super(FCN, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs) 43 | self.head = FCNHead(2048, nclass, norm_layer) 44 | if aux: 45 | self.auxlayer = FCNHead(1024, nclass, norm_layer) 46 | 47 | def forward(self, x): 48 | imsize = x.size()[2:] 49 | _, _, c3, c4 = self.base_forward(x) 50 | 51 | x = self.head(c4) 52 | x = interpolate(x, imsize, **self._up_kwargs) 53 | outputs = [x] 54 | if self.aux: 55 | auxout = self.auxlayer(c3) 56 | auxout = interpolate(auxout, imsize, **self._up_kwargs) 57 | outputs.append(auxout) 58 | return tuple(outputs) 59 | 60 | 61 | class FCNHead(nn.Module): 62 | def __init__(self, in_channels, out_channels, norm_layer): 63 | super(FCNHead, self).__init__() 64 | inter_channels = in_channels // 4 65 | self.conv5 = nn.Sequential(nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False), 66 | norm_layer(inter_channels), 67 | nn.ReLU(), 68 | nn.Dropout2d(0.1, False), 69 | nn.Conv2d(inter_channels, out_channels, 1)) 70 | 71 | def forward(self, x): 72 | return self.conv5(x) 73 | 74 | 75 | def get_fcn(dataset='pascal_voc', backbone='resnet50', pretrained=False, 76 | root='~/.encoding/models', **kwargs): 77 | r"""FCN model from the paper `"Fully Convolutional Network for semantic segmentation" 78 | `_ 79 | Parameters 80 | ---------- 81 | dataset : str, default pascal_voc 82 | The dataset that model pretrained on. (pascal_voc, ade20k) 83 | pretrained : bool, default False 84 | Whether to load the pretrained weights for model. 85 | root : str, default '~/.encoding/models' 86 | Location for keeping the model parameters. 87 | Examples 88 | -------- 89 | >>> model = get_fcn(dataset='pascal_voc', backbone='resnet50', pretrained=False) 90 | >>> print(model) 91 | """ 92 | acronyms = { 93 | 'pascal_voc': 'voc', 94 | 'pascal_aug': 'voc', 95 | 'pcontext': 'pcontext', 96 | 'ade20k': 'ade', 97 | } 98 | # infer number of classes 99 | from ..datasets import datasets 100 | model = FCN(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) 101 | if pretrained: 102 | from .model_store import get_model_file 103 | model.load_state_dict(torch.load( 104 | get_model_file('fcn_%s_%s'%(backbone, acronyms[dataset]), root=root))) 105 | return model 106 | 107 | def get_fcn_resnet50_pcontext(pretrained=False, root='~/.encoding/models', **kwargs): 108 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 109 | `_ 110 | 111 | Parameters 112 | ---------- 113 | pretrained : bool, default False 114 | Whether to load the pretrained weights for model. 115 | root : str, default '~/.encoding/models' 116 | Location for keeping the model parameters. 117 | 118 | 119 | Examples 120 | -------- 121 | >>> model = get_fcn_resnet50_pcontext(pretrained=True) 122 | >>> print(model) 123 | """ 124 | return get_fcn('pcontext', 'resnet50', pretrained, root=root, aux=False, **kwargs) 125 | 126 | def get_fcn_resnet50_ade(pretrained=False, root='~/.encoding/models', **kwargs): 127 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 128 | `_ 129 | 130 | Parameters 131 | ---------- 132 | pretrained : bool, default False 133 | Whether to load the pretrained weights for model. 134 | root : str, default '~/.encoding/models' 135 | Location for keeping the model parameters. 
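    Notes
    -----
    Despite the paper reference above, this helper returns a plain
    :class:`FCN` built by ``get_fcn('ade20k', 'resnet50', ...)``.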
136 | 137 | 138 | Examples 139 | -------- 140 | >>> model = get_fcn_resnet50_ade(pretrained=True) 141 | >>> print(model) 142 | """ 143 | return get_fcn('ade20k', 'resnet50', pretrained, root=root, **kwargs) 144 | -------------------------------------------------------------------------------- /encoding/models/model_store.py: -------------------------------------------------------------------------------- 1 | """Model store which provides pretrained models.""" 2 | from __future__ import print_function 3 | 4 | import os 5 | import zipfile 6 | 7 | from ..utils import download, check_sha1 8 | 9 | __all__ = ['get_model_file', 'purge'] 10 | 11 | _model_sha1 = {name: checksum for checksum, name in [ 12 | ('ebb6acbbd1d1c90b7f446ae59d30bf70c74febc1', 'resnet50'), 13 | ('2a57e44de9c853fa015b172309a1ee7e2d0e4e2a', 'resnet101'), 14 | ('0d43d698c66aceaa2bc0309f55efdd7ff4b143af', 'resnet152'), 15 | ('662e979de25a389f11c65e9f1df7e06c2c356381', 'fcn_resnet50_ade'), 16 | ('eeed8e582f0fdccdba8579e7490570adc6d85c7c', 'fcn_resnet50_pcontext'), 17 | ('54f70c772505064e30efd1ddd3a14e1759faa363', 'psp_resnet50_ade'), 18 | ('075195c5237b778c718fd73ceddfa1376c18dfd0', 'deeplab_resnet50_ade'), 19 | ('5ee47ee28b480cc781a195d13b5806d5bbc616bf', 'encnet_resnet101_coco'), 20 | ('4de91d5922d4d3264f678b663f874da72e82db00', 'encnet_resnet50_pcontext'), 21 | ('9f27ea13d514d7010e59988341bcbd4140fcc33d', 'encnet_resnet101_pcontext'), 22 | ('07ac287cd77e53ea583f37454e17d30ce1509a4a', 'encnet_resnet50_ade'), 23 | ('3f54fa3b67bac7619cd9b3673f5c8227cf8f4718', 'encnet_resnet101_ade'), 24 | ]} 25 | 26 | encoding_repo_url = 'https://hangzh.s3.amazonaws.com/' 27 | _url_format = '{repo_url}encoding/models/{file_name}.zip' 28 | 29 | def short_hash(name): 30 | if name not in _model_sha1: 31 | raise ValueError('Pretrained model for {name} is not available.'.format(name=name)) 32 | return _model_sha1[name][:8] 33 | 34 | def get_model_file(name, root=os.path.join('~', '.encoding', 'models')): 35 | r"""Return location for the pretrained on local file system. 36 | 37 | This function will download from online model zoo when model cannot be found or has mismatch. 38 | The root directory will be created if it doesn't exist. 39 | 40 | Parameters 41 | ---------- 42 | name : str 43 | Name of the model. 44 | root : str, default '~/.encoding/models' 45 | Location for keeping the model parameters. 46 | 47 | Returns 48 | ------- 49 | file_path 50 | Path to the requested pretrained model file. 51 | """ 52 | file_name = '{name}-{short_hash}'.format(name=name, short_hash=short_hash(name)) 53 | root = os.path.expanduser(root) 54 | file_path = os.path.join(root, file_name+'.pth') 55 | sha1_hash = _model_sha1[name] 56 | if os.path.exists(file_path): 57 | if check_sha1(file_path, sha1_hash): 58 | return file_path 59 | else: 60 | print('Mismatch in the content of model file {} detected.' + 61 | ' Downloading again.'.format(file_path)) 62 | else: 63 | print('Model file {} is not found. 
Downloading.'.format(file_path)) 64 | 65 | if not os.path.exists(root): 66 | os.makedirs(root) 67 | 68 | zip_file_path = os.path.join(root, file_name+'.zip') 69 | repo_url = os.environ.get('ENCODING_REPO', encoding_repo_url) 70 | if repo_url[-1] != '/': 71 | repo_url = repo_url + '/' 72 | download(_url_format.format(repo_url=repo_url, file_name=file_name), 73 | path=zip_file_path, 74 | overwrite=True) 75 | with zipfile.ZipFile(zip_file_path) as zf: 76 | zf.extractall(root) 77 | os.remove(zip_file_path) 78 | 79 | if check_sha1(file_path, sha1_hash): 80 | return file_path 81 | else: 82 | raise ValueError('Downloaded file has different hash. Please try again.') 83 | 84 | def purge(root=os.path.join('~', '.encoding', 'models')): 85 | r"""Purge all pretrained model files in local file store. 86 | 87 | Parameters 88 | ---------- 89 | root : str, default '~/.encoding/models' 90 | Location for keeping the model parameters. 91 | """ 92 | root = os.path.expanduser(root) 93 | files = os.listdir(root) 94 | for f in files: 95 | if f.endswith(".pth"): 96 | os.remove(os.path.join(root, f)) 97 | 98 | def pretrained_model_list(): 99 | return list(_model_sha1.keys()) 100 | -------------------------------------------------------------------------------- /encoding/models/model_zoo.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import, unused-wildcard-import 2 | from .fcn import * 3 | from .psp import * 4 | from .encnet import * 5 | 6 | __all__ = ['get_model'] 7 | 8 | 9 | def get_model(name, **kwargs): 10 | """Returns a pre-defined model by name 11 | 12 | Parameters 13 | ---------- 14 | name : str 15 | Name of the model. 16 | pretrained : bool 17 | Whether to load the pretrained weights for model. 18 | root : str, default '~/.encoding/models' 19 | Location for keeping the model parameters. 20 | 21 | Returns 22 | ------- 23 | Module: 24 | The model. 
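
    Examples
    --------
    >>> # a minimal sketch; some entries (e.g. the EncNet variants) may also
    >>> # expect extra kwargs such as ``jpu`` or ``lateral``
    >>> model = get_model('fcn_resnet50_ade', pretrained=False)
    >>> print(model)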
25 | """ 26 | models = { 27 | 'fcn_resnet50_pcontext': get_fcn_resnet50_pcontext, 28 | 'encnet_resnet50_pcontext': get_encnet_resnet50_pcontext, 29 | 'encnet_resnet101_pcontext': get_encnet_resnet101_pcontext, 30 | 'encnet_resnet50_ade': get_encnet_resnet50_ade, 31 | 'encnet_resnet101_ade': get_encnet_resnet101_ade, 32 | 'fcn_resnet50_ade': get_fcn_resnet50_ade, 33 | 'psp_resnet50_ade': get_psp_resnet50_ade, 34 | } 35 | name = name.lower() 36 | if name not in models: 37 | raise ValueError('%s\n\t%s' % (str(name), '\n\t'.join(sorted(models.keys())))) 38 | net = models[name](**kwargs) 39 | return net 40 | -------------------------------------------------------------------------------- /encoding/models/psp.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | from __future__ import division 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | from torch.nn.functional import interpolate 12 | 13 | from .base import BaseNet 14 | from .fcn import FCNHead 15 | from ..nn import PyramidPooling 16 | 17 | class PSP(BaseNet): 18 | def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs): 19 | super(PSP, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs) 20 | self.head = PSPHead(2048, nclass, norm_layer, self._up_kwargs) 21 | if aux: 22 | self.auxlayer = FCNHead(1024, nclass, norm_layer) 23 | 24 | def forward(self, x): 25 | _, _, h, w = x.size() 26 | _, _, c3, c4 = self.base_forward(x) 27 | 28 | outputs = [] 29 | x = self.head(c4) 30 | x = interpolate(x, (h,w), **self._up_kwargs) 31 | outputs.append(x) 32 | if self.aux: 33 | auxout = self.auxlayer(c3) 34 | auxout = interpolate(auxout, (h,w), **self._up_kwargs) 35 | outputs.append(auxout) 36 | return tuple(outputs) 37 | 38 | 39 | class PSPHead(nn.Module): 40 | def __init__(self, in_channels, out_channels, norm_layer, up_kwargs): 41 | super(PSPHead, self).__init__() 42 | inter_channels = in_channels // 4 43 | self.conv5 = nn.Sequential(PyramidPooling(in_channels, norm_layer, up_kwargs), 44 | nn.Conv2d(in_channels * 2, inter_channels, 3, padding=1, bias=False), 45 | norm_layer(inter_channels), 46 | nn.ReLU(True), 47 | nn.Dropout2d(0.1, False), 48 | nn.Conv2d(inter_channels, out_channels, 1)) 49 | 50 | def forward(self, x): 51 | return self.conv5(x) 52 | 53 | def get_psp(dataset='pascal_voc', backbone='resnet50', pretrained=False, 54 | root='~/.encoding/models', **kwargs): 55 | acronyms = { 56 | 'pascal_voc': 'voc', 57 | 'pascal_aug': 'voc', 58 | 'ade20k': 'ade', 59 | } 60 | # infer number of classes 61 | from ..datasets import datasets 62 | model = PSP(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) 63 | if pretrained: 64 | from .model_store import get_model_file 65 | model.load_state_dict(torch.load( 66 | get_model_file('psp_%s_%s'%(backbone, acronyms[dataset]), root=root))) 67 | return model 68 | 69 | def get_psp_resnet50_ade(pretrained=False, root='~/.encoding/models', **kwargs): 70 | r"""PSP model from the paper `"Context Encoding for Semantic Segmentation" 71 | `_ 72 | 73 | Parameters 74 | ---------- 75 | pretrained : bool, default False 76 | Whether to load the pretrained weights for model. 
77 | root : str, default '~/.encoding/models' 78 | Location for keeping the model parameters. 79 | 80 | 81 | Examples 82 | -------- 83 | >>> model = get_psp_resnet50_ade(pretrained=True) 84 | >>> print(model) 85 | """ 86 | return get_psp('ade20k', 'resnet50', pretrained, root=root, **kwargs) 87 | -------------------------------------------------------------------------------- /encoding/nn/__init__.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding NN Modules""" 12 | from .syncbn import * 13 | from .encoding import * 14 | from .customize import * 15 | -------------------------------------------------------------------------------- /encoding/nn/comm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : comm.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 27/01/2018 6 | # 7 | # This file is part of Synchronized-BatchNorm-PyTorch. 8 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 9 | # Distributed under MIT License. 10 | 11 | import queue 12 | import collections 13 | 14 | import threading 15 | 16 | __all__ = ['FutureResult', 'SlavePipe', 'SyncMaster'] 17 | 18 | 19 | class FutureResult(object): 20 | """A thread-safe future implementation. Used only as one-to-one pipe.""" 21 | 22 | def __init__(self): 23 | self._result = None 24 | self._lock = threading.Lock() 25 | self._cond = threading.Condition(self._lock) 26 | 27 | def put(self, result): 28 | with self._lock: 29 | assert self._result is None, 'Previous result has\'t been fetched.' 30 | self._result = result 31 | self._cond.notify() 32 | 33 | def get(self): 34 | with self._lock: 35 | if self._result is None: 36 | self._cond.wait() 37 | 38 | res = self._result 39 | self._result = None 40 | return res 41 | 42 | 43 | _MasterRegistry = collections.namedtuple('MasterRegistry', ['result']) 44 | _SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result']) 45 | 46 | 47 | class SlavePipe(_SlavePipeBase): 48 | """Pipe for master-slave communication.""" 49 | 50 | def run_slave(self, msg): 51 | self.queue.put((self.identifier, msg)) 52 | ret = self.result.get() 53 | self.queue.put(True) 54 | return ret 55 | 56 | 57 | class SyncMaster(object): 58 | """An abstract `SyncMaster` object. 59 | 60 | - During the replication, as the data parallel will trigger an callback of each module, all slave devices should 61 | call `register(id)` and obtain an `SlavePipe` to communicate with the master. 62 | - During the forward pass, master device invokes `run_master`, all messages from slave devices will be collected, 63 | and passed to a registered callback. 64 | - After receiving the messages, the master device should gather the information and determine to message passed 65 | back to each slave devices. 66 | """ 67 | 68 | def __init__(self, master_callback): 69 | """ 70 | 71 | Args: 72 | master_callback: a callback to be invoked after having collected messages from slave devices. 
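
        A rough coordination sketch (the identity callback below is only a
        hypothetical placeholder; the real callback lives in
        ``SyncBatchNorm._data_parallel_master``)::

            master = SyncMaster(master_callback=lambda msgs: msgs)
            pipe = master.register_slave(identifier=1)      # on each replica
            # slave thread:  out = pipe.run_slave(local_msg)     (blocks)
            # master thread: out = master.run_master(master_msg)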
73 | """ 74 | self._master_callback = master_callback 75 | self._queue = queue.Queue() 76 | self._registry = collections.OrderedDict() 77 | self._activated = False 78 | 79 | def register_slave(self, identifier): 80 | """ 81 | Register an slave device. 82 | 83 | Args: 84 | identifier: an identifier, usually is the device id. 85 | 86 | Returns: a `SlavePipe` object which can be used to communicate with the master device. 87 | 88 | """ 89 | if self._activated: 90 | assert self._queue.empty(), 'Queue is not clean before next initialization.' 91 | self._activated = False 92 | self._registry.clear() 93 | future = FutureResult() 94 | self._registry[identifier] = _MasterRegistry(future) 95 | return SlavePipe(identifier, self._queue, future) 96 | 97 | def run_master(self, master_msg): 98 | """ 99 | Main entry for the master device in each forward pass. 100 | The messages were first collected from each devices (including the master device), and then 101 | an callback will be invoked to compute the message to be sent back to each devices 102 | (including the master device). 103 | 104 | Args: 105 | master_msg: the message that the master want to send to itself. This will be placed as the first 106 | message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example. 107 | 108 | Returns: the message to be sent back to the master device. 109 | 110 | """ 111 | self._activated = True 112 | 113 | intermediates = [(0, master_msg)] 114 | for i in range(self.nr_slaves): 115 | intermediates.append(self._queue.get()) 116 | 117 | results = self._master_callback(intermediates) 118 | assert results[0][0] == 0, 'The first result should belongs to the master.' 119 | 120 | for i, res in results: 121 | if i == 0: 122 | continue 123 | self._registry[i].result.put(res) 124 | 125 | for i in range(self.nr_slaves): 126 | assert self._queue.get() is True 127 | 128 | return results[0][1] 129 | 130 | @property 131 | def nr_slaves(self): 132 | return len(self._registry) 133 | -------------------------------------------------------------------------------- /encoding/nn/customize.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Custermized NN Module""" 12 | import torch 13 | import torch.nn as nn 14 | 15 | from torch.nn import functional as F 16 | from torch.nn import Module, Sequential, Conv2d, ReLU, AdaptiveAvgPool2d, BCELoss, CrossEntropyLoss 17 | 18 | from torch.autograd import Variable 19 | 20 | torch_ver = torch.__version__[:3] 21 | 22 | __all__ = ['SegmentationLosses', 'PyramidPooling', 'JPU', 'JPU_X', 'Mean'] 23 | 24 | class SegmentationLosses(CrossEntropyLoss): 25 | """2D Cross Entropy Loss with Auxilary Loss""" 26 | def __init__(self, se_loss=False, se_weight=0.2, nclass=-1, 27 | aux=False, aux_weight=0.4, weight=None, 28 | size_average=True, ignore_index=-1, reduction='mean'): 29 | super(SegmentationLosses, self).__init__(weight, ignore_index=ignore_index, reduction=reduction) 30 | self.se_loss = se_loss 31 | self.aux = aux 32 | self.nclass = nclass 33 | self.se_weight = se_weight 34 | 
self.aux_weight = aux_weight 35 | self.bceloss = BCELoss(weight, reduction=reduction) 36 | 37 | def forward(self, *inputs): 38 | if not self.se_loss and not self.aux: 39 | return super(SegmentationLosses, self).forward(*inputs) 40 | elif not self.se_loss: 41 | pred1, pred2, target = tuple(inputs) 42 | loss1 = super(SegmentationLosses, self).forward(pred1, target) 43 | loss2 = super(SegmentationLosses, self).forward(pred2, target) 44 | return loss1 + self.aux_weight * loss2 45 | elif not self.aux: 46 | pred, se_pred, target = tuple(inputs) 47 | se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred) 48 | loss1 = super(SegmentationLosses, self).forward(pred, target) 49 | loss2 = self.bceloss(torch.sigmoid(se_pred), se_target) 50 | return loss1 + self.se_weight * loss2 51 | else: 52 | pred1, se_pred, pred2, target = tuple(inputs) 53 | se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred1) 54 | loss1 = super(SegmentationLosses, self).forward(pred1, target) 55 | loss2 = super(SegmentationLosses, self).forward(pred2, target) 56 | loss3 = self.bceloss(torch.sigmoid(se_pred), se_target) 57 | return loss1 + self.aux_weight * loss2 + self.se_weight * loss3 58 | 59 | @staticmethod 60 | def _get_batch_label_vector(target, nclass): 61 | # target is a 3D Variable BxHxW, output is 2D BxnClass 62 | batch = target.size(0) 63 | tvect = Variable(torch.zeros(batch, nclass)) 64 | for i in range(batch): 65 | hist = torch.histc(target[i].cpu().data.float(), 66 | bins=nclass, min=0, 67 | max=nclass-1) 68 | vect = hist>0 69 | tvect[i] = vect 70 | return tvect 71 | 72 | 73 | class Normalize(Module): 74 | r"""Performs :math:`L_p` normalization of inputs over specified dimension. 75 | 76 | Does: 77 | 78 | .. math:: 79 | v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)} 80 | 81 | for each subtensor v over dimension dim of input. Each subtensor is 82 | flattened into a vector, i.e. :math:`\lVert v \rVert_p` is not a matrix 83 | norm. 84 | 85 | With default arguments normalizes over the second dimension with Euclidean 86 | norm. 87 | 88 | Args: 89 | p (float): the exponent value in the norm formulation. Default: 2 90 | dim (int): the dimension to reduce. Default: 1 91 | """ 92 | def __init__(self, p=2, dim=1): 93 | super(Normalize, self).__init__() 94 | self.p = p 95 | self.dim = dim 96 | 97 | def forward(self, x): 98 | return F.normalize(x, self.p, self.dim, eps=1e-8) 99 | 100 | 101 | class PyramidPooling(Module): 102 | """ 103 | Reference: 104 | Zhao, Hengshuang, et al. 
*"Pyramid scene parsing network."* 105 | """ 106 | def __init__(self, in_channels, norm_layer, up_kwargs): 107 | super(PyramidPooling, self).__init__() 108 | self.pool1 = AdaptiveAvgPool2d(1) 109 | self.pool2 = AdaptiveAvgPool2d(2) 110 | self.pool3 = AdaptiveAvgPool2d(3) 111 | self.pool4 = AdaptiveAvgPool2d(6) 112 | 113 | out_channels = int(in_channels/4) 114 | self.conv1 = Sequential(Conv2d(in_channels, out_channels, 1, bias=False), 115 | norm_layer(out_channels), 116 | ReLU(True)) 117 | self.conv2 = Sequential(Conv2d(in_channels, out_channels, 1, bias=False), 118 | norm_layer(out_channels), 119 | ReLU(True)) 120 | self.conv3 = Sequential(Conv2d(in_channels, out_channels, 1, bias=False), 121 | norm_layer(out_channels), 122 | ReLU(True)) 123 | self.conv4 = Sequential(Conv2d(in_channels, out_channels, 1, bias=False), 124 | norm_layer(out_channels), 125 | ReLU(True)) 126 | # bilinear upsample options 127 | self._up_kwargs = up_kwargs 128 | 129 | def forward(self, x): 130 | _, _, h, w = x.size() 131 | feat1 = F.interpolate(self.conv1(self.pool1(x)), (h, w), **self._up_kwargs) 132 | feat2 = F.interpolate(self.conv2(self.pool2(x)), (h, w), **self._up_kwargs) 133 | feat3 = F.interpolate(self.conv3(self.pool3(x)), (h, w), **self._up_kwargs) 134 | feat4 = F.interpolate(self.conv4(self.pool4(x)), (h, w), **self._up_kwargs) 135 | return torch.cat((x, feat1, feat2, feat3, feat4), 1) 136 | 137 | 138 | class SeparableConv2d(nn.Module): 139 | def __init__(self, inplanes, planes, kernel_size=3, stride=1, padding=1, dilation=1, bias=False, norm_layer=nn.BatchNorm2d): 140 | super(SeparableConv2d, self).__init__() 141 | 142 | self.conv1 = nn.Conv2d(inplanes, inplanes, kernel_size, stride, padding, dilation, groups=inplanes, bias=bias) 143 | self.bn = norm_layer(inplanes) 144 | self.pointwise = nn.Conv2d(inplanes, planes, 1, 1, 0, 1, 1, bias=bias) 145 | 146 | def forward(self, x): 147 | x = self.conv1(x) 148 | x = self.bn(x) 149 | x = self.pointwise(x) 150 | return x 151 | 152 | 153 | class JPU(nn.Module): 154 | def __init__(self, in_channels, width=512, norm_layer=None, up_kwargs=None): 155 | super(JPU, self).__init__() 156 | self.up_kwargs = up_kwargs 157 | 158 | self.conv5 = nn.Sequential( 159 | nn.Conv2d(in_channels[-1], width, 3, padding=1, bias=False), 160 | norm_layer(width), 161 | nn.ReLU(inplace=True)) 162 | self.conv4 = nn.Sequential( 163 | nn.Conv2d(in_channels[-2], width, 3, padding=1, bias=False), 164 | norm_layer(width), 165 | nn.ReLU(inplace=True)) 166 | self.conv3 = nn.Sequential( 167 | nn.Conv2d(in_channels[-3], width, 3, padding=1, bias=False), 168 | norm_layer(width), 169 | nn.ReLU(inplace=True)) 170 | 171 | self.dilation1 = nn.Sequential(SeparableConv2d(3*width, width, kernel_size=3, padding=1, dilation=1, bias=False), 172 | norm_layer(width), 173 | nn.ReLU(inplace=True)) 174 | self.dilation2 = nn.Sequential(SeparableConv2d(3*width, width, kernel_size=3, padding=2, dilation=2, bias=False), 175 | norm_layer(width), 176 | nn.ReLU(inplace=True)) 177 | self.dilation3 = nn.Sequential(SeparableConv2d(3*width, width, kernel_size=3, padding=4, dilation=4, bias=False), 178 | norm_layer(width), 179 | nn.ReLU(inplace=True)) 180 | self.dilation4 = nn.Sequential(SeparableConv2d(3*width, width, kernel_size=3, padding=8, dilation=8, bias=False), 181 | norm_layer(width), 182 | nn.ReLU(inplace=True)) 183 | 184 | def forward(self, *inputs): 185 | feats = [self.conv5(inputs[-1]), self.conv4(inputs[-2]), self.conv3(inputs[-3])] 186 | _, _, h, w = feats[-1].size() 187 | feats[-2] = F.interpolate(feats[-2], 
(h, w), **self.up_kwargs) 188 | feats[-3] = F.interpolate(feats[-3], (h, w), **self.up_kwargs) 189 | feat = torch.cat(feats, dim=1) 190 | feat = torch.cat([self.dilation1(feat), self.dilation2(feat), self.dilation3(feat), self.dilation4(feat)], dim=1) 191 | 192 | return inputs[0], inputs[1], inputs[2], feat 193 | 194 | 195 | class JUM(nn.Module): 196 | def __init__(self, in_channels, width, dilation, norm_layer, up_kwargs): 197 | super(JUM, self).__init__() 198 | self.up_kwargs = up_kwargs 199 | 200 | self.conv_l = nn.Sequential( 201 | nn.Conv2d(in_channels[-1], width, 3, padding=1, bias=False), 202 | norm_layer(width), 203 | nn.ReLU(inplace=True)) 204 | self.conv_h = nn.Sequential( 205 | nn.Conv2d(in_channels[-2], width, 3, padding=1, bias=False), 206 | norm_layer(width), 207 | nn.ReLU(inplace=True)) 208 | 209 | norm_layer = lambda n_channels: nn.GroupNorm(32, n_channels) 210 | self.dilation1 = nn.Sequential(SeparableConv2d(2*width, width, kernel_size=3, padding=dilation, dilation=dilation, bias=False, norm_layer=norm_layer), 211 | norm_layer(width), 212 | nn.ReLU(inplace=True)) 213 | self.dilation2 = nn.Sequential(SeparableConv2d(2*width, width, kernel_size=3, padding=2*dilation, dilation=2*dilation, bias=False, norm_layer=norm_layer), 214 | norm_layer(width), 215 | nn.ReLU(inplace=True)) 216 | self.dilation3 = nn.Sequential(SeparableConv2d(2*width, width, kernel_size=3, padding=4*dilation, dilation=4*dilation, bias=False, norm_layer=norm_layer), 217 | norm_layer(width), 218 | nn.ReLU(inplace=True)) 219 | 220 | def forward(self, x_l, x_h): 221 | feats = [self.conv_l(x_l), self.conv_h(x_h)] 222 | _, _, h, w = feats[-1].size() 223 | feats[-2] = F.upsample(feats[-2], (h, w), **self.up_kwargs) 224 | feat = torch.cat(feats, dim=1) 225 | feat = torch.cat([feats[-2], self.dilation1(feat), self.dilation2(feat), self.dilation3(feat)], dim=1) 226 | 227 | return feat 228 | 229 | class JPU_X(nn.Module): 230 | def __init__(self, in_channels, width=512, norm_layer=None, up_kwargs=None): 231 | super(JPU_X, self).__init__() 232 | self.jum_1 = JUM(in_channels[:2], width//2, 1, norm_layer, up_kwargs) 233 | self.jum_2 = JUM(in_channels[1:], width, 2, norm_layer, up_kwargs) 234 | 235 | def forward(self, *inputs): 236 | feat = self.jum_1(inputs[2], inputs[1]) 237 | feat = self.jum_2(inputs[3], feat) 238 | 239 | return inputs[0], inputs[1], inputs[2], feat 240 | 241 | 242 | class Mean(Module): 243 | def __init__(self, dim, keep_dim=False): 244 | super(Mean, self).__init__() 245 | self.dim = dim 246 | self.keep_dim = keep_dim 247 | 248 | def forward(self, input): 249 | return input.mean(self.dim, self.keep_dim) 250 | -------------------------------------------------------------------------------- /encoding/nn/encoding.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Package Core NN Modules.""" 12 | import torch 13 | import torch.nn.functional as F 14 | 15 | from torch.nn import Module, Parameter 16 | 17 | from ..functions import scaled_l2, aggregate 18 | 19 | __all__ = ['Encoding'] 20 | 21 | class Encoding(Module): 22 | 
r""" 23 | Encoding Layer: a learnable residual encoder. 24 | 25 | .. image:: _static/img/cvpr17.svg 26 | :width: 50% 27 | :align: center 28 | 29 | Encoding Layer accpets 3D or 4D inputs. 30 | It considers an input featuremaps with the shape of :math:`C\times H\times W` 31 | as a set of C-dimentional input features :math:`X=\{x_1, ...x_N\}`, where N is total number 32 | of features given by :math:`H\times W`, which learns an inherent codebook 33 | :math:`D=\{d_1,...d_K\}` and a set of smoothing factor of visual centers 34 | :math:`S=\{s_1,...s_K\}`. Encoding Layer outputs the residuals with soft-assignment weights 35 | :math:`e_k=\sum_{i=1}^Ne_{ik}`, where 36 | 37 | .. math:: 38 | 39 | e_{ik} = \frac{exp(-s_k\|r_{ik}\|^2)}{\sum_{j=1}^K exp(-s_j\|r_{ij}\|^2)} r_{ik} 40 | 41 | and the residuals are given by :math:`r_{ik} = x_i - d_k`. The output encoders are 42 | :math:`E=\{e_1,...e_K\}`. 43 | 44 | Args: 45 | D: dimention of the features or feature channels 46 | K: number of codeswords 47 | 48 | Shape: 49 | - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` or 50 | :math:`\mathcal{R}^{B\times D\times H\times W}` (where :math:`B` is batch, 51 | :math:`N` is total number of features or :math:`H\times W`.) 52 | - Output: :math:`E\in\mathcal{R}^{B\times K\times D}` 53 | 54 | Attributes: 55 | codewords (Tensor): the learnable codewords of shape (:math:`K\times D`) 56 | scale (Tensor): the learnable scale factor of visual centers 57 | 58 | Reference: 59 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 60 | Amit Agrawal. “Context Encoding for Semantic Segmentation. 61 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 62 | 63 | Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." 
64 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017* 65 | 66 | Examples: 67 | >>> import encoding 68 | >>> import torch 69 | >>> import torch.nn.functional as F 70 | >>> from torch.autograd import Variable 71 | >>> B,C,H,W,K = 2,3,4,5,6 72 | >>> X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), requires_grad=True) 73 | >>> layer = encoding.Encoding(C,K).double().cuda() 74 | >>> E = layer(X) 75 | """ 76 | def __init__(self, D, K): 77 | super(Encoding, self).__init__() 78 | # init codewords and smoothing factor 79 | self.D, self.K = D, K 80 | self.codewords = Parameter(torch.Tensor(K, D), requires_grad=True) 81 | self.scale = Parameter(torch.Tensor(K), requires_grad=True) 82 | self.reset_params() 83 | 84 | def reset_params(self): 85 | std1 = 1./((self.K*self.D)**(1/2)) 86 | self.codewords.data.uniform_(-std1, std1) 87 | self.scale.data.uniform_(-1, 0) 88 | 89 | def forward(self, X): 90 | # input X is a 4D tensor 91 | assert(X.size(1) == self.D) 92 | B, D = X.size(0), self.D 93 | if X.dim() == 3: 94 | # BxDxN => BxNxD 95 | X = X.transpose(1, 2).contiguous() 96 | elif X.dim() == 4: 97 | # BxDxHxW => Bx(HW)xD 98 | X = X.view(B, D, -1).transpose(1, 2).contiguous() 99 | else: 100 | raise RuntimeError('Encoding Layer unknown input dims!') 101 | # assignment weights BxNxK 102 | A = F.softmax(scaled_l2(X, self.codewords, self.scale), dim=2) 103 | # aggregate 104 | E = aggregate(A, X, self.codewords) 105 | return E 106 | 107 | def __repr__(self): 108 | return self.__class__.__name__ + '(' \ 109 | + 'N x ' + str(self.D) + '=>' + str(self.K) + 'x' \ 110 | + str(self.D) + ')' 111 | -------------------------------------------------------------------------------- /encoding/nn/syncbn.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Synchronized Cross-GPU Batch Normalization Module""" 12 | import collections 13 | import torch 14 | 15 | from torch.nn.modules.batchnorm import _BatchNorm 16 | from torch.nn.functional import batch_norm 17 | from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast 18 | 19 | from ..functions import normalization 20 | from .comm import SyncMaster 21 | 22 | 23 | __all__ = ['SyncBatchNorm', 'BatchNorm'] 24 | 25 | 26 | class SyncBatchNorm(_BatchNorm): 27 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True): 28 | super(SyncBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine) 29 | 30 | self._sync_master = SyncMaster(self._data_parallel_master) 31 | self._parallel_id = None 32 | self._slave_pipe = None 33 | 34 | def forward(self, input): 35 | if not self.training: 36 | return batch_norm( 37 | input, self.running_mean, self.running_var, self.weight, self.bias, 38 | self.training, self.momentum, self.eps) 39 | 40 | # Resize the input to (B, C, -1). 
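        # The synchronization below happens in three steps:
        #   1. each replica computes its local sum(x) and sum(x^2) over the
        #      batch and spatial dimensions,
        #   2. the master device reduces them into a global mean / inv_std
        #      (see `_data_parallel_master`, coordinated by SyncMaster),
        #   3. the broadcasted statistics are applied via `normalization`.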
41 | input_shape = input.size() 42 | input = input.view(input_shape[0], self.num_features, -1) 43 | 44 | # sum(x) and sum(x^2) 45 | N = input.size(0) * input.size(2) 46 | xsum = input.sum((0, 2)) 47 | xsqsum = input.pow(2).sum((0, 2)) 48 | 49 | # all-reduce for global sum(x) and sum(x^2) 50 | if self._parallel_id == 0: 51 | mean, inv_std = self._sync_master.run_master(_ChildMessage(xsum, xsqsum, N)) 52 | else: 53 | mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(xsum, xsqsum, N)) 54 | # forward 55 | return normalization(input, mean, inv_std, self.weight, self.bias).view(input_shape) 56 | 57 | def __data_parallel_replicate__(self, ctx, copy_id): 58 | self._parallel_id = copy_id 59 | 60 | # parallel_id == 0 means master device. 61 | if self._parallel_id == 0: 62 | ctx.sync_master = self._sync_master 63 | else: 64 | self._slave_pipe = ctx.sync_master.register_slave(copy_id) 65 | 66 | def _data_parallel_master(self, intermediates): 67 | """Reduce the sum and square-sum, compute the statistics, and broadcast it.""" 68 | 69 | # Always using same "device order" makes the ReduceAdd operation faster. 70 | # Thanks to:: Tete Xiao (http://tetexiao.com/) 71 | intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device()) 72 | 73 | to_reduce = [i[1][:2] for i in intermediates] 74 | to_reduce = [j for i in to_reduce for j in i] # flatten 75 | target_gpus = [i[1].sum.get_device() for i in intermediates] 76 | 77 | sum_size = sum([i[1].sum_size for i in intermediates]) 78 | sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce) 79 | mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size) 80 | 81 | broadcasted = Broadcast.apply(target_gpus, mean, inv_std) 82 | 83 | outputs = [] 84 | for i, rec in enumerate(intermediates): 85 | outputs.append((rec[0], _MasterMessage(*broadcasted[i*2:i*2+2]))) 86 | 87 | return outputs 88 | 89 | def _compute_mean_std(self, sum_, ssum, size): 90 | """Compute the mean and standard-deviation with sum and square-sum. This method 91 | also maintains the moving average on the master device.""" 92 | assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.' 
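        # The statistics below follow directly from the per-device sums, with n = size:
        #   mean    = sum(x) / n
        #   sumvar  = sum(x^2) - n * mean^2      (equals sum((x - mean)^2))
        #   var     = sumvar / n                 (biased, used for normalization)
        #   var_u   = sumvar / (n - 1)           (unbiased, stored in running_var)
        #   inv_std = (var + eps) ** -0.5
        # For example, x = [1, 2, 3] gives sum = 6, ssum = 14, mean = 2,
        # sumvar = 2, biased var = 2/3 and unbiased var = 1.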
93 | mean = sum_ / size 94 | sumvar = ssum - sum_ * mean 95 | unbias_var = sumvar / (size - 1) 96 | bias_var = sumvar / size 97 | 98 | self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data 99 | self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data 100 | 101 | return mean, (bias_var + self.eps) ** -0.5 102 | 103 | @classmethod 104 | def convert_sync_batchnorm(cls, module, skip_classes=()): 105 | for skip_class in skip_classes: 106 | if isinstance(module, skip_class): 107 | return module 108 | 109 | module_output = module 110 | if isinstance(module, torch.nn.modules.batchnorm._BatchNorm): 111 | module_output = SyncBatchNorm(module.num_features, 112 | module.eps, 113 | module.momentum, 114 | module.affine) 115 | if module.affine: 116 | with torch.no_grad(): 117 | module_output.weight.copy_(module.weight) 118 | module_output.bias.copy_(module.bias) 119 | # keep requires_grad unchanged 120 | module_output.weight.requires_grad = module.weight.requires_grad 121 | module_output.bias.requires_grad = module.bias.requires_grad 122 | module_output.running_mean = module.running_mean 123 | module_output.running_var = module.running_var 124 | module_output.num_batches_tracked = module.num_batches_tracked 125 | for name, child in module.named_children(): 126 | module_output.add_module(name, cls.convert_sync_batchnorm(child, skip_classes)) 127 | del module 128 | return module_output 129 | 130 | 131 | # API adapted from https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 132 | _ChildMessage = collections.namedtuple('Message', ['sum', 'ssum', 'sum_size']) 133 | _MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std']) 134 | 135 | 136 | class BatchNorm(_BatchNorm): 137 | def _check_input_dim(self, input): 138 | pass 139 | -------------------------------------------------------------------------------- /encoding/parallel.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Data Parallel""" 12 | import threading 13 | 14 | import torch 15 | import torch.cuda.comm as comm 16 | 17 | from torch.autograd import Variable, Function 18 | from torch.nn.parallel.data_parallel import DataParallel 19 | from torch.nn.parallel.parallel_apply import get_a_var 20 | from torch.nn.parallel._functions import Broadcast 21 | 22 | torch_ver = torch.__version__[:3] 23 | 24 | __all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion'] 25 | 26 | def allreduce(*inputs): 27 | """Cross GPU all reduce autograd operation for calculate mean and 28 | variance in SyncBN. 
29 | """ 30 | return AllReduce.apply(*inputs) 31 | 32 | class AllReduce(Function): 33 | @staticmethod 34 | def forward(ctx, num_inputs, *inputs): 35 | ctx.num_inputs = num_inputs 36 | ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)] 37 | inputs = [inputs[i:i + num_inputs] 38 | for i in range(0, len(inputs), num_inputs)] 39 | # sort before reduce sum 40 | inputs = sorted(inputs, key=lambda i: i[0].get_device()) 41 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 42 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 43 | return tuple([t for tensors in outputs for t in tensors]) 44 | 45 | @staticmethod 46 | def backward(ctx, *inputs): 47 | inputs = [i.data for i in inputs] 48 | inputs = [inputs[i:i + ctx.num_inputs] 49 | for i in range(0, len(inputs), ctx.num_inputs)] 50 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 51 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 52 | return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors]) 53 | 54 | 55 | class Reduce(Function): 56 | @staticmethod 57 | def forward(ctx, *inputs): 58 | ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))] 59 | inputs = sorted(inputs, key=lambda i: i.get_device()) 60 | return comm.reduce_add(inputs) 61 | 62 | @staticmethod 63 | def backward(ctx, gradOutput): 64 | return Broadcast.apply(ctx.target_gpus, gradOutput) 65 | 66 | 67 | class DataParallelModel(DataParallel): 68 | """Implements data parallelism at the module level. 69 | 70 | This container parallelizes the application of the given module by 71 | splitting the input across the specified devices by chunking in the 72 | batch dimension. 73 | In the forward pass, the module is replicated on each device, 74 | and each replica handles a portion of the input. During the backwards pass, gradients from each replica are summed into the original module. 75 | Note that the outputs are not gathered, please use compatible 76 | :class:`encoding.parallel.DataParallelCriterion`. 77 | 78 | The batch size should be larger than the number of GPUs used. It should 79 | also be an integer multiple of the number of GPUs so that each chunk is 80 | the same size (so that each GPU processes the same number of samples). 81 | 82 | Args: 83 | module: module to be parallelized 84 | device_ids: CUDA devices (default: all devices) 85 | 86 | Reference: 87 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 88 | Amit Agrawal. “Context Encoding for Semantic Segmentation. 89 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 90 | 91 | Example:: 92 | 93 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 94 | >>> y = net(x) 95 | """ 96 | def gather(self, outputs, output_device): 97 | return outputs 98 | 99 | def replicate(self, module, device_ids): 100 | modules = super(DataParallelModel, self).replicate(module, device_ids) 101 | execute_replication_callbacks(modules) 102 | return modules 103 | 104 | 105 | class DataParallelCriterion(DataParallel): 106 | """ 107 | Calculate loss in multiple-GPUs, which balance the memory usage for 108 | Semantic Segmentation. 109 | 110 | The targets are splitted across the specified devices by chunking in 111 | the batch dimension. Please use together with :class:`encoding.parallel.DataParallelModel`. 112 | 113 | Reference: 114 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 115 | Amit Agrawal. 
"Context Encoding for Semantic Segmentation." 116 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 117 | 118 | Example:: 119 | 120 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 121 | >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2]) 122 | >>> y = net(x) 123 | >>> loss = criterion(y, target) 124 | """ 125 | def forward(self, inputs, *targets, **kwargs): 126 | # the inputs are already scattered across devices 127 | # scatter the targets in the same way 128 | if not self.device_ids: 129 | return self.module(inputs, *targets, **kwargs) 130 | targets, kwargs = self.scatter(targets, kwargs, self.device_ids) 131 | if len(self.device_ids) == 1: 132 | return self.module(inputs, *targets[0], **kwargs[0]) 133 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 134 | outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs) 135 | return Reduce.apply(*outputs) / len(outputs) 136 | 137 | 138 | def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None): 139 | assert len(modules) == len(inputs) 140 | assert len(targets) == len(inputs) 141 | if kwargs_tup: 142 | assert len(modules) == len(kwargs_tup) 143 | else: 144 | kwargs_tup = ({},) * len(modules) 145 | if devices is not None: 146 | assert len(modules) == len(devices) 147 | else: 148 | devices = [None] * len(modules) 149 | 150 | lock = threading.Lock() 151 | results = {} 152 | if torch_ver != "0.3": 153 | grad_enabled = torch.is_grad_enabled() 154 | 155 | def _worker(i, module, input, target, kwargs, device=None): 156 | if torch_ver != "0.3": 157 | torch.set_grad_enabled(grad_enabled) 158 | if device is None: 159 | device = get_a_var(input).get_device() 160 | try: 161 | with torch.cuda.device(device): 162 | output = module(*(input + target), **kwargs) 163 | with lock: 164 | results[i] = output 165 | except Exception as e: 166 | with lock: 167 | results[i] = e 168 | 169 | if len(modules) > 1: 170 | threads = [threading.Thread(target=_worker, 171 | args=(i, module, input, target, 172 | kwargs, device),) 173 | for i, (module, input, target, kwargs, device) in 174 | enumerate(zip(modules, inputs, targets, kwargs_tup, devices))] 175 | 176 | for thread in threads: 177 | thread.start() 178 | for thread in threads: 179 | thread.join() 180 | else: 181 | _worker(0, modules[0], inputs[0], targets[0], kwargs_tup[0], devices[0])  # pass targets so kwargs/device land in the right arguments 182 | 183 | outputs = [] 184 | for i in range(len(inputs)): 185 | output = results[i] 186 | if isinstance(output, Exception): 187 | raise output 188 | outputs.append(output) 189 | return outputs 190 | 191 | 192 | ########################################################################### 193 | # Adapted from Synchronized-BatchNorm-PyTorch. 194 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 195 | # 196 | class CallbackContext(object): 197 | pass 198 | 199 | 200 | def execute_replication_callbacks(modules): 201 | """ 202 | Execute a replication callback `__data_parallel_replicate__` on each module created 203 | by the original replication. 204 | 205 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`. 206 | 207 | Note that, as all replicated modules are isomorphic, we assign each sub-module a context 208 | (shared among multiple copies of this module on different devices). 209 | Through this context, different copies can share some information.
210 | 211 | We guarantee that the callback on the master copy (the first copy) will be called ahead 212 | of the callbacks of any slave copies. 213 | """ 214 | master_copy = modules[0] 215 | nr_modules = len(list(master_copy.modules())) 216 | ctxs = [CallbackContext() for _ in range(nr_modules)] 217 | 218 | for i, module in enumerate(modules): 219 | for j, m in enumerate(module.modules()): 220 | if hasattr(m, '__data_parallel_replicate__'): 221 | m.__data_parallel_replicate__(ctxs[j], i) 222 | -------------------------------------------------------------------------------- /encoding/utils/__init__.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Util Tools""" 12 | from .lr_scheduler import LR_Scheduler 13 | from .metrics import SegmentationMetric, batch_intersection_union, batch_pix_accuracy 14 | from .pallete import get_mask_pallete 15 | from .files import * 16 | 17 | __all__ = ['LR_Scheduler', 'batch_pix_accuracy', 'batch_intersection_union', 18 | 'save_checkpoint', 'download', 'mkdir', 'check_sha1', 19 | 'get_mask_pallete'] 20 | -------------------------------------------------------------------------------- /encoding/utils/files.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import shutil 4 | import hashlib 5 | import requests 6 | 7 | import torch 8 | 9 | from tqdm import tqdm 10 | 11 | __all__ = ['save_checkpoint', 'download', 'mkdir', 'check_sha1'] 12 | 13 | def save_checkpoint(state, args, is_best, filename='checkpoint.pth.tar'): 14 | """Saves checkpoint to disk""" 15 | directory = "experiments/segmentation/runs/%s/%s/%s/"%(args.dataset, args.model, args.checkname) 16 | if not os.path.exists(directory): 17 | os.makedirs(directory) 18 | filename = directory + filename 19 | torch.save(state, filename) 20 | if is_best: 21 | shutil.copyfile(filename, directory + 'model_best.pth.tar') 22 | 23 | 24 | def download(url, path=None, overwrite=False, sha1_hash=None): 25 | """Download a file from the given URL 26 | Parameters 27 | ---------- 28 | url : str 29 | URL to download 30 | path : str, optional 31 | Destination path to store the downloaded file. By default, stores to the 32 | current directory with the same name as in the url. 33 | overwrite : bool, optional 34 | Whether to overwrite the destination file if it already exists. 35 | sha1_hash : str, optional 36 | Expected sha1 hash in hexadecimal digits. An existing file is ignored when the hash 37 | is specified but doesn't match. 38 | Returns 39 | ------- 40 | str 41 | The file path of the downloaded file.
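    Examples
    --------
    A minimal sketch; the URL and destination below are placeholders for
    illustration, not assets shipped with this repository.

    >>> fname = download('https://example.com/resnet50.pth',
    ...                  path='./pretrained', sha1_hash=None)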
42 | """ 43 | if path is None: 44 | fname = url.split('/')[-1] 45 | else: 46 | path = os.path.expanduser(path) 47 | if os.path.isdir(path): 48 | fname = os.path.join(path, url.split('/')[-1]) 49 | else: 50 | fname = path 51 | 52 | if overwrite or not os.path.exists(fname) or (sha1_hash and not check_sha1(fname, sha1_hash)): 53 | dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname))) 54 | if not os.path.exists(dirname): 55 | os.makedirs(dirname) 56 | 57 | print('Downloading %s from %s...'%(fname, url)) 58 | r = requests.get(url, stream=True) 59 | if r.status_code != 200: 60 | raise RuntimeError("Failed downloading url %s"%url) 61 | total_length = r.headers.get('content-length') 62 | with open(fname, 'wb') as f: 63 | if total_length is None: # no content length header 64 | for chunk in r.iter_content(chunk_size=1024): 65 | if chunk: # filter out keep-alive new chunks 66 | f.write(chunk) 67 | else: 68 | total_length = int(total_length) 69 | for chunk in tqdm(r.iter_content(chunk_size=1024), 70 | total=int(total_length / 1024. + 0.5), 71 | unit='KB', unit_scale=False, dynamic_ncols=True): 72 | f.write(chunk) 73 | 74 | if sha1_hash and not check_sha1(fname, sha1_hash): 75 | raise UserWarning('File {} is downloaded but the content hash does not match. ' \ 76 | 'The repo may be outdated or download may be incomplete. ' \ 77 | 'If the "repo_url" is overridden, consider switching to ' \ 78 | 'the default repo.'.format(fname)) 79 | 80 | return fname 81 | 82 | 83 | def check_sha1(filename, sha1_hash): 84 | """Check whether the sha1 hash of the file content matches the expected hash. 85 | Parameters 86 | ---------- 87 | filename : str 88 | Path to the file. 89 | sha1_hash : str 90 | Expected sha1 hash in hexadecimal digits. 91 | Returns 92 | ------- 93 | bool 94 | Whether the file content matches the expected hash. 
95 | """ 96 | sha1 = hashlib.sha1() 97 | with open(filename, 'rb') as f: 98 | while True: 99 | data = f.read(1048576) 100 | if not data: 101 | break 102 | sha1.update(data) 103 | 104 | return sha1.hexdigest() == sha1_hash 105 | 106 | 107 | def mkdir(path): 108 | """Make a directory; succeed silently if it already exists.""" 109 | try: 110 | os.makedirs(path) 111 | except OSError as exc: # Python >2.5 112 | if exc.errno == errno.EEXIST and os.path.isdir(path): 113 | pass 114 | else: 115 | raise 116 | -------------------------------------------------------------------------------- /encoding/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import math 12 | 13 | class LR_Scheduler(object): 14 | """Learning Rate Scheduler 15 | 16 | Step mode: ``lr = baselr * 0.1 ^ floor(epoch / lr_step)`` 17 | 18 | Cosine mode: ``lr = baselr * 0.5 * (1 + cos(pi * iter/maxiter))`` 19 | 20 | Poly mode: ``lr = baselr * (1 - iter/maxiter) ^ 0.9`` 21 | 22 | Args: 23 | args: :attr:`args.lr_scheduler` lr scheduler mode (`cos`, `poly`, `step`), 24 | :attr:`args.lr` base learning rate, :attr:`args.epochs` number of epochs, 25 | :attr:`args.lr_step` 26 | 27 | iters_per_epoch: number of iterations per epoch 28 | """ 29 | def __init__(self, mode, base_lr, num_epochs, iters_per_epoch=0, 30 | lr_step=0, warmup_epochs=0): 31 | self.mode = mode 32 | print('Using {} LR Scheduler!'.format(self.mode)) 33 | self.lr = base_lr 34 | if mode == 'step': 35 | assert lr_step 36 | self.lr_step = lr_step 37 | self.iters_per_epoch = iters_per_epoch 38 | self.N = num_epochs * iters_per_epoch 39 | self.epoch = -1 40 | self.warmup_iters = warmup_epochs * iters_per_epoch 41 | 42 | def __call__(self, optimizer, i, epoch, best_pred): 43 | T = epoch * self.iters_per_epoch + i 44 | if self.mode == 'cos': 45 | lr = 0.5 * self.lr * (1 + math.cos(1.0 * T / self.N * math.pi)) 46 | elif self.mode == 'poly': 47 | lr = self.lr * pow((1 - 1.0 * T / self.N), 0.9) 48 | elif self.mode == 'step': 49 | lr = self.lr * (0.1 ** (epoch // self.lr_step)) 50 | else: 51 | raise NotImplementedError 52 | # warm up lr schedule 53 | if self.warmup_iters > 0 and T < self.warmup_iters: 54 | lr = lr * 1.0 * T / self.warmup_iters 55 | if epoch > self.epoch: 56 | print('\n=>Epoch %i, learning rate = %.4f, \ 57 | previous best = %.4f' % (epoch, lr, best_pred)) 58 | self.epoch = epoch 59 | assert lr >= 0 60 | self._adjust_learning_rate(optimizer, lr) 61 | 62 | def _adjust_learning_rate(self, optimizer, lr): 63 | if len(optimizer.param_groups) == 1: 64 | optimizer.param_groups[0]['lr'] = lr 65 | else: 66 | # enlarge the lr at the head 67 | optimizer.param_groups[0]['lr'] = lr 68 | for i in range(1, len(optimizer.param_groups)): 69 | optimizer.param_groups[i]['lr'] = lr * 10 70 | -------------------------------------------------------------------------------- /encoding/utils/metrics.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email:
zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import threading 12 | import numpy as np 13 | 14 | import torch 15 | 16 | class SegmentationMetric(object): 17 | """Computes pixAcc and mIoU metric scores 18 | """ 19 | def __init__(self, nclass): 20 | self.nclass = nclass 21 | self.lock = threading.Lock() 22 | self.reset() 23 | 24 | def update(self, labels, preds): 25 | def evaluate_worker(self, label, pred): 26 | correct, labeled = batch_pix_accuracy( 27 | pred, label) 28 | inter, union = batch_intersection_union( 29 | pred, label, self.nclass) 30 | with self.lock: 31 | self.total_correct += correct 32 | self.total_label += labeled 33 | self.total_inter += inter 34 | self.total_union += union 35 | return 36 | 37 | if isinstance(preds, torch.Tensor): 38 | evaluate_worker(self, labels, preds) 39 | elif isinstance(preds, (list, tuple)): 40 | threads = [threading.Thread(target=evaluate_worker, 41 | args=(self, label, pred), 42 | ) 43 | for (label, pred) in zip(labels, preds)] 44 | for thread in threads: 45 | thread.start() 46 | for thread in threads: 47 | thread.join() 48 | else: 49 | raise NotImplementedError 50 | 51 | def get(self): 52 | pixAcc = 1.0 * self.total_correct / (np.spacing(1) + self.total_label) 53 | IoU = 1.0 * self.total_inter / (np.spacing(1) + self.total_union) 54 | mIoU = IoU.mean() 55 | return pixAcc, mIoU 56 | 57 | def reset(self): 58 | self.total_inter = 0 59 | self.total_union = 0 60 | self.total_correct = 0 61 | self.total_label = 0 62 | return 63 | 64 | 65 | def batch_pix_accuracy(output, target): 66 | """Batch Pixel Accuracy 67 | Args: 68 | output: model output, 4D score tensor 69 | target: label, 3D tensor 70 | """ 71 | _, predict = torch.max(output, 1) 72 | 73 | predict = predict.cpu().numpy().astype('int64') + 1 74 | target = target.cpu().numpy().astype('int64') + 1 75 | 76 | pixel_labeled = np.sum(target > 0) 77 | pixel_correct = np.sum((predict == target)*(target > 0)) 78 | assert pixel_correct <= pixel_labeled, \ 79 | "Correct area should not be larger than Labeled area" 80 | return pixel_correct, pixel_labeled 81 | 82 | 83 | def batch_intersection_union(output, target, nclass): 84 | """Batch Intersection over Union 85 | Args: 86 | output: model output, 4D score tensor 87 | target: label, 3D tensor 88 | nclass: number of categories (int) 89 | """ 90 | _, predict = torch.max(output, 1) 91 | mini = 1 92 | maxi = nclass 93 | nbins = nclass 94 | predict = predict.cpu().numpy().astype('int64') + 1 95 | target = target.cpu().numpy().astype('int64') + 1 96 | 97 | predict = predict * (target > 0).astype(predict.dtype) 98 | intersection = predict * (predict == target) 99 | # areas of intersection and union 100 | area_inter, _ = np.histogram(intersection, bins=nbins, range=(mini, maxi)) 101 | area_pred, _ = np.histogram(predict, bins=nbins, range=(mini, maxi)) 102 | area_lab, _ = np.histogram(target, bins=nbins, range=(mini, maxi)) 103 | area_union = area_pred + area_lab - area_inter 104 | assert (area_inter <= area_union).all(), \ 105 | "Intersection area should not be larger than Union area" 106 | return area_inter, area_union 107 | -------------------------------------------------------------------------------- /encoding/utils/pallete.py: -------------------------------------------------------------------------------- 1 |
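# Note on the colormaps defined in this file: _get_voc_pallete builds the
# standard PASCAL VOC palette by bit-interleaving each class index into the
# R, G and B channels, e.g. class 1 -> (128, 0, 0), class 2 -> (0, 128, 0),
# class 3 -> (128, 128, 0); adepallete and citypallete are flat
# [R, G, B, R, G, B, ...] lists consumed by PIL's Image.putpalette.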
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | from PIL import Image 12 | 13 | def get_mask_pallete(npimg, dataset='detail'): 14 | """Get image color pallete for visualizing masks""" 15 | # recovery boundary 16 | dataset = dataset.lower() 17 | if dataset == 'pascal_voc': 18 | npimg[npimg==21] = 255 19 | # put colormap 20 | out_img = Image.fromarray(npimg.squeeze().astype('uint8')) 21 | if dataset == 'ade20k': 22 | out_img.putpalette(adepallete) 23 | elif dataset == 'cityscapes': 24 | out_img.putpalette(citypallete) 25 | elif dataset in ('pcontext', 'pascal_voc', 'pascal_aug'): 26 | out_img.putpalette(vocpallete) 27 | return out_img 28 | 29 | def _get_voc_pallete(num_cls): 30 | n = num_cls 31 | pallete = [0]*(n*3) 32 | for j in range(0,n): 33 | lab = j 34 | pallete[j*3+0] = 0 35 | pallete[j*3+1] = 0 36 | pallete[j*3+2] = 0 37 | i = 0 38 | while (lab > 0): 39 | pallete[j*3+0] |= (((lab >> 0) & 1) << (7-i)) 40 | pallete[j*3+1] |= (((lab >> 1) & 1) << (7-i)) 41 | pallete[j*3+2] |= (((lab >> 2) & 1) << (7-i)) 42 | i = i + 1 43 | lab >>= 3 44 | return pallete 45 | 46 | vocpallete = _get_voc_pallete(256) 47 | 48 | adepallete = [0,0,0,120,120,120,180,120,120,6,230,230,80,50,50,4,200,3,120,120,80,140,140,140,204,5,255,230,230,230,4,250,7,224,5,255,235,255,7,150,5,61,120,120,70,8,255,51,255,6,82,143,255,140,204,255,4,255,51,7,204,70,3,0,102,200,61,230,250,255,6,51,11,102,255,255,7,71,255,9,224,9,7,230,220,220,220,255,9,92,112,9,255,8,255,214,7,255,224,255,184,6,10,255,71,255,41,10,7,255,255,224,255,8,102,8,255,255,61,6,255,194,7,255,122,8,0,255,20,255,8,41,255,5,153,6,51,255,235,12,255,160,150,20,0,163,255,140,140,140,250,10,15,20,255,0,31,255,0,255,31,0,255,224,0,153,255,0,0,0,255,255,71,0,0,235,255,0,173,255,31,0,255,11,200,200,255,82,0,0,255,245,0,61,255,0,255,112,0,255,133,255,0,0,255,163,0,255,102,0,194,255,0,0,143,255,51,255,0,0,82,255,0,255,41,0,255,173,10,0,255,173,255,0,0,255,153,255,92,0,255,0,255,255,0,245,255,0,102,255,173,0,255,0,20,255,184,184,0,31,255,0,255,61,0,71,255,255,0,204,0,255,194,0,255,82,0,10,255,0,112,255,51,0,255,0,194,255,0,122,255,0,255,163,255,153,0,0,255,10,255,112,0,143,255,0,82,0,255,163,255,0,255,235,0,8,184,170,133,0,255,0,255,92,184,0,255,255,0,31,0,184,255,0,214,255,255,0,112,92,255,0,0,224,255,112,224,255,70,184,160,163,0,255,153,0,255,71,255,0,255,0,163,255,204,0,255,0,143,0,255,235,133,255,0,255,0,235,245,0,255,255,0,122,255,245,0,10,190,212,214,255,0,0,204,255,20,0,255,255,255,0,0,153,255,0,41,255,0,255,204,41,0,255,41,255,0,173,0,255,0,245,255,71,0,255,122,0,255,0,255,184,0,92,255,184,255,0,0,133,255,255,214,0,25,194,194,102,255,0,92,0,255] 49 | 50 | citypallete = [ 51 | 
128,64,128,244,35,232,70,70,70,102,102,156,190,153,153,153,153,153,250,170,30,220,220,0,107,142,35,152,251,152,70,130,180,220,20,60,255,0,0,0,0,142,0,0,70,0,60,100,0,80,100,0,0,230,119,11,32,128,192,0,0,64,128,128,64,128,0,192,128,128,192,128,64,64,0,192,64,0,64,192,0,192,192,0,64,64,128,192,64,128,64,192,128,192,192,128,0,0,64,128,0,64,0,128,64,128,128,64,0,0,192,128,0,192,0,128,192,128,128,192,64,0,64,192,0,64,64,128,64,192,128,64,64,0,192,192,0,192,64,128,192,192,128,192,0,64,64,128,64,64,0,192,64,128,192,64,0,64,192,128,64,192,0,192,192,128,192,192,64,64,64,192,64,64,64,192,64,192,192,64,64,64,192,192,64,192,64,192,192,192,192,192,32,0,0,160,0,0,32,128,0,160,128,0,32,0,128,160,0,128,32,128,128,160,128,128,96,0,0,224,0,0,96,128,0,224,128,0,96,0,128,224,0,128,96,128,128,224,128,128,32,64,0,160,64,0,32,192,0,160,192,0,32,64,128,160,64,128,32,192,128,160,192,128,96,64,0,224,64,0,96,192,0,224,192,0,96,64,128,224,64,128,96,192,128,224,192,128,32,0,64,160,0,64,32,128,64,160,128,64,32,0,192,160,0,192,32,128,192,160,128,192,96,0,64,224,0,64,96,128,64,224,128,64,96,0,192,224,0,192,96,128,192,224,128,192,32,64,64,160,64,64,32,192,64,160,192,64,32,64,192,160,64,192,32,192,192,160,192,192,96,64,64,224,64,64,96,192,64,224,192,64,96,64,192,224,64,192,96,192,192,224,192,192,0,32,0,128,32,0,0,160,0,128,160,0,0,32,128,128,32,128,0,160,128,128,160,128,64,32,0,192,32,0,64,160,0,192,160,0,64,32,128,192,32,128,64,160,128,192,160,128,0,96,0,128,96,0,0,224,0,128,224,0,0,96,128,128,96,128,0,224,128,128,224,128,64,96,0,192,96,0,64,224,0,192,224,0,64,96,128,192,96,128,64,224,128,192,224,128,0,32,64,128,32,64,0,160,64,128,160,64,0,32,192,128,32,192,0,160,192,128,160,192,64,32,64,192,32,64,64,160,64,192,160,64,64,32,192,192,32,192,64,160,192,192,160,192,0,96,64,128,96,64,0,224,64,128,224,64,0,96,192,128,96,192,0,224,192,128,224,192,64,96,64,192,96,64,64,224,64,192,224,64,64,96,192,192,96,192,64,224,192,192,224,192,32,32,0,160,32,0,32,160,0,160,160,0,32,32,128,160,32,128,32,160,128,160,160,128,96,32,0,224,32,0,96,160,0,224,160,0,96,32,128,224,32,128,96,160,128,224,160,128,32,96,0,160,96,0,32,224,0,160,224,0,32,96,128,160,96,128,32,224,128,160,224,128,96,96,0,224,96,0,96,224,0,224,224,0,96,96,128,224,96,128,96,224,128,224,224,128,32,32,64,160,32,64,32,160,64,160,160,64,32,32,192,160,32,192,32,160,192,160,160,192,96,32,64,224,32,64,96,160,64,224,160,64,96,32,192,224,32,192,96,160,192,224,160,192,32,96,64,160,96,64,32,224,64,160,224,64,32,96,192,160,96,192,32,224,192,160,224,192,96,96,64,224,96,64,96,224,64,224,224,64,96,96,192,224,96,192,96,224,192,0,0,0] 52 | -------------------------------------------------------------------------------- /experiments/segmentation/option.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | import argparse 7 | 8 | import torch 9 | 10 | class Options(): 11 | def __init__(self): 12 | parser = argparse.ArgumentParser(description='PyTorch \ 13 | Segmentation') 14 | # model and dataset 15 | parser.add_argument('--model', type=str, default='encnet', 16 | help='model name (default: encnet)') 17 | parser.add_argument('--backbone', type=str, default='resnet50', 18 | help='backbone name (default: resnet50)') 19 | parser.add_argument('--jpu', type=str, default=None, 20 | help='JPU name') 21 | 
parser.add_argument('--dilated', action='store_true', default= 22 | False, help='dilation') 23 | parser.add_argument('--lateral', action='store_true', default= 24 | False, help='employ FPN') 25 | parser.add_argument('--dataset', type=str, default='ade20k', 26 | help='dataset name (default: pascal12)') 27 | parser.add_argument('--workers', type=int, default=16, 28 | metavar='N', help='dataloader threads') 29 | parser.add_argument('--base-size', type=int, default=520, 30 | help='base image size') 31 | parser.add_argument('--crop-size', type=int, default=480, 32 | help='crop image size') 33 | parser.add_argument('--train-split', type=str, default='train', 34 | help='dataset train split (default: train)') 35 | # training hyper params 36 | parser.add_argument('--aux', action='store_true', default= False, 37 | help='Auxilary Loss') 38 | parser.add_argument('--aux-weight', type=float, default=0.2, 39 | help='Auxilary loss weight (default: 0.2)') 40 | parser.add_argument('--se-loss', action='store_true', default= False, 41 | help='Semantic Encoding Loss SE-loss') 42 | parser.add_argument('--se-weight', type=float, default=0.2, 43 | help='SE-loss weight (default: 0.2)') 44 | parser.add_argument('--epochs', type=int, default=None, metavar='N', 45 | help='number of epochs to train (default: auto)') 46 | parser.add_argument('--start_epoch', type=int, default=0, 47 | metavar='N', help='start epochs (default:0)') 48 | parser.add_argument('--batch-size', type=int, default=None, 49 | metavar='N', help='input batch size for \ 50 | training (default: auto)') 51 | parser.add_argument('--test-batch-size', type=int, default=None, 52 | metavar='N', help='input batch size for \ 53 | testing (default: same as batch size)') 54 | # optimizer params 55 | parser.add_argument('--lr', type=float, default=None, metavar='LR', 56 | help='learning rate (default: auto)') 57 | parser.add_argument('--lr-scheduler', type=str, default='poly', 58 | help='learning rate scheduler (default: poly)') 59 | parser.add_argument('--momentum', type=float, default=0.9, 60 | metavar='M', help='momentum (default: 0.9)') 61 | parser.add_argument('--weight-decay', type=float, default=1e-4, 62 | metavar='M', help='w-decay (default: 1e-4)') 63 | # cuda, seed and logging 64 | parser.add_argument('--no-cuda', action='store_true', default= 65 | False, help='disables CUDA training') 66 | parser.add_argument('--seed', type=int, default=1, metavar='S', 67 | help='random seed (default: 1)') 68 | # checking point 69 | parser.add_argument('--resume', type=str, default=None, 70 | help='put the path to resuming file if needed') 71 | parser.add_argument('--checkname', type=str, default='default', 72 | help='set the checkpoint name') 73 | parser.add_argument('--model-zoo', type=str, default=None, 74 | help='evaluating on model zoo model') 75 | # finetuning pre-trained models 76 | parser.add_argument('--ft', action='store_true', default= False, 77 | help='finetuning on a different dataset') 78 | # evaluation option 79 | parser.add_argument('--split', default='val') 80 | parser.add_argument('--mode', default='testval') 81 | parser.add_argument('--ms', action='store_true', default=False, 82 | help='multi scale & flip') 83 | parser.add_argument('--no-val', action='store_true', default= False, 84 | help='skip validation during training') 85 | parser.add_argument('--save-folder', type=str, default='experiments/segmentation/results', 86 | help = 'path to save images') 87 | 88 | # the parser 89 | self.parser = parser 90 | 91 | def parse(self): 92 | args = 
self.parser.parse_args() 93 | args.cuda = not args.no_cuda and torch.cuda.is_available() 94 | # default settings for epochs, batch_size and lr 95 | if args.epochs is None: 96 | epoches = { 97 | 'coco': 30, 98 | 'citys': 240, 99 | 'pascal_voc': 50, 100 | 'pascal_aug': 50, 101 | 'pcontext': 80, 102 | 'ade20k': 120, 103 | } 104 | args.epochs = epoches[args.dataset.lower()] 105 | if args.batch_size is None: 106 | args.batch_size = 16 107 | if args.test_batch_size is None: 108 | args.test_batch_size = args.batch_size 109 | if args.lr is None: 110 | lrs = { 111 | 'coco': 0.01, 112 | 'citys': 0.01, 113 | 'pascal_voc': 0.0001, 114 | 'pascal_aug': 0.001, 115 | 'pcontext': 0.001, 116 | 'ade20k': 0.01, 117 | } 118 | args.lr = lrs[args.dataset.lower()] / 16 * args.batch_size 119 | print(args) 120 | return args 121 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/deeplab_res50_pcontext.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset pcontext \ 5 | --model deeplab --jpu [JPU|JPU_X] --aux --aux-weight 0.4 \ 6 | --backbone resnet50 --checkname deeplab_res50_pcontext 7 | 8 | #test [single-scale] 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 10 | --model deeplab --jpu [JPU|JPU_X] --aux \ 11 | --backbone resnet50 --resume {MODEL} --split val --mode testval 12 | 13 | #test [multi-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 15 | --model deeplab --jpu [JPU|JPU_X] --aux \ 16 | --backbone resnet50 --resume {MODEL} --split val --mode testval --ms 17 | 18 | #predict [single-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 20 | --model deeplab --jpu [JPU|JPU_X] --aux \ 21 | --backbone resnet50 --resume {MODEL} --split val --mode test 22 | 23 | #predict [multi-scale] 24 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 25 | --model deeplab --jpu [JPU|JPU_X] --aux \ 26 | --backbone resnet50 --resume {MODEL} --split val --mode test --ms 27 | 28 | #fps 29 | CUDA_VISIBLE_DEVICES=0 python -m experiments.segmentation.test_fps_params --dataset pcontext \ 30 | --model deeplab --jpu [JPU|JPU_X] --aux \ 31 | --backbone resnet50 32 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/encnet_res101_ade20k_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset ade20k \ 5 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 6 | --backbone resnet101 --checkname encnet_res101_ade20k_train 7 | 8 | #test [single-scale] 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 10 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 11 | --backbone resnet101 --resume {MODEL} --split val --mode testval 12 | 13 | #test [multi-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 15 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 16 | --backbone resnet101 --resume {MODEL} --split val --mode testval --ms 17 | 18 | #predict [single-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test 
--dataset ade20k \ 20 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 21 | --backbone resnet101 --resume {MODEL} --split val --mode test 22 | 23 | #predict [multi-scale] 24 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 25 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 26 | --backbone resnet101 --resume {MODEL} --split val --mode test --ms 27 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/encnet_res101_ade20k_trainval.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset ade20k \ 5 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 6 | --backbone resnet101 --checkname encnet_res101_ade20k_train 7 | 8 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset ade20k \ 9 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 10 | --backbone resnet101 --checkname encnet_res101_ade20k_trainval \ 11 | --train-split trainval --lr 0.001 --epochs 20 --ft --resume {MODEL_PATH} 12 | 13 | #predict [single-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 15 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 16 | --backbone resnet101 --resume {MODEL} --split test --mode test 17 | 18 | #predict [multi-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 20 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 21 | --backbone resnet101 --resume {MODEL} --split test --mode test --ms 22 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/encnet_res101_pcontext.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset pcontext \ 5 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 6 | --backbone resnet101 --checkname encnet_res101_pcontext 7 | 8 | #test [single-scale] 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 10 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 11 | --backbone resnet101 --resume {MODEL} --split val --mode testval 12 | 13 | #test [multi-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 15 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 16 | --backbone resnet101 --resume {MODEL} --split val --mode testval --ms 17 | 18 | #predict [single-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 20 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 21 | --backbone resnet101 --resume {MODEL} --split val --mode test 22 | 23 | #predict [multi-scale] 24 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 25 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 26 | --backbone resnet101 --resume {MODEL} --split val --mode test --ms 27 | 28 | #fps 29 | CUDA_VISIBLE_DEVICES=0 python -m experiments.segmentation.test_fps_params --dataset pcontext \ 30 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 31 | --backbone resnet101 32 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/encnet_res50_ade20k_train.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset ade20k \ 5 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 6 | --backbone resnet50 --checkname encnet_res50_ade20k_train 7 | 8 | #test [single-scale] 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 10 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 11 | --backbone resnet50 --resume {MODEL} --split val --mode testval 12 | 13 | #test [multi-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 15 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 16 | --backbone resnet50 --resume {MODEL} --split val --mode testval --ms 17 | 18 | #predict [single-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 20 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 21 | --backbone resnet50 --resume {MODEL} --split val --mode test 22 | 23 | #predict [multi-scale] 24 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 25 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 26 | --backbone resnet50 --resume {MODEL} --split val --mode test --ms 27 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/encnet_res50_ade20k_trainval.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset ade20k \ 5 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 6 | --backbone resnet50 --checkname encnet_res50_ade20k_train 7 | 8 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset ade20k \ 9 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 10 | --backbone resnet50 --checkname encnet_res50_ade20k_trainval \ 11 | --train-split trainval --lr 0.001 --epochs 20 --ft --resume {MODEL_PATH} 12 | 13 | #predict [single-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 15 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 16 | --backbone resnet50 --resume {MODEL} --split test --mode test 17 | 18 | #predict [multi-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset ade20k \ 20 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 21 | --backbone resnet50 --resume {MODEL} --split test --mode test --ms 22 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/encnet_res50_pcontext.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset pcontext \ 5 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 6 | --backbone resnet50 --checkname encnet_res50_pcontext 7 | 8 | #test [single-scale] 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 10 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 11 | --backbone resnet50 --resume {MODEL} --split val --mode testval 12 | 13 | #test [multi-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 15 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 16 | --backbone resnet50 --resume 
{MODEL} --split val --mode testval --ms 17 | 18 | #predict [single-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 20 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 21 | --backbone resnet50 --resume {MODEL} --split val --mode test 22 | 23 | #predict [multi-scale] 24 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 25 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 26 | --backbone resnet50 --resume {MODEL} --split val --mode test --ms 27 | 28 | #fps 29 | CUDA_VISIBLE_DEVICES=0 python -m experiments.segmentation.test_fps_params --dataset pcontext \ 30 | --model encnet --jpu [JPU|JPU_X] --aux --se-loss \ 31 | --backbone resnet50 32 | -------------------------------------------------------------------------------- /experiments/segmentation/scripts/psp_res50_pcontext.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #train 4 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train --dataset pcontext \ 5 | --model psp --jpu [JPU|JPU_X] --aux --aux-weight 0.4 \ 6 | --backbone resnet50 --checkname psp_res50_pcontext 7 | 8 | #test [single-scale] 9 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 10 | --model psp --jpu [JPU|JPU_X] --aux \ 11 | --backbone resnet50 --resume {MODEL} --split val --mode testval 12 | 13 | #test [multi-scale] 14 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 15 | --model psp --jpu [JPU|JPU_X] --aux \ 16 | --backbone resnet50 --resume {MODEL} --split val --mode testval --ms 17 | 18 | #predict [single-scale] 19 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 20 | --model psp --jpu [JPU|JPU_X] --aux \ 21 | --backbone resnet50 --resume {MODEL} --split val --mode test 22 | 23 | #predict [multi-scale] 24 | CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.test --dataset pcontext \ 25 | --model psp --jpu [JPU|JPU_X] --aux \ 26 | --backbone resnet50 --resume {MODEL} --split val --mode test --ms 27 | 28 | #fps 29 | CUDA_VISIBLE_DEVICES=0 python -m experiments.segmentation.test_fps_params --dataset pcontext \ 30 | --model psp --jpu [JPU|JPU_X] --aux \ 31 | --backbone resnet50 32 | -------------------------------------------------------------------------------- /experiments/segmentation/test.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import os 8 | 9 | import torch 10 | import torchvision.transforms as transform 11 | 12 | import encoding.utils as utils 13 | 14 | from tqdm import tqdm 15 | 16 | from torch.utils import data 17 | 18 | from encoding.nn import BatchNorm 19 | from encoding.datasets import get_segmentation_dataset, test_batchify_fn 20 | from encoding.models import get_model, get_segmentation_model, MultiEvalModule 21 | 22 | from .option import Options 23 | 24 | 25 | def test(args): 26 | # output folder 27 | outdir = args.save_folder 28 | if not os.path.exists(outdir): 29 | os.makedirs(outdir) 30 | # data transforms 31 | input_transform = transform.Compose([ 32 | transform.ToTensor(), 33 | transform.Normalize([.485, .456, .406], [.229, .224, .225])]) 
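    # These are the standard ImageNet channel means and standard deviations;
    # they should match the statistics used to pretrain the backbone, and the
    # same transform is applied at training time in train.py.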
34 | # dataset 35 | testset = get_segmentation_dataset(args.dataset, split=args.split, mode=args.mode, 36 | transform=input_transform) 37 | # dataloader 38 | loader_kwargs = {'num_workers': args.workers, 'pin_memory': True} \ 39 | if args.cuda else {} 40 | test_data = data.DataLoader(testset, batch_size=args.test_batch_size, 41 | drop_last=False, shuffle=False, 42 | collate_fn=test_batchify_fn, **loader_kwargs) 43 | # model 44 | if args.model_zoo is not None: 45 | model = get_model(args.model_zoo, pretrained=True) 46 | else: 47 | model = get_segmentation_model(args.model, dataset = args.dataset, 48 | backbone = args.backbone, dilated = args.dilated, 49 | lateral = args.lateral, jpu = args.jpu, aux = args.aux, 50 | se_loss = args.se_loss, norm_layer = BatchNorm, 51 | base_size = args.base_size, crop_size = args.crop_size) 52 | # resuming checkpoint 53 | if args.resume is None or not os.path.isfile(args.resume): 54 | raise RuntimeError("=> no checkpoint found at '{}'" .format(args.resume)) 55 | checkpoint = torch.load(args.resume) 56 | # strict=False, so that it is compatible with old pytorch saved models 57 | model.load_state_dict(checkpoint['state_dict']) 58 | print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) 59 | 60 | print(model) 61 | scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25] if args.dataset == 'citys' else \ 62 | [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 63 | if not args.ms: 64 | scales = [1.0] 65 | evaluator = MultiEvalModule(model, testset.num_class, scales=scales, flip=args.ms).cuda() 66 | evaluator.eval() 67 | metric = utils.SegmentationMetric(testset.num_class) 68 | 69 | tbar = tqdm(test_data) 70 | for i, (image, dst) in enumerate(tbar): 71 | if 'val' in args.mode: 72 | with torch.no_grad(): 73 | predicts = evaluator.parallel_forward(image) 74 | metric.update(dst, predicts) 75 | pixAcc, mIoU = metric.get() 76 | tbar.set_description( 'pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU)) 77 | else: 78 | with torch.no_grad(): 79 | outputs = evaluator.parallel_forward(image) 80 | predicts = [testset.make_pred(torch.max(output, 1)[1].cpu().numpy()) 81 | for output in outputs] 82 | for predict, impath in zip(predicts, dst): 83 | mask = utils.get_mask_pallete(predict, args.dataset) 84 | outname = os.path.splitext(impath)[0] + '.png' 85 | mask.save(os.path.join(outdir, outname)) 86 | 87 | if __name__ == "__main__": 88 | args = Options().parse() 89 | torch.manual_seed(args.seed) 90 | args.test_batch_size = torch.cuda.device_count() 91 | test(args) 92 | -------------------------------------------------------------------------------- /experiments/segmentation/test_fps_params.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import torch 4 | import encoding 5 | 6 | from encoding.nn import BatchNorm 7 | from .option import Options 8 | 9 | 10 | if __name__ == "__main__": 11 | args = Options().parse() 12 | model = encoding.models.get_segmentation_model(args.model, dataset = args.dataset, 13 | backbone = args.backbone, dilated = args.dilated, 14 | lateral = args.lateral, jpu = args.jpu, aux = args.aux, 15 | se_loss = args.se_loss, norm_layer = BatchNorm) 16 | 17 | num_parameters = sum([l.nelement() for l in model.pretrained.parameters()]) 18 | print(num_parameters) 19 | num_parameters = sum([l.nelement() for l in model.head.parameters()]) 20 | print(num_parameters) 21 | 22 | model.cuda() 23 | model.eval() 24 | x = torch.Tensor(1, 3, 512, 512).cuda() 25 | 26 | N = 10 27 | with torch.no_grad(): 28 | for _ in 
range(N): 29 | out = model(x) 30 | 31 | result = [] 32 | for _ in range(10): 33 | st = time.time() 34 | for _ in range(N): 35 | out = model(x) 36 | result.append(N/(time.time()-st)) 37 | 38 | import numpy as np 39 | print(np.mean(result), np.std(result)) 40 | -------------------------------------------------------------------------------- /experiments/segmentation/test_single_image.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import os 8 | 9 | import torch 10 | import torchvision.transforms as transform 11 | 12 | import encoding.utils as utils 13 | 14 | from PIL import Image 15 | 16 | from encoding.nn import BatchNorm 17 | from encoding.datasets import datasets 18 | from encoding.models import get_model, get_segmentation_model, MultiEvalModule 19 | 20 | from .option import Options 21 | 22 | 23 | def test(args): 24 | # data transforms 25 | input_transform = transform.Compose([ 26 | transform.ToTensor(), 27 | transform.Normalize([.485, .456, .406], [.229, .224, .225])]) 28 | # model 29 | if args.model_zoo is not None: 30 | model = get_model(args.model_zoo, pretrained=True) 31 | else: 32 | model = get_segmentation_model(args.model, dataset = args.dataset, 33 | backbone = args.backbone, dilated = args.dilated, 34 | lateral = args.lateral, jpu = args.jpu, aux = args.aux, 35 | se_loss = args.se_loss, norm_layer = BatchNorm, 36 | base_size = args.base_size, crop_size = args.crop_size) 37 | # resuming checkpoint 38 | if args.resume is None or not os.path.isfile(args.resume): 39 | raise RuntimeError("=> no checkpoint found at '{}'" .format(args.resume)) 40 | checkpoint = torch.load(args.resume) 41 | # strict=False, so that it is compatible with old pytorch saved models 42 | model.load_state_dict(checkpoint['state_dict'], strict=False) 43 | print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) 44 | 45 | print(model) 46 | scales = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25] if args.dataset == 'citys' else \ 47 | [0.5, 0.75, 1.0, 1.25, 1.5, 1.75] 48 | if not args.ms: 49 | scales = [1.0] 50 | num_classes = datasets[args.dataset.lower()].NUM_CLASS 51 | evaluator = MultiEvalModule(model, num_classes, scales=scales, flip=args.ms).cuda() 52 | evaluator.eval() 53 | 54 | img = input_transform(Image.open(args.input_path).convert('RGB')).unsqueeze(0) 55 | 56 | with torch.no_grad(): 57 | output = evaluator.parallel_forward(img)[0] 58 | predict = torch.max(output, 1)[1].cpu().numpy() 59 | mask = utils.get_mask_pallete(predict, args.dataset) 60 | mask.save(args.save_path) 61 | 62 | 63 | if __name__ == "__main__": 64 | option = Options() 65 | option.parser.add_argument('--input-path', type=str, required=True, help='path to read input image') 66 | option.parser.add_argument('--save-path', type=str, required=True, help='path to save output image') 67 | args = option.parse() 68 | 69 | torch.manual_seed(args.seed) 70 | 71 | test(args) 72 | -------------------------------------------------------------------------------- /experiments/segmentation/train.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright 
(c) 2017 5 | ########################################################################### 6 | 7 | import os 8 | import numpy as np 9 | from tqdm import tqdm 10 | 11 | import torch 12 | from torch.utils import data 13 | import torchvision.transforms as transform 14 | from torch.nn.parallel.scatter_gather import gather 15 | 16 | import encoding.utils as utils 17 | from encoding.nn import SegmentationLosses, SyncBatchNorm 18 | from encoding.parallel import DataParallelModel, DataParallelCriterion 19 | from encoding.datasets import get_segmentation_dataset 20 | from encoding.models import get_segmentation_model 21 | 22 | from .option import Options 23 | 24 | torch_ver = torch.__version__[:3] 25 | if torch_ver == '0.3': 26 | from torch.autograd import Variable 27 | 28 | class Trainer(): 29 | def __init__(self, args): 30 | self.args = args 31 | # data transforms 32 | input_transform = transform.Compose([ 33 | transform.ToTensor(), 34 | transform.Normalize([.485, .456, .406], [.229, .224, .225])]) 35 | # dataset 36 | data_kwargs = {'transform': input_transform, 'base_size': args.base_size, 37 | 'crop_size': args.crop_size} 38 | trainset = get_segmentation_dataset(args.dataset, split=args.train_split, mode='train', 39 | **data_kwargs) 40 | testset = get_segmentation_dataset(args.dataset, split='val', mode ='val', 41 | **data_kwargs) 42 | # dataloader 43 | kwargs = {'num_workers': args.workers, 'pin_memory': True} \ 44 | if args.cuda else {} 45 | self.trainloader = data.DataLoader(trainset, batch_size=args.batch_size, 46 | drop_last=True, shuffle=True, **kwargs) 47 | self.valloader = data.DataLoader(testset, batch_size=args.batch_size, 48 | drop_last=False, shuffle=False, **kwargs) 49 | self.nclass = trainset.num_class 50 | # model 51 | model = get_segmentation_model(args.model, dataset = args.dataset, 52 | backbone = args.backbone, dilated = args.dilated, 53 | lateral = args.lateral, jpu = args.jpu, aux = args.aux, 54 | se_loss = args.se_loss, norm_layer = SyncBatchNorm, 55 | base_size = args.base_size, crop_size = args.crop_size) 56 | print(model) 57 | # optimizer using different LR 58 | params_list = [{'params': model.pretrained.parameters(), 'lr': args.lr},] 59 | if hasattr(model, 'jpu'): 60 | params_list.append({'params': model.jpu.parameters(), 'lr': args.lr*10}) 61 | if hasattr(model, 'head'): 62 | params_list.append({'params': model.head.parameters(), 'lr': args.lr*10}) 63 | if hasattr(model, 'auxlayer'): 64 | params_list.append({'params': model.auxlayer.parameters(), 'lr': args.lr*10}) 65 | optimizer = torch.optim.SGD(params_list, lr=args.lr, 66 | momentum=args.momentum, weight_decay=args.weight_decay) 67 | # criterions 68 | self.criterion = SegmentationLosses(se_loss=args.se_loss, aux=args.aux, 69 | nclass=self.nclass, 70 | se_weight=args.se_weight, 71 | aux_weight=args.aux_weight) 72 | self.model, self.optimizer = model, optimizer 73 | # using cuda 74 | if args.cuda: 75 | self.model = DataParallelModel(self.model).cuda() 76 | self.criterion = DataParallelCriterion(self.criterion).cuda() 77 | # resuming checkpoint 78 | self.best_pred = 0.0 79 | if args.resume is not None: 80 | if not os.path.isfile(args.resume): 81 | raise RuntimeError("=> no checkpoint found at '{}'" .format(args.resume)) 82 | checkpoint = torch.load(args.resume) 83 | args.start_epoch = checkpoint['epoch'] 84 | if args.cuda: 85 | self.model.module.load_state_dict(checkpoint['state_dict']) 86 | else: 87 | self.model.load_state_dict(checkpoint['state_dict']) 88 | if not args.ft: 89 | 
self.optimizer.load_state_dict(checkpoint['optimizer']) 90 | self.best_pred = checkpoint['best_pred'] 91 | print("=> loaded checkpoint '{}' (epoch {})" 92 | .format(args.resume, checkpoint['epoch'])) 93 | # clear start epoch if fine-tuning 94 | if args.ft: 95 | args.start_epoch = 0 96 | # lr scheduler 97 | self.scheduler = utils.LR_Scheduler(args.lr_scheduler, args.lr, 98 | args.epochs, len(self.trainloader)) 99 | 100 | def training(self, epoch): 101 | train_loss = 0.0 102 | self.model.train() 103 | tbar = tqdm(self.trainloader) 104 | for i, (image, target) in enumerate(tbar): 105 | self.scheduler(self.optimizer, i, epoch, self.best_pred) 106 | self.optimizer.zero_grad() 107 | if torch_ver == "0.3": 108 | image = Variable(image) 109 | target = Variable(target) 110 | outputs = self.model(image) 111 | loss = self.criterion(outputs, target) 112 | loss.backward() 113 | self.optimizer.step() 114 | train_loss += loss.item() 115 | tbar.set_description('Train loss: %.3f' % (train_loss / (i + 1))) 116 | 117 | if self.args.no_val: 118 | # save checkpoint every epoch 119 | is_best = False 120 | utils.save_checkpoint({ 121 | 'epoch': epoch + 1, 122 | 'state_dict': self.model.module.state_dict(), 123 | 'optimizer': self.optimizer.state_dict(), 124 | 'best_pred': self.best_pred, 125 | }, self.args, is_best, filename='checkpoint_{}.pth.tar'.format(epoch)) 126 | 127 | 128 | def validation(self, epoch): 129 | # Fast test during the training 130 | def eval_batch(model, image, target): 131 | outputs = model(image) 132 | outputs = gather(outputs, 0, dim=0) 133 | pred = outputs[0] 134 | target = target.cuda() 135 | correct, labeled = utils.batch_pix_accuracy(pred.data, target) 136 | inter, union = utils.batch_intersection_union(pred.data, target, self.nclass) 137 | return correct, labeled, inter, union 138 | 139 | is_best = False 140 | self.model.eval() 141 | total_inter, total_union, total_correct, total_label = 0, 0, 0, 0 142 | tbar = tqdm(self.valloader, desc='\r') 143 | for i, (image, target) in enumerate(tbar): 144 | if torch_ver == "0.3": 145 | image = Variable(image, volatile=True) 146 | correct, labeled, inter, union = eval_batch(self.model, image, target) 147 | else: 148 | with torch.no_grad(): 149 | correct, labeled, inter, union = eval_batch(self.model, image, target) 150 | 151 | total_correct += correct 152 | total_label += labeled 153 | total_inter += inter 154 | total_union += union 155 | pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label) 156 | IoU = 1.0 * total_inter / (np.spacing(1) + total_union) 157 | mIoU = IoU.mean() 158 | tbar.set_description( 159 | 'pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU)) 160 | 161 | new_pred = (pixAcc + mIoU)/2 162 | if new_pred > self.best_pred: 163 | is_best = True 164 | self.best_pred = new_pred 165 | utils.save_checkpoint({ 166 | 'epoch': epoch + 1, 167 | 'state_dict': self.model.module.state_dict(), 168 | 'optimizer': self.optimizer.state_dict(), 169 | 'best_pred': new_pred, 170 | }, self.args, is_best) 171 | 172 | 173 | if __name__ == "__main__": 174 | args = Options().parse() 175 | torch.manual_seed(args.seed) 176 | trainer = Trainer(args) 177 | print('Starting Epoch:', trainer.args.start_epoch) 178 | print('Total Epochs:', trainer.args.epochs) 179 | for epoch in range(trainer.args.start_epoch, trainer.args.epochs): 180 | trainer.training(epoch) 181 | if not trainer.args.no_val: 182 | trainer.validation(epoch) 183 | -------------------------------------------------------------------------------- /images/Framework.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/Framework.png -------------------------------------------------------------------------------- /images/JPU.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/JPU.png -------------------------------------------------------------------------------- /images/encnet_2009_001858.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/encnet_2009_001858.png -------------------------------------------------------------------------------- /images/encnet_ADE_val_00001086.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/encnet_ADE_val_00001086.png -------------------------------------------------------------------------------- /images/gt_2009_001858.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/gt_2009_001858.png -------------------------------------------------------------------------------- /images/gt_ADE_val_00001086.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/gt_ADE_val_00001086.png -------------------------------------------------------------------------------- /images/img_2009_001858.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/img_2009_001858.jpg -------------------------------------------------------------------------------- /images/img_ADE_val_00001086.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/img_ADE_val_00001086.jpg -------------------------------------------------------------------------------- /images/ours_2009_001858.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/ours_2009_001858.png -------------------------------------------------------------------------------- /images/ours_ADE_val_00001086.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wuhuikai/FastFCN/126661b0eb6053ce35b3c778a7d402f0338d98b7/images/ours_ADE_val_00001086.png -------------------------------------------------------------------------------- /scripts/prepare_ade20k.py: -------------------------------------------------------------------------------- 1 | """Prepare ADE20K dataset""" 2 | import os 3 | import zipfile 4 | import argparse 5 | 6 | from encoding.utils import download, mkdir 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Initialize ADE20K dataset.', 13 | epilog='Example: python 
prepare_ade20k.py', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk') 16 | args = parser.parse_args() 17 | return args 18 | 19 | def download_ade(path, overwrite=False): 20 | _AUG_DOWNLOAD_URLS = [ 21 | ('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip', '219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'), 22 | ('http://data.csail.mit.edu/places/ADEchallenge/release_test.zip', 'e05747892219d10e9243933371a497e905a4860c'),] 23 | download_dir = os.path.join(path, 'downloads') 24 | mkdir(download_dir) 25 | for url, checksum in _AUG_DOWNLOAD_URLS: 26 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 27 | # extract 28 | with zipfile.ZipFile(filename,"r") as zip_ref: 29 | zip_ref.extractall(path=path) 30 | 31 | 32 | if __name__ == '__main__': 33 | args = parse_args() 34 | mkdir(os.path.expanduser('~/.encoding/data')) 35 | if args.download_dir is not None: 36 | if os.path.isdir(_TARGET_DIR): 37 | os.remove(_TARGET_DIR) 38 | # make symlink 39 | os.symlink(args.download_dir, _TARGET_DIR) 40 | else: 41 | download_ade(_TARGET_DIR, overwrite=False) 42 | -------------------------------------------------------------------------------- /scripts/prepare_cityscapes.py: -------------------------------------------------------------------------------- 1 | """Prepare Cityscapes dataset""" 2 | import os 3 | import zipfile 4 | import argparse 5 | 6 | from encoding.utils import mkdir, check_sha1 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Initialize Cityscapes dataset.', 13 | epilog='Example: python prepare_cityscapes.py', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk') 16 | args = parser.parse_args() 17 | return args 18 | 19 | def download_city(path): 20 | _CITY_DOWNLOAD_URLS = [ 21 | ('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'), 22 | ('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404')] 23 | download_dir = os.path.join(path, 'downloads') 24 | mkdir(download_dir) 25 | for filename, checksum in _CITY_DOWNLOAD_URLS: 26 | if not check_sha1(filename, checksum): 27 | raise UserWarning('File {} is downloaded but the content hash does not match. ' \ 28 | 'The repo may be outdated or download may be incomplete. 
' \ 29 | 'If the "repo_url" is overridden, consider switching to ' \ 30 | 'the default repo.'.format(filename)) 31 | # extract 32 | with zipfile.ZipFile(filename,"r") as zip_ref: 33 | zip_ref.extractall(path=path) 34 | print("Extracted", filename) 35 | 36 | if __name__ == '__main__': 37 | args = parse_args() 38 | mkdir(os.path.expanduser('~/.encoding/data')) 39 | mkdir(os.path.expanduser('~/.encoding/data/cityscapes')) 40 | if args.download_dir is not None: 41 | if os.path.isdir(_TARGET_DIR): 42 | os.remove(_TARGET_DIR) 43 | # make symlink 44 | os.symlink(args.download_dir, _TARGET_DIR) 45 | else: 46 | download_city(_TARGET_DIR) 47 | -------------------------------------------------------------------------------- /scripts/prepare_coco.py: -------------------------------------------------------------------------------- 1 | """Prepare MS COCO datasets""" 2 | import os 3 | import shutil 4 | import zipfile 5 | import argparse 6 | 7 | from encoding.utils import download, mkdir 8 | 9 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Initialize MS COCO dataset.', 14 | epilog='Example: python mscoco.py --download-dir ~/mscoco', 15 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk') 17 | args = parser.parse_args() 18 | return args 19 | 20 | def download_coco(path, overwrite=False): 21 | _DOWNLOAD_URLS = [ 22 | ('http://images.cocodataset.org/zips/train2017.zip', 23 | '10ad623668ab00c62c096f0ed636d6aff41faca5'), 24 | ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', 25 | '8551ee4bb5860311e79dace7e79cb91e432e78b3'), 26 | ('http://images.cocodataset.org/zips/val2017.zip', 27 | '4950dc9d00dbe1c933ee0170f5797584351d2a41') 28 | ] 29 | mkdir(path) 30 | for url, checksum in _DOWNLOAD_URLS: 31 | filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) 32 | # extract 33 | with zipfile.ZipFile(filename) as zf: 34 | zf.extractall(path=path) 35 | 36 | def install_coco_api(): 37 | repo_url = "https://github.com/cocodataset/cocoapi" 38 | os.system("git clone " + repo_url) 39 | os.system("cd cocoapi/PythonAPI/ && python setup.py install") 40 | shutil.rmtree('cocoapi') 41 | try: 42 | import pycocotools 43 | except Exception: 44 | print("Installing COCO API failed, please install it manually %s"%(repo_url)) 45 | 46 | 47 | if __name__ == '__main__': 48 | args = parse_args() 49 | mkdir(os.path.expanduser('~/.encoding/data')) 50 | if args.download_dir is not None: 51 | if os.path.isdir(_TARGET_DIR): 52 | os.remove(_TARGET_DIR) 53 | # make symlink 54 | os.symlink(args.download_dir, _TARGET_DIR) 55 | else: 56 | download_coco(_TARGET_DIR, overwrite=False) 57 | install_coco_api() 58 | -------------------------------------------------------------------------------- /scripts/prepare_pascal.py: -------------------------------------------------------------------------------- 1 | """Prepare PASCAL VOC datasets""" 2 | import os 3 | import shutil 4 | import tarfile 5 | import argparse 6 | 7 | from encoding.utils import download, mkdir 8 | 9 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Initialize PASCAL VOC dataset.', 14 | epilog='Example: python prepare_pascal.py', 15 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | parser.add_argument('--download-dir', type=str, default=None, 
help='dataset directory on disk') 17 | parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') 18 | parser.add_argument('--overwrite', action='store_true', help='overwrite downloaded files if set, in case they are corrupted') 19 | args = parser.parse_args() 20 | return args 21 | 22 | def download_voc(path, overwrite=False): 23 | _DOWNLOAD_URLS = [ 24 | ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', 25 | '4e443f8a2eca6b1dac8a6c57641b67dd40621a49')] 26 | download_dir = os.path.join(path, 'downloads') 27 | mkdir(download_dir) 28 | for url, checksum in _DOWNLOAD_URLS: 29 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 30 | # extract 31 | with tarfile.open(filename) as tar: 32 | tar.extractall(path=path) 33 | 34 | def download_aug(path, overwrite=False): 35 | _AUG_DOWNLOAD_URLS = [ 36 | ('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz', '7129e0a480c2d6afb02b517bb18ac54283bfaa35')] 37 | download_dir = os.path.join(path, 'downloads') 38 | mkdir(download_dir) 39 | for url, checksum in _AUG_DOWNLOAD_URLS: 40 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 41 | # extract 42 | with tarfile.open(filename) as tar: 43 | tar.extractall(path=path) 44 | shutil.move(os.path.join(path, 'benchmark_RELEASE'), 45 | os.path.join(path, 'VOCaug')) 46 | filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt'] 47 | # generate trainval.txt 48 | with open(os.path.join(path, 'VOCaug/dataset/trainval.txt'), 'w') as outfile: 49 | for fname in filenames: 50 | fname = os.path.join(path, fname) 51 | with open(fname) as infile: 52 | for line in infile: 53 | outfile.write(line) 54 | 55 | 56 | if __name__ == '__main__': 57 | args = parse_args() 58 | mkdir(os.path.expanduser('~/.encoding/data')) 59 | if args.download_dir is not None: 60 | if os.path.isdir(_TARGET_DIR): 61 | os.remove(_TARGET_DIR) 62 | os.symlink(args.download_dir, _TARGET_DIR) 63 | else: 64 | download_voc(_TARGET_DIR, overwrite=False) 65 | download_aug(_TARGET_DIR, overwrite=False) 66 | -------------------------------------------------------------------------------- /scripts/prepare_pcontext.py: -------------------------------------------------------------------------------- 1 | """Prepare PASCAL Context dataset""" 2 | import os 3 | import shutil 4 | import tarfile 5 | import argparse 6 | 7 | from encoding.utils import download, mkdir 8 | 9 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 10 | PASD_URL="https://codalabuser.blob.core.windows.net/public/%s" 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser( 14 | description='Initialize PASCAL Context dataset.', 15 | epilog='Example: python prepare_pcontext.py', 16 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 17 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk') 18 | args = parser.parse_args() 19 | return args 20 | 21 | def download_ade(path, overwrite=False): 22 | _AUG_DOWNLOAD_URLS = [ 23 | ('http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar', 24 | 'bf9985e9f2b064752bf6bd654d89f017c76c395a'), 25 | ('https://codalabuser.blob.core.windows.net/public/trainval_merged.json', 26 | '169325d9f7e9047537fedca7b04de4dddf10b881'), 27 | # You can skip these if the network is slow; the dataset will generate them automatically. 
28 | ('https://hangzh.s3.amazonaws.com/encoding/data/pcontext/train.pth', 29 | '4bfb49e8c1cefe352df876c9b5434e655c9c1d07'), 30 | ('https://hangzh.s3.amazonaws.com/encoding/data/pcontext/val.pth', 31 | 'ebedc94247ec616c57b9a2df15091784826a7b0c'), 32 | ] 33 | download_dir = os.path.join(path, 'downloads') 34 | mkdir(download_dir) 35 | for url, checksum in _AUG_DOWNLOAD_URLS: 36 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 37 | # extract 38 | if os.path.splitext(filename)[1] == '.tar': 39 | with tarfile.open(filename) as tar: 40 | tar.extractall(path=path) 41 | else: 42 | shutil.move(filename, os.path.join(path, 'VOCdevkit/VOC2010/'+os.path.basename(filename))) 43 | 44 | def install_pcontext_api(): 45 | repo_url = "https://github.com/zhanghang1989/detail-api" 46 | os.system("git clone " + repo_url) 47 | os.system("cd detail-api/PythonAPI/ && python setup.py install") 48 | shutil.rmtree('detail-api') 49 | try: 50 | import detail 51 | except Exception: 52 | print("Installing PASCAL Context API failed, please install it manually %s"%(repo_url)) 53 | 54 | 55 | if __name__ == '__main__': 56 | args = parse_args() 57 | mkdir(os.path.expanduser('~/.encoding/data')) 58 | if args.download_dir is not None: 59 | if os.path.isdir(_TARGET_DIR): 60 | os.remove(_TARGET_DIR) 61 | # make symlink 62 | os.symlink(args.download_dir, _TARGET_DIR) 63 | else: 64 | download_ade(_TARGET_DIR, overwrite=False) 65 | install_pcontext_api() 66 | --------------------------------------------------------------------------------
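For reference, a minimal sketch of how the preparation scripts above feed the segmentation trainer, assuming the repository root as the working directory. The exact flag spellings live in experiments/segmentation/option.py and are wrapped by the shell scripts under experiments/segmentation/scripts/, so the training arguments below are illustrative assumptions rather than the canonical invocation.

```bash
# Download PASCAL Context into ~/.encoding/data (VOC2010 images + trainval_merged.json);
# alternatively: python scripts/prepare_pcontext.py with the encoding package installed.
python -m scripts.prepare_pcontext

# Train EncNet with the JPU on PASCAL Context (assumed flags; the bundled
# encnet_res50_pcontext.sh script wraps a similar command).
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m experiments.segmentation.train \
    --dataset pcontext --model encnet --jpu JPU --backbone resnet50 \
    --aux --se-loss
```

Evaluation and single-image inference follow the same pattern via experiments.segmentation.test and experiments.segmentation.test_single_image.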