├── .editorconfig
├── .github
│   └── workflows
│       ├── pypi.yml
│       ├── pypi_release.yml
│       ├── sphix_build_master.yml
│       ├── sphix_build_pr.yml
│       └── unit_test.yml
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── dist
│   ├── torch_encoding-1.2.1b20200708-py3.6.egg
│   ├── torch_encoding-1.2.2b20200707-py3.7.egg
│   ├── torch_encoding-1.2.2b20200708-py3.6.egg
│   ├── torch_encoding-1.2.2b20200708-py3.7.egg
│   ├── torch_encoding-1.2.2b20200709-py3.6.egg
│   ├── torch_encoding-1.2.2b20200725-py3.6.egg
│   ├── torch_encoding-1.2.2b20200725-py3.7.egg
│   ├── torch_encoding-1.2.2b20200801-py3.6.egg
│   ├── torch_encoding-1.2.2b20200802-py3.6.egg
│   └── torch_encoding-1.2.2b20200814-py3.6.egg
├── docs
│   ├── Makefile
│   ├── make.bat
│   ├── requirements.txt
│   └── source
│       ├── _static
│       │   ├── css
│       │   │   └── encoding.css
│       │   ├── img
│       │   │   ├── EncNet32k128d.svg
│       │   │   ├── cvpr17.svg
│       │   │   ├── deep_ten_curve.svg
│       │   │   ├── favicon.png
│       │   │   ├── figure1.jpg
│       │   │   ├── icon.png
│       │   │   ├── myimage.gif
│       │   │   └── upconv.png
│       │   ├── js
│       │   │   └── hidebib.js
│       │   └── theme_overrides.css
│       ├── _templates
│       │   └── layout.html
│       ├── conf.py
│       ├── custom_directives.py
│       ├── index.rst
│       ├── model_zoo
│       │   ├── imagenet.rst
│       │   └── segmentation.rst
│       ├── nn.rst
│       ├── notes
│       │   └── compile.rst
│       ├── parallel.rst
│       ├── tutorials
│       │   ├── style.rst
│       │   ├── syncbn.rst
│       │   └── texture.rst
│       └── utils.rst
├── encoding
│   ├── __init__.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── ade20k.py
│   │   ├── base.py
│   │   ├── cityscapes.py
│   │   ├── cityscapes_v0.py
│   │   ├── cityscapescoarse.py
│   │   ├── coco.py
│   │   ├── folder.py
│   │   ├── hpw18.py
│   │   ├── imagenet.py
│   │   ├── minc.py
│   │   ├── pascal_aug.py
│   │   ├── pascal_voc.py
│   │   └── pcontext.py
│   ├── functions
│   │   ├── __init__.py
│   │   ├── customize.py
│   │   ├── dist_syncbn.py
│   │   ├── encoding.py
│   │   ├── rectify.py
│   │   └── syncbn.py
│   ├── lib
│   │   ├── __init__.py
│   │   ├── cpu
│   │   │   ├── encoding_cpu.cpp
│   │   │   ├── nms_cpu.cpp
│   │   │   ├── operator.cpp
│   │   │   ├── operator.h
│   │   │   ├── rectify_cpu.cpp
│   │   │   ├── roi_align_cpu.cpp
│   │   │   ├── setup.py
│   │   │   └── syncbn_cpu.cpp
│   │   └── gpu
│   │       ├── activation_kernel.cu
│   │       ├── common.h
│   │       ├── device_tensor.h
│   │       ├── encoding_kernel.cu
│   │       ├── lib_ssd.cu
│   │       ├── nms_kernel.cu
│   │       ├── operator.cpp
│   │       ├── operator.h
│   │       ├── rectify_cuda.cu
│   │       ├── roi_align_kernel.cu
│   │       ├── setup.py
│   │       └── syncbn_kernel.cu
│   ├── models
│   │   ├── __init__.py
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── resnest.py
│   │   │   ├── resnet.py
│   │   │   ├── resnet_variants.py
│   │   │   ├── resnext.py
│   │   │   ├── wideresnet.py
│   │   │   └── xception.py
│   │   ├── deepten.py
│   │   ├── model_store.py
│   │   ├── model_zoo.py
│   │   └── sseg
│   │       ├── __init__.py
│   │       ├── atten.py
│   │       ├── base.py
│   │       ├── danet.py
│   │       ├── deeplab.py
│   │       ├── dran.py
│   │       ├── encnet.py
│   │       ├── fcfpn.py
│   │       ├── fcn.py
│   │       ├── psp.py
│   │       └── upernet.py
│   ├── nn
│   │   ├── __init__.py
│   │   ├── attention.py
│   │   ├── customize.py
│   │   ├── da_att.py
│   │   ├── dran_att.py
│   │   ├── dropblock.py
│   │   ├── encoding.py
│   │   ├── loss.py
│   │   ├── rectify.py
│   │   ├── splat.py
│   │   └── syncbn.py
│   ├── parallel.py
│   ├── transforms
│   │   ├── __init__.py
│   │   ├── autoaug.py
│   │   ├── get_transform.py
│   │   └── transforms.py
│   └── utils
│       ├── __init__.py
│       ├── dist_helper.py
│       ├── files.py
│       ├── lr_scheduler.py
│       ├── metrics.py
│       ├── misc.py
│       ├── pallete.py
│       ├── precise_bn.py
│       ├── presets.py
│       └── train_helper.py
├── experiments
│   ├── recognition
│   │   ├── README.md
│   │   ├── resnet50_baseline.sh
│   │   ├── test_flops.py
│   │   ├── train_dist.py
│   │   └── verify.py
│   └── segmentation
│       ├── demo.py
│       ├── model_mapping.py
│       ├── test.py
│       ├── test.sh
│       ├── test_danet.sh
│       ├── test_models.py
│       ├── train.py
│       └── train_dist.py
├── img
│   ├── overview.jpg
│   ├── overview.png
│   ├── tab3.jpg
│   └── tab3.png
├── scripts
│   ├── prepare_ade20k.py
│   ├── prepare_citys.py
│   ├── prepare_coco.py
│   ├── prepare_imagenet.py
│   ├── prepare_minc.py
│   ├── prepare_pascal.py
│   └── prepare_pcontext.py
├── setup.cfg
├── setup.py
├── tests
│   ├── lint.py
│   ├── pylintrc
│   └── unit_test
│       ├── test_dataset.py
│       ├── test_function.py
│       ├── test_model.py
│       ├── test_module.py
│       └── test_utils.py
└── torch_encoding.egg-info
    ├── PKG-INFO
    ├── SOURCES.txt
    ├── dependency_links.txt
    ├── requires.txt
    └── top_level.txt
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | indent_style = space
5 | indent_size = 4
6 |
--------------------------------------------------------------------------------
/.github/workflows/pypi.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Upload Python Package
5 |
6 | on:
7 | schedule:
8 | - cron: "0 12 * * *"
9 |
10 | jobs:
11 | deploy:
12 |
13 | runs-on: ubuntu-18.04
14 |
15 | steps:
16 | - uses: actions/checkout@master
17 | - name: Set up Python
18 | uses: actions/setup-python@v1
19 | with:
20 | python-version: '3.7'
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install setuptools wheel twine pypandoc
25 | - name: Build and publish
26 | env:
27 |         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 |         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 | run: |
30 | python setup.py sdist bdist_wheel
31 | twine upload dist/* --verbose
32 |
--------------------------------------------------------------------------------
/.github/workflows/pypi_release.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Pypi Release
5 |
6 | on:
7 | release:
8 | types: [created]
9 |
10 | jobs:
11 | deploy:
12 |
13 | runs-on: ubuntu-18.04
14 |
15 | steps:
16 | - uses: actions/checkout@master
17 | - name: Set up Python
18 | uses: actions/setup-python@v1
19 | with:
20 | python-version: '3.7'
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install setuptools wheel twine pypandoc
25 | - name: Build and publish
26 | env:
27 | TWINE_USERNAME: ${{ secrets.pypi_username }}
28 | TWINE_PASSWORD: ${{ secrets.pypi_password }}
29 | RELEASE: 1
30 | run: |
31 | python setup.py sdist bdist_wheel
32 | twine upload dist/* --verbose
33 |
--------------------------------------------------------------------------------
/.github/workflows/sphix_build_master.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Build Docs
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 |
10 | jobs:
11 | docs:
12 | runs-on: ubuntu-latest
13 | steps:
14 | - uses: actions/checkout@v1
15 | - uses: seanmiddleditch/gha-setup-ninja@master
16 |
17 | - name: Set up Python
18 | uses: actions/setup-python@v1
19 | with:
20 | python-version: 3.7
21 |
22 | - name: Install dependencies
23 | run: |
24 | python -m pip install --upgrade pip
25 | pip install numpy -I
26 | pip install pytest torch
27 |
28 | - name: Install package
29 | run: |
30 | pip install -e .
31 |
32 |     - name: Install Sphinx Dependencies
33 | run: |
34 | cd docs/
35 | pip install -r requirements.txt
36 |
37 | - name: Build Sphinx docs
38 | run: |
39 | cd docs/
40 | make html
41 | touch build/html/.nojekyll
42 |
43 | # https://github.com/marketplace/actions/github-pages
44 | - name: Deploy
45 | if: success()
46 | uses: crazy-max/ghaction-github-pages@v1
47 | env:
48 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
49 | with:
50 | build_dir: docs/build/html/
51 | target_branch: gh-pages
52 |
--------------------------------------------------------------------------------
/.github/workflows/sphix_build_pr.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Build Docs
5 |
6 | on:
7 | pull_request:
8 | branches: [ master ]
9 |
10 | jobs:
11 | docs:
12 | runs-on: self-hosted
13 | steps:
14 | - uses: actions/checkout@v2
15 | - uses: seanmiddleditch/gha-setup-ninja@master
16 |
17 | - name: Set PR Number
18 | uses: actions/github-script@0.3.0
19 | with:
20 | github-token: ${{github.token}}
21 | script: |
22 | const core = require('@actions/core')
23 | const prNumber = context.payload.number;
24 | core.exportVariable('PULL_NUMBER', prNumber);
25 | core.exportVariable("PATH", "/home/ubuntu/anaconda3/bin:/usr/local/bin:/usr/bin/:/bin:$PATH")
26 |
27 | - name: Install dependencies
28 | run: |
29 | python -m pip install --upgrade pip
30 | pip install numpy -I
31 | pip install pytest torch
32 |
33 | - name: Install package
34 | run: |
35 | pip install -e .
36 |
37 |     - name: Install Sphinx Dependencies
38 | run: |
39 | cd docs/
40 | pip install -r requirements.txt
41 |
42 | - name: Build Sphinx docs
43 | run: |
44 | cd docs/
45 | make html
46 | touch build/html/.nojekyll
47 | aws s3 sync build/html/ s3://hangzh/encoding/docs/${{ env.PULL_NUMBER }}/ --acl public-read --follow-symlinks --delete
48 |
49 | - name: Comment
50 | if: success()
51 | uses: thollander/actions-comment-pull-request@master
52 | with:
53 | message: "The docs are uploaded and can be previewed at http://hangzh.s3.amazonaws.com/encoding/docs/${{ env.PULL_NUMBER }}/index.html"
54 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
55 |
--------------------------------------------------------------------------------
/.github/workflows/unit_test.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Unit Test
5 |
6 | on:
7 | push:
8 | branches: [ master ]
9 | pull_request:
10 | branches: [ master ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: self-hosted
16 |
17 | steps:
18 | - uses: actions/checkout@v2
19 | - uses: seanmiddleditch/gha-setup-ninja@master
20 |
21 | - name: Set up Python
22 | uses: actions/github-script@0.3.0
23 | with:
24 | github-token: ${{github.token}}
25 | script: |
26 | const core = require('@actions/core')
27 | core.exportVariable("PATH", "/home/ubuntu/anaconda3/bin:/usr/local/bin:/usr/bin/:/bin:$PATH")
28 |
29 | - name: Install package
30 | run: |
31 | python -m pip install --upgrade pip
32 | pip install -e .
33 |
34 | - name: Run pytest
35 | run: |
36 | pip install nose
37 | nosetests -v tests/unit_test/
38 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.DS_Store
2 | *.swp
3 | *.pyc
4 | version.py
5 | build/
6 | data/
7 | docs/src/
8 | docs/html/
9 | encoding/_ext/
10 | encoding.egg-info/
11 | *.o
12 | *.so
13 | *.ninja*
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017- Hang Zhang. All rights reserved.
4 | Copyright (c) 2018- Amazon.com, Inc. or its affiliates. All rights reserved.
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | ROOTDIR = $(CURDIR)
2 |
3 | lint: cpplint pylint
4 |
5 | cpplint:
6 | tests/lint.py encoding cpp src kernel
7 |
8 | pylint:
9 | pylint --rcfile=$(ROOTDIR)/tests/pylintrc --ignore-patterns=".*\.so$$,.*\.dll$$,.*\.dylib$$" encoding --ignore=_ext
10 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # [Dual Attention Network for Scene Segmentation (CVPR2019)](https://arxiv.org/pdf/1809.02983.pdf)
2 |
3 | [Jun Fu](https://scholar.google.com/citations?user=h3vzrgkAAAAJ&hl=zh-CN), [Jing Liu](http://www.nlpr.ia.ac.cn/iva/liujing/index.html), [Haijie Tian](https://github.com/tianhaijie), [Yong Li](http://www.foreverlee.net/), Yongjun Bao, Zhiwei Fang, and Hanqing Lu
4 |
5 | ## Introduction
6 |
7 | We propose a Dual Attention Network (DANet) to adaptively integrate local features with their global dependencies based on the self-attention mechanism. We achieve new state-of-the-art segmentation performance on three challenging scene segmentation datasets, i.e., Cityscapes, PASCAL Context, and COCO Stuff-10k.
8 |
9 | 
10 |
11 | ## Cityscapes testing set result
12 |
13 | We train our DANet-101 with only fine annotated data and submit our test results to the official evaluation server.
14 |
15 | 
16 |
17 | ## Updates
18 |
19 | **2020/9**: **Code renewed** to support **PyTorch 1.4.0** or later!
20 |
21 | 2020/8: The new TNNLS version, DRANet, achieves [**82.9%**](https://www.cityscapes-dataset.com/method-details/?submissionID=4792) on the Cityscapes test set (result submitted in August 2019), which is a new state-of-the-art performance using only the fine annotated dataset and ResNet-101. The code will be released in [DRANet]().
22 |
23 | 2020/7: DANet is supported in [MMSegmentation](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/danet), where it achieves **80.47%** with single-scale testing and **82.02%** with multi-scale testing on the Cityscapes val set.
24 |
25 | 2018/9: DANet released. The trained model with ResNet-101 achieves 81.5% on the Cityscapes test set.
26 |
27 | ## Usage
28 |
29 | 1. Install PyTorch
30 |
31 |    - The code is tested with Python 3.6 and PyTorch 1.4.0.
32 | - The code is modified from [PyTorch-Encoding](https://github.com/zhanghang1989/PyTorch-Encoding).
33 |
34 | 2. Clone the repository
35 |
36 | ```shell
37 | git clone https://github.com/junfu1115/DANet.git
38 | cd DANet
39 | python setup.py install
40 | ```
41 |
42 | 3. Dataset
43 | - Download the [Cityscapes](https://www.cityscapes-dataset.com/) dataset.
44 |    - Please put the dataset in the folder `./datasets`
45 |
46 | 4. Evaluation for DANet
47 |
48 | - Download trained model [DANet101](https://drive.google.com/open?id=1XmpFEF-tbPH0Rmv4eKRxYJngr3pTbj6p) and put it in folder `./experiments/segmentation/models/`
49 |
50 | - `cd ./experiments/segmentation/`
51 |
52 | - For single scale testing, please run:
53 |
54 | - ```shell
55 | CUDA_VISIBLE_DEVICES=0,1,2,3 python test.py --dataset citys --model danet --backbone resnet101 --resume models/DANet101.pth.tar --eval --base-size 2048 --crop-size 768 --workers 1 --multi-grid --multi-dilation 4 8 16 --os 8 --aux --no-deepstem
56 | ```
57 |
58 | - Evaluation Result
59 |
60 |   The expected scores should be as follows: DANet101 on the Cityscapes val set (mIoU/pAcc): **79.93/95.97** (ss)
61 |
62 | 5. Evaluation for DRANet
63 |
64 | - Download trained model [DRANet101](https://drive.google.com/file/d/1xCl2N0b0rVFH4y30HCGfy7RY3-ars7Ce/view?usp=sharing) and put it in folder `./experiments/segmentation/models/`
65 |
66 | - Evaluation code is in folder `./experiments/segmentation/`
67 |
68 | - `cd ./experiments/segmentation/`
69 |
70 | - For single scale testing, please run:
71 |
72 | - ```shell
73 | CUDA_VISIBLE_DEVICES=0,1,2,3 python test.py --dataset citys --model dran --backbone resnet101 --resume models/dran101.pth.tar --eval --base-size 2048 --crop-size 768 --workers 1 --multi-grid --multi-dilation 4 8 16 --os 8 --aux
74 | ```
75 |
76 | - Evaluation Result
77 |
78 |   The expected scores should be as follows: DRANet101 on the Cityscapes val set (mIoU/pAcc): **81.63/96.62** (ss)
79 |
80 | ## Citation
81 |
82 | If you find DANet and DRANet useful in your research, please consider citing:
83 |
84 | ```
85 | @article{fu2020scene,
86 | title={Scene Segmentation With Dual Relation-Aware Attention Network},
87 | author={Fu, Jun and Liu, Jing and Jiang, Jie and Li, Yong and Bao, Yongjun and Lu, Hanqing},
88 | journal={IEEE Transactions on Neural Networks and Learning Systems},
89 | year={2020},
90 | publisher={IEEE}
91 | }
92 | ```
93 |
94 | ```
95 | @inproceedings{fu2019dual,
96 | title={Dual attention network for scene segmentation},
97 | author={Fu, Jun and Liu, Jing and Tian, Haijie and Li, Yong and Bao, Yongjun and Fang, Zhiwei and Lu, Hanqing},
98 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
99 | pages={3146--3154},
100 | year={2019}
101 | }
102 | ```
103 |
104 |
105 |
106 | ## Acknowledgement
107 |
108 | Thanks to [PyTorch-Encoding](https://github.com/zhanghang1989/PyTorch-Encoding), especially for the Synchronized BN!
109 |
--------------------------------------------------------------------------------
/dist/torch_encoding-1.2.1b20200708-py3.6.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.1b20200708-py3.6.egg
--------------------------------------------------------------------------------
/dist/torch_encoding-1.2.2b20200707-py3.7.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200707-py3.7.egg
--------------------------------------------------------------------------------
/dist/torch_encoding-1.2.2b20200708-py3.6.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200708-py3.6.egg
--------------------------------------------------------------------------------
/dist/torch_encoding-1.2.2b20200708-py3.7.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200708-py3.7.egg
--------------------------------------------------------------------------------
/dist/torch_encoding-1.2.2b20200709-py3.6.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200709-py3.6.egg
--------------------------------------------------------------------------------
/dist/torch_encoding-1.2.2b20200725-py3.6.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200725-py3.6.egg
--------------------------------------------------------------------------------
/dist/torch_encoding-1.2.2b20200725-py3.7.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200725-py3.7.egg
--------------------------------------------------------------------------------
/dist/torch_encoding-1.2.2b20200801-py3.6.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200801-py3.6.egg
--------------------------------------------------------------------------------
/dist/torch_encoding-1.2.2b20200802-py3.6.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200802-py3.6.egg
--------------------------------------------------------------------------------
/dist/torch_encoding-1.2.2b20200814-py3.6.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200814-py3.6.egg
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = Encoding
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | docset: html
16 | doc2dash --name $(SPHINXPROJ) --icon $(SOURCEDIR)/_static/img/favicon.png --enable-js --online-redirect-url http://hangzh.com/PyTorch-Encoding/ --force $(BUILDDIR)/html/
17 |
18 | # Manually fix because Zeal doesn't deal well with `icon.png`-only at 2x resolution.
19 | cp $(SPHINXPROJ).docset/icon.png $(SPHINXPROJ).docset/icon@2x.png
20 | convert $(SPHINXPROJ).docset/icon@2x.png -resize 16x16 $(SPHINXPROJ).docset/icon.png
21 |
22 | .PHONY: help Makefile docset
23 |
24 | # Catch-all target: route all unknown targets to Sphinx using the new
25 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
26 | %: Makefile
27 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
28 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | set SPHINXPROJ=Encoding
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | echo.installed, then set the SPHINXBUILD environment variable to point
21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | echo.may add the Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from
25 | echo.http://sphinx-doc.org/
26 | exit /b 1
27 | )
28 |
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 |
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 |
35 | :end
36 | popd
37 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | sphinx-gallery
3 | sphinxcontrib-googleanalytics
4 | -e git://github.com/zhanghang1989/autorch_sphinx_theme.git#egg=autorch_sphinx_theme
5 |
--------------------------------------------------------------------------------
/docs/source/_static/css/encoding.css:
--------------------------------------------------------------------------------
1 | body {
2 | font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;
3 | }
4 |
5 | /* Default header fonts are ugly */
6 | h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption {
7 | font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;
8 | }
9 |
10 | /* Use white for docs background */
11 | .wy-side-nav-search {
12 | background-color: #a0e2ff;
13 | }
14 |
15 | .wy-nav-content-wrap, .wy-menu li.current > a {
16 | background-color: #fff;
17 | }
18 |
19 | @media screen and (min-width: 1400px) {
20 | .wy-nav-content-wrap {
21 | background-color: rgba(0, 0, 0, 0.0470588);
22 | }
23 |
24 | .wy-nav-content {
25 | background-color: #fff;
26 | }
27 | }
28 |
29 | /* Fixes for mobile */
30 | .wy-nav-top {
31 | background-color: #fff;
32 | background-repeat: no-repeat;
33 | background-position: center;
34 | padding: 0;
35 | margin: 0.4045em 0.809em;
36 | color: #333;
37 | }
38 |
39 | .wy-nav-top > a {
40 | display: none;
41 | }
42 |
43 | @media screen and (max-width: 768px) {
44 | .wy-side-nav-search>a img.logo {
45 | height: 60px;
46 | }
47 | }
48 |
49 | /* This is needed to ensure that logo above search scales properly */
50 | .wy-side-nav-search a {
51 | display: block;
52 | }
53 |
54 | /* This ensures that multiple constructors will remain in separate lines. */
55 | .rst-content dl:not(.docutils) dt {
56 | display: table;
57 | }
58 |
59 | /* Use our blue for literals */
60 | .rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal {
61 | color: #4080bf;
62 | }
63 |
64 | .rst-content tt.xref, a .rst-content tt, .rst-content tt.xref,
65 | .rst-content code.xref, a .rst-content tt, a .rst-content code {
66 | color: #404040;
67 | }
68 |
69 | /* Change link colors (except for the menu) */
70 |
71 | a {
72 | color: #4080bf;
73 | }
74 |
75 | a:hover {
76 | color: #4080bf;
77 | }
78 |
79 |
80 | a:visited {
81 | color: #306293;
82 | }
83 |
84 | .wy-menu a {
85 | color: #b3b3b3;
86 | }
87 |
88 | .wy-menu a:hover {
89 | color: #b3b3b3;
90 | }
91 |
92 | /* Default footer text is quite big */
93 | footer {
94 | font-size: 80%;
95 | }
96 |
97 | footer .rst-footer-buttons {
98 | font-size: 125%; /* revert footer settings - 1/80% = 125% */
99 | }
100 |
101 | footer p {
102 | font-size: 100%;
103 | }
104 |
105 | /* For hidden headers that appear in TOC tree */
106 | /* see http://stackoverflow.com/a/32363545/3343043
107 | */
108 | .rst-content .hidden-section {
109 | display: none;
110 | }
111 |
112 | nav .hidden-section {
113 | display: inherit;
114 | }
115 |
116 | .wy-side-nav-search>div.version {
117 | color: #000;
118 | }
119 |
--------------------------------------------------------------------------------
/docs/source/_static/img/cvpr17.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/source/_static/img/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/docs/source/_static/img/favicon.png
--------------------------------------------------------------------------------
/docs/source/_static/img/figure1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/docs/source/_static/img/figure1.jpg
--------------------------------------------------------------------------------
/docs/source/_static/img/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/docs/source/_static/img/icon.png
--------------------------------------------------------------------------------
/docs/source/_static/img/myimage.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/docs/source/_static/img/myimage.gif
--------------------------------------------------------------------------------
/docs/source/_static/img/upconv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/docs/source/_static/img/upconv.png
--------------------------------------------------------------------------------
/docs/source/_static/js/hidebib.js:
--------------------------------------------------------------------------------
1 | // adapted from: http://www.robots.ox.ac.uk/~vedaldi/assets/hidebib.js
2 | function hideallbibs()
3 | {
4 | var el = document.getElementsByTagName("div") ;
5 | for (var i = 0 ; i < el.length ; ++i) {
6 | if (el[i].className == "paper") {
7 | var bib = el[i].getElementsByTagName("pre") ;
8 | if (bib.length > 0) {
9 | bib [0] .style.display = 'none' ;
10 | }
11 | }
12 | }
13 | }
14 |
15 | function togglebib(paperid)
16 | {
17 | var paper = document.getElementById(paperid) ;
18 | var bib = paper.getElementsByTagName('pre') ;
19 | if (bib.length > 0) {
20 | if (bib [0] .style.display == 'none') {
21 | bib [0] .style.display = 'block' ;
22 | } else {
23 | bib [0] .style.display = 'none' ;
24 | }
25 | }
26 | }
27 |
28 | function toggleblock(blockId)
29 | {
30 | var block = document.getElementById(blockId);
31 | if (block.style.display == 'none') {
32 | block.style.display = 'block' ;
33 | } else {
34 | block.style.display = 'none' ;
35 | }
36 | }
37 |
38 | function hideblock(blockId)
39 | {
40 | var block = document.getElementById(blockId);
41 | block.style.display = 'none' ;
42 | }
43 |
--------------------------------------------------------------------------------
/docs/source/_static/theme_overrides.css:
--------------------------------------------------------------------------------
1 | /* override table width restrictions */
2 | @media screen and (min-width: 767px) {
3 |
4 | .wy-table-responsive table td {
5 | /* !important prevents the common CSS stylesheets from overriding
6 | this as on RTD they are loaded after this stylesheet */
7 | white-space: normal !important;
8 | }
9 |
10 | .wy-table-responsive {
11 | overflow: visible !important;
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/docs/source/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 |
3 | {%- block extrahead %}
4 |
5 |
6 |
7 | {% endblock %}
8 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. Encoding documentation master file
2 |
3 | :github_url: https://github.com/zhanghang1989/PyTorch-Encoding
4 |
5 | Encoding Documentation
6 | ======================
7 |
8 | Created by `Hang Zhang `_
9 |
10 | An optimized PyTorch package with CUDA backend.
11 |
12 |
13 | .. toctree::
14 | :glob:
15 | :maxdepth: 1
16 | :caption: Installation
17 |
18 | notes/*
19 |
20 | .. toctree::
21 | :glob:
22 | :maxdepth: 1
23 | :caption: Model Zoo
24 |
25 | model_zoo/*
26 |
27 | .. toctree::
28 | :glob:
29 | :maxdepth: 1
30 | :caption: Other Tutorials
31 |
32 | tutorials/*
33 |
34 | .. toctree::
35 | :maxdepth: 1
36 | :caption: Package Reference
37 |
38 | nn
39 | parallel
40 | utils
41 |
42 | Indices and tables
43 | ==================
44 |
45 | * :ref:`genindex`
46 | * :ref:`modindex`
47 |
--------------------------------------------------------------------------------
/docs/source/nn.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 |
4 | encoding.nn
5 | ===========
6 |
7 | Customized NN modules in the Encoding package. For Synchronized Cross-GPU Batch Normalization, please visit :class:`encoding.nn.BatchNorm2d`.
8 |
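A minimal drop-in usage sketch (illustrative only; the layer sizes are arbitrary)::

    import torch.nn as nn
    import encoding

    # encoding.nn.BatchNorm2d can be used in place of torch.nn.BatchNorm2d
    # to obtain cross-GPU synchronized batch statistics
    block = nn.Sequential(
        nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=False),
        encoding.nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
    )
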
9 | .. currentmodule:: encoding.nn
10 |
11 | :hidden:`Encoding`
12 | ~~~~~~~~~~~~~~~~~~
13 |
14 | .. autoclass:: Encoding
15 | :members:
16 |
17 | :hidden:`DistSyncBatchNorm`
18 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
19 |
20 | .. autoclass:: DistSyncBatchNorm
21 | :members:
22 |
23 | :hidden:`SyncBatchNorm`
24 | ~~~~~~~~~~~~~~~~~~~~~~~~
25 |
26 | .. autoclass:: SyncBatchNorm
27 | :members:
28 |
29 | :hidden:`BatchNorm1d`
30 | ~~~~~~~~~~~~~~~~~~~~~~~~
31 |
32 | .. autoclass:: BatchNorm1d
33 | :members:
34 |
35 | :hidden:`BatchNorm2d`
36 | ~~~~~~~~~~~~~~~~~~~~~~~~
37 |
38 | .. autoclass:: BatchNorm2d
39 | :members:
40 |
41 | :hidden:`BatchNorm3d`
42 | ~~~~~~~~~~~~~~~~~~~~~~~~
43 |
44 | .. autoclass:: BatchNorm3d
45 | :members:
46 |
47 | :hidden:`Inspiration`
48 | ~~~~~~~~~~~~~~~~~~~~~
49 |
50 | .. autoclass:: Inspiration
51 | :members:
52 |
53 | :hidden:`UpsampleConv2d`
54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
55 |
56 | .. autoclass:: UpsampleConv2d
57 | :members:
58 |
59 | :hidden:`GramMatrix`
60 | ~~~~~~~~~~~~~~~~~~~~
61 |
62 | .. autoclass:: GramMatrix
63 | :members:
64 |
--------------------------------------------------------------------------------
/docs/source/notes/compile.rst:
--------------------------------------------------------------------------------
1 | Install and Citations
2 | =====================
3 |
4 |
5 | Installation
6 | ------------
7 |
8 | * Install PyTorch 1.4.0 by following the `PyTorch instructions `_.
9 |
10 | * PIP Install::
11 |
12 | pip install torch-encoding --pre
13 |
14 | * Install from source::
15 |
16 | git clone https://github.com/zhanghang1989/PyTorch-Encoding && cd PyTorch-Encoding
17 | python setup.py install
18 |
19 |
20 | Detailed Steps
21 | --------------
22 |
23 | This tutorial is a successful setup example for an AWS EC2 p3 instance with Ubuntu 16.04 and CUDA 10.
24 | We cannot guarantee it will work for all machines, but the steps should be similar.
25 | We assume CUDA and cuDNN are already successfully installed; otherwise, please refer to other tutorials.
26 |
27 | * Install Anaconda from the `link `_ .
28 |
29 | * Install ninja::
30 |
31 | wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip
32 | sudo unzip ninja-linux.zip -d /usr/local/bin/
33 | sudo update-alternatives --install /usr/bin/ninja ninja /usr/local/bin/ninja 1 --force
34 |
35 | * Install PyTorch::
36 |
37 | conda install pytorch torchvision cudatoolkit=10.0 -c pytorch
38 |
39 | * Install this package::
40 |
41 | pip install torch-encoding --pre
42 |
43 | Citations
44 | ---------
45 |
46 | .. note::
47 | * Hang Zhang et al. "ResNeSt: Split-Attention Networks" *arXiv 2020*::
48 |
49 | @article{zhang2020resnest,
50 | title={ResNeSt: Split-Attention Networks},
51 | author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
52 | journal={arXiv preprint arXiv:2004.08955},
53 | year={2020}
54 | }
55 |
56 | * Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*::
57 |
58 | @InProceedings{Zhang_2018_CVPR,
59 | author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit},
60 | title = {Context Encoding for Semantic Segmentation},
61 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
62 | month = {June},
63 | year = {2018}
64 | }
65 |
66 |
67 | * Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*::
68 |
69 | @InProceedings{Zhang_2017_CVPR,
70 | author = {Zhang, Hang and Xue, Jia and Dana, Kristin},
71 | title = {Deep TEN: Texture Encoding Network},
72 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
73 | month = {July},
74 | year = {2017}
75 | }
76 |
--------------------------------------------------------------------------------
/docs/source/parallel.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 |
4 | encoding.parallel
5 | =================
6 |
7 | - The current PyTorch DataParallel does not support multi-GPU loss calculation, which makes GPU memory usage very imbalanced. We address this issue here by applying DataParallel to both the Model & Criterion (a usage sketch is shown below).
8 |
9 | .. note::
10 | Deprecated, please use torch.nn.parallel.DistributedDataParallel with :class:`encoding.nn.DistSyncBatchNorm` for the best performance.
11 |
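A minimal usage sketch (illustrative only; ``net``, ``criterion`` and ``train_loader`` are assumed to come from an ordinary training script)::

    from encoding.parallel import DataParallelModel, DataParallelCriterion

    net = DataParallelModel(net)                  # replicate the model on each GPU
    criterion = DataParallelCriterion(criterion)  # compute the loss on each GPU

    for inputs, targets in train_loader:
        outputs = net(inputs)                # a list of outputs, one per GPU
        loss = criterion(outputs, targets)   # per-GPU losses, reduced to a scalar
        loss.backward()
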
12 | .. automodule:: encoding.parallel
13 | .. currentmodule:: encoding.parallel
14 |
15 | :hidden:`DataParallelModel`
16 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
17 |
18 | .. autoclass:: DataParallelModel
19 | :members:
20 |
21 | :hidden:`DataParallelCriterion`
22 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23 |
24 | .. autoclass:: DataParallelCriterion
25 | :members:
26 |
27 |
28 | :hidden:`allreduce`
29 | ~~~~~~~~~~~~~~~~~~~
30 |
31 | .. autofunction:: allreduce
32 |
--------------------------------------------------------------------------------
/docs/source/tutorials/syncbn.rst:
--------------------------------------------------------------------------------
1 | Implementing Synchronized Multi-GPU Batch Normalization
2 | =======================================================
3 |
4 | In this tutorial, we discuss the implementation detail of Multi-GPU Batch Normalization (BN) (classic implementation: :class:`encoding.nn.BatchNorm2d`). We will provide the training example in a later version.
5 |
6 | How BN works?
7 | -------------
8 |
9 | BN layer was introduced in the paper `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift `_, which dramatically speeds up the training process of the network (enabling a larger learning rate) and makes the network less sensitive to the weight initialization.
10 |
11 | .. image:: http://hangzh.com/blog/images/bn1.png
12 | :align: center
13 |
14 | - Forward Pass:
15 |     For the input data :math:`X={x_1, ...x_N}`, the data are normalized to be zero-mean and unit-variance, then scaled and shifted:
16 |
17 | .. math::
18 | y_i = \gamma\cdot\frac{x_i-\mu}{\sigma} + \beta ,
19 |
20 | where :math:`\mu=\frac{\sum_i^N x_i}{N} , \sigma = \sqrt{\frac{\sum_i^N (x_i-\mu)^2}{N}+\epsilon}` and :math:`\gamma, \beta` are the learnable parameters.
21 |
22 | - Backward Pass:
23 |     For calculating the gradient :math:`\frac{d_\ell}{d_{x_i}}`, we need to consider the partial gradient from :math:`\frac{d_\ell}{d_y}` and the gradients from :math:`\frac{d_\ell}{d_\mu}` and :math:`\frac{d_\ell}{d_\sigma}`, since :math:`\mu \text{ and } \sigma` are functions of the input :math:`x_i`. We use partial derivatives in the notation:
24 |
25 | .. math::
26 |
27 | \frac{d_\ell}{d_{x_i}} = \frac{d_\ell}{d_{y_i}}\cdot\frac{\partial_{y_i}}{\partial_{x_i}} + \frac{d_\ell}{d_\mu}\cdot\frac{d_\mu}{d_{x_i}} + \frac{d_\ell}{d_\sigma}\cdot\frac{d_\sigma}{d_{x_i}}
28 |
29 | where :math:`\frac{\partial_{y_i}}{\partial_{x_i}}=\frac{\gamma}{\sigma}, \frac{d_\ell}{d_\mu}=-\frac{\gamma}{\sigma}\sum_i^N\frac{d_\ell}{d_{y_i}}
30 | \text{ and } \frac{d_\sigma}{d_{x_i}}=-\frac{1}{\sigma}(\frac{x_i-\mu}{N})`.
31 |
32 | Why Synchronize BN?
33 | -------------------
34 |
35 | - Standard implementations of BN in public frameworks (such as Caffe, MXNet, Torch, TF, PyTorch) are unsynchronized, which means that the data are normalized within each GPU. Therefore the `working batch-size` of the BN layer is `BatchSize/nGPU` (batch-size in each GPU).
36 |
37 | .. image:: http://hangzh.com/blog/images/bn2.png
38 | :align: center
39 |
40 | - Since the `working batch-size` is typically large enough for standard vision tasks, such as classification and detection, there is no need to synchronize BN layer during the training. The synchronization will slow down the training.
41 |
42 | - However, for the Semantic Segmentation task, the state-of-the-art approaches typically adopt dilated convolution, which is very memory consuming. The `working batch-size` can be too small for BN layers (2 or 4 in each GPU) when using larger/deeper pre-trained networks, such as :class:`encoding.dilated.ResNet` or :class:`encoding.dilated.DenseNet`.
43 |
44 | How to Synchronize?
45 | -------------------
46 |
47 | Suppose we have :math:`K` GPUs; :math:`sum(x)_k` and :math:`sum(x^2)_k` denote the sum of elements and the sum of squared elements on the :math:`k^{th}` GPU.
48 |
49 | - Forward Pass:
50 |     We can calculate the sum of elements :math:`sum(x)=\sum x_i \text{ and sum of squares } sum(x^2)=\sum x_i^2` in each GPU, then apply the :class:`encoding.parallel.allreduce` operation to sum across GPUs. Then calculate the global mean :math:`\mu=\frac{sum(x)}{N} \text{ and global variance } \sigma=\sqrt{\frac{sum(x^2)}{N}-\mu^2+\epsilon}`.
51 |
52 | - Backward Pass:
53 | * :math:`\frac{d_\ell}{d_{x_i}}=\frac{d_\ell}{d_{y_i}}\frac{\gamma}{\sigma}` can be calculated locally in each GPU.
54 | * Calculate the gradient of :math:`sum(x)` and :math:`sum(x^2)` individually in each GPU :math:`\frac{d_\ell}{d_{sum(x)_k}}` and :math:`\frac{d_\ell}{d_{sum(x^2)_k}}`.
55 |
56 | * Then sync the gradient (automatically handled by :class:`encoding.parallel.allreduce`) and continue the backward.
57 |
58 | .. image:: http://hangzh.com/blog/images/bn3.png
59 | :align: center
60 |
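The synchronized forward pass above can be summarized in a short PyTorch-style sketch (illustrative only; ``allreduce_sum`` is a hypothetical stand-in for a cross-GPU summation such as :class:`encoding.parallel.allreduce`, and the real implementation lives in the ``encoding.nn`` / ``encoding.functions`` modules)::

    import torch

    def sync_bn_forward(x, gamma, beta, allreduce_sum, eps=1e-5):
        # x: (N, C, H, W); per-GPU partial statistics sum(x) and sum(x^2) per channel
        local_sum = x.sum(dim=(0, 2, 3))
        local_sqsum = (x * x).sum(dim=(0, 2, 3))
        local_count = x.numel() / x.size(1)

        # aggregate the partial statistics across all GPUs
        total_sum, total_sqsum, count = allreduce_sum(local_sum, local_sqsum, local_count)

        mean = total_sum / count                    # global mean
        var = total_sqsum / count - mean * mean     # global variance: E[x^2] - mu^2
        inv_std = torch.rsqrt(var + eps)

        # normalize, then scale and shift: y = gamma * (x - mu) / sigma + beta
        x_hat = (x - mean[None, :, None, None]) * inv_std[None, :, None, None]
        return gamma[None, :, None, None] * x_hat + beta[None, :, None, None]
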
61 | Citation
62 | --------
63 |
64 | .. note::
65 | This code is provided together with the paper, please cite our work.
66 |
67 | * Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*::
68 |
69 | @InProceedings{Zhang_2018_CVPR,
70 | author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit},
71 | title = {Context Encoding for Semantic Segmentation},
72 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
73 | month = {June},
74 | year = {2018}
75 | }
76 |
--------------------------------------------------------------------------------
/docs/source/tutorials/texture.rst:
--------------------------------------------------------------------------------
1 | Deep TEN: Deep Texture Encoding Network Example
2 | ===============================================
3 |
4 | .. image:: ../_static/img/cvpr17.svg
5 | :width: 100%
6 | :align: left
7 |
8 | In this section, we show an example of training/testing Encoding-Net for texture recognition on the MINC-2500 dataset. Compared to the original Torch implementation, we use a *different learning rate* for the pre-trained base network and the encoding layer (10x), disable color jittering after reducing the lr, and adopt a much *smaller training image size* (224 instead of 352).
9 |
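The *different learning rate* mentioned above maps onto standard PyTorch parameter groups. A minimal sketch (illustrative only, not the exact code in ``train_dist.py``; ``model.pretrained`` and ``model.head`` are assumed attribute names for the backbone and the encoding head)::

    import torch

    base_lr = 0.004
    optimizer = torch.optim.SGD(
        [{'params': model.pretrained.parameters(), 'lr': base_lr},   # pre-trained backbone
         {'params': model.head.parameters(), 'lr': base_lr * 10}],   # encoding layer (10x)
        momentum=0.9, weight_decay=5e-4)
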
10 |
11 | Test Pre-trained Model
12 | ----------------------
13 |
14 | - Clone the GitHub repo::
15 |
16 | git clone https://github.com/zhanghang1989/PyTorch-Encoding
17 |
18 | - Install PyTorch Encoding (if not yet). Please follow the installation guide `Installing PyTorch Encoding <../notes/compile.html>`_.
19 |
20 | - Download the `MINC-2500 `_ dataset using the provided script::
21 |
22 | cd PyTorch-Encoding/
23 | python scripts/prepare_minc.py
24 |
25 | - Test pre-trained model on MINC-2500. The pre-trained weights will be automatically downloaded (pre-trained on the train-1 split using a single training size of 224, with an error rate of :math:`18.96\%` using a single crop on the test-1 set)::
26 |
27 | python verify.py --dataset minc --model deepten_resnet50_minc
28 |     # Terminal Output:
29 | # Top1: 81.043 | Top5: 95.617: 100%|███████████████████████████████████| 45/45 [00:18<00:00, 2.40it/s]
30 | # Top1 Acc: 81.043 | Top5 Acc: 95.617
31 |
32 |
33 | Train Your Own Model
34 | --------------------
35 |
36 | - Example training command for training the above model::
37 |
38 | CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dist.py --dataset minc --model deepten_resnet50_minc --batch-size 512 --lr 0.004 --epochs 80 --lr-step 60 --lr-scheduler step --weight-decay 5e-4
39 |
40 | - Detailed training options::
41 |
42 | -h, --help show this help message and exit
43 | --dataset DATASET training dataset (default: cifar10)
44 | --model MODEL network model type (default: densenet)
45 | --backbone BACKBONE backbone name (default: resnet50)
46 | --batch-size N batch size for training (default: 128)
47 | --test-batch-size N batch size for testing (default: 1000)
48 | --epochs N number of epochs to train (default: 300)
49 | --start_epoch N the epoch number to start (default: 0)
50 | --lr LR learning rate (default: 0.1)
51 | --momentum M SGD momentum (default: 0.9)
52 | --weight-decay M SGD weight decay (default: 1e-4)
53 | --no-cuda disables CUDA training
54 | --plot matplotlib
55 | --seed S random seed (default: 1)
56 | --resume RESUME put the path to resuming file if needed
57 | --checkname set the checkpoint name
58 | --eval evaluating
59 |
60 |
61 | Citation
62 | --------
63 |
64 | .. note::
65 | * Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*::
66 |
67 | @InProceedings{Zhang_2017_CVPR,
68 | author = {Zhang, Hang and Xue, Jia and Dana, Kristin},
69 | title = {Deep TEN: Texture Encoding Network},
70 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
71 | month = {July},
72 | year = {2017}
73 | }
74 |
--------------------------------------------------------------------------------
/docs/source/utils.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 | :class: hidden-section
3 |
4 | encoding.utils
5 | ==============
6 |
7 | Useful util functions.
8 |
9 | .. automodule:: encoding.utils
10 | .. currentmodule:: encoding.utils
11 |
12 | :hidden:`LR_Scheduler`
13 | ~~~~~~~~~~~~~~~~~~~~~~
14 |
15 | .. autoclass:: LR_Scheduler
16 | :members:
17 |
18 | :hidden:`save_checkpoint`
19 | ~~~~~~~~~~~~~~~~~~~~~~~~~
20 |
21 | .. autofunction:: save_checkpoint
22 |
23 | :hidden:`SegmentationMetric`
24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
25 |
26 | .. autoclass:: SegmentationMetric
27 | :members:
28 |
29 | :hidden:`batch_pix_accuracy`
30 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
31 |
32 | .. autofunction:: batch_pix_accuracy
33 |
34 | :hidden:`batch_intersection_union`
35 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
36 |
37 | .. autofunction:: batch_intersection_union
38 |
--------------------------------------------------------------------------------
/encoding/__init__.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | """An optimized PyTorch package with CUDA backend."""
12 | from .version import __version__
13 | from . import nn, functions, parallel, utils, models, datasets, transforms
14 |
--------------------------------------------------------------------------------
/encoding/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from torchvision.datasets import *
3 | from .base import *
4 | from .coco import COCOSegmentation
5 | from .ade20k import ADE20KSegmentation
6 | from .pascal_voc import VOCSegmentation
7 | from .pascal_aug import VOCAugSegmentation
8 | from .pcontext import ContextSegmentation
9 | from .cityscapes import CitySegmentation
10 | from .imagenet import ImageNetDataset
11 | from .minc import MINCDataset
12 |
13 | from ..utils import EncodingDeprecationWarning
14 |
15 | datasets = {
16 | 'coco': COCOSegmentation,
17 | 'ade20k': ADE20KSegmentation,
18 | 'pascal_voc': VOCSegmentation,
19 | 'pascal_aug': VOCAugSegmentation,
20 | 'pcontext': ContextSegmentation,
21 | 'citys': CitySegmentation,
22 | 'imagenet': ImageNetDataset,
23 | 'minc': MINCDataset,
24 | 'cifar10': CIFAR10,
25 | }
26 |
27 | acronyms = {
28 | 'coco': 'coco',
29 | 'pascal_voc': 'voc',
30 | 'pascal_aug': 'voc',
31 | 'pcontext': 'pcontext',
32 | 'ade20k': 'ade',
33 | 'citys': 'citys',
34 | 'minc': 'minc',
35 | 'cifar10': 'cifar10',
36 | }
37 |
38 | def get_dataset(name, **kwargs):
39 | return datasets[name.lower()](**kwargs)
40 |
41 | def _make_deprecate(meth, old_name):
42 | new_name = meth.__name__
43 |
44 | def deprecated_init(*args, **kwargs):
45 |         warnings.warn("encoding.datasets.{} is now deprecated in favor of encoding.datasets.{}."
46 | .format(old_name, new_name), EncodingDeprecationWarning)
47 | return meth(*args, **kwargs)
48 |
49 | deprecated_init.__doc__ = r"""
50 | {old_name}(...)
51 | .. warning::
52 |         This method is now deprecated in favor of :func:`encoding.datasets.{new_name}`.
53 |         See :func:`~encoding.datasets.{new_name}` for details.""".format(
54 | old_name=old_name, new_name=new_name)
55 | deprecated_init.__name__ = old_name
56 | return deprecated_init
57 |
58 | get_segmentation_dataset = _make_deprecate(get_dataset, 'get_segmentation_dataset')
59 |
--------------------------------------------------------------------------------
/encoding/datasets/base.py:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | # Created by: Hang Zhang
3 | # Email: zhang.hang@rutgers.edu
4 | # Copyright (c) 2017
5 | ###########################################################################
6 |
7 | import random
8 | import numpy as np
9 | from PIL import Image, ImageOps, ImageFilter
10 | import torch
11 | import torch.utils.data as data
12 |
13 | __all__ = ['BaseDataset', 'test_batchify_fn']
14 |
15 | class BaseDataset(data.Dataset):
16 | def __init__(self, root, split, mode=None, transform=None,
17 | target_transform=None, base_size=520, crop_size=480):
18 | self.root = root
19 | self.transform = transform
20 | self.target_transform = target_transform
21 | self.split = split
22 | self.mode = mode if mode is not None else split
23 | self.base_size = base_size
24 | self.crop_size = crop_size
25 | if self.mode == 'train':
26 | print('BaseDataset: base_size {}, crop_size {}'. \
27 | format(base_size, crop_size))
28 |
29 | def __getitem__(self, index):
30 |         raise NotImplementedError
31 |
32 | @property
33 | def num_class(self):
34 | return self.NUM_CLASS
35 |
36 | @property
37 | def pred_offset(self):
38 |         raise NotImplementedError
39 |
40 | def make_pred(self, x):
41 | return x + self.pred_offset
42 |
43 | def _val_sync_transform(self, img, mask):
44 | outsize = self.crop_size
45 | short_size = outsize
46 | w, h = img.size
47 | if w > h:
48 | oh = short_size
49 | ow = int(1.0 * w * oh / h)
50 | else:
51 | ow = short_size
52 | oh = int(1.0 * h * ow / w)
53 | img = img.resize((ow, oh), Image.BILINEAR)
54 | mask = mask.resize((ow, oh), Image.NEAREST)
55 | # center crop
56 | w, h = img.size
57 | x1 = int(round((w - outsize) / 2.))
58 | y1 = int(round((h - outsize) / 2.))
59 | img = img.crop((x1, y1, x1+outsize, y1+outsize))
60 | mask = mask.crop((x1, y1, x1+outsize, y1+outsize))
61 | # final transform
62 | return img, self._mask_transform(mask)
63 |
64 | def _sync_transform(self, img, mask):
65 | # random mirror
66 | if random.random() < 0.5:
67 | img = img.transpose(Image.FLIP_LEFT_RIGHT)
68 | mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
69 | crop_size = self.crop_size
70 | # random scale (short edge)
71 | w, h = img.size
72 | long_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0))
73 | if h > w:
74 | oh = long_size
75 | ow = int(1.0 * w * long_size / h + 0.5)
76 | short_size = ow
77 | else:
78 | ow = long_size
79 | oh = int(1.0 * h * long_size / w + 0.5)
80 | short_size = oh
81 | img = img.resize((ow, oh), Image.BILINEAR)
82 | mask = mask.resize((ow, oh), Image.NEAREST)
83 | # pad crop
84 | if short_size < crop_size:
85 | padh = crop_size - oh if oh < crop_size else 0
86 | padw = crop_size - ow if ow < crop_size else 0
87 | img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
88 | mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
89 | # random crop crop_size
90 | w, h = img.size
91 | x1 = random.randint(0, w - crop_size)
92 | y1 = random.randint(0, h - crop_size)
93 | img = img.crop((x1, y1, x1+crop_size, y1+crop_size))
94 | mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size))
95 | # final transform
96 | return img, self._mask_transform(mask)
97 |
98 | def _mask_transform(self, mask):
99 | return torch.from_numpy(np.array(mask)).long()
100 |
101 |
102 | def test_batchify_fn(data):
103 | error_msg = "batch must contain tensors, tuples or lists; found {}"
104 | if isinstance(data[0], (str, torch.Tensor)):
105 | return list(data)
106 | elif isinstance(data[0], (tuple, list)):
107 | data = zip(*data)
108 | return [test_batchify_fn(i) for i in data]
109 |     raise TypeError(error_msg.format(type(data[0])))
110 |
--------------------------------------------------------------------------------
/encoding/datasets/folder.py:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | # Created by: Hang Zhang
3 | # Email: zhang.hang@rutgers.edu
4 | # Copyright (c) 2017
5 | ###########################################################################
6 |
7 | import os
8 | import sys
9 | import numpy as np
10 | import random
11 | import math
12 |
13 | import torch.utils.data as data
14 | from PIL import Image, ImageOps
15 |
16 | import torchvision.transforms as transform
19 |
20 | class FolderLoader(data.Dataset):
21 | def __init__(self, root, transform=None):
22 | self.root = root
23 | self.transform = transform
24 | self.images = get_folder_images(root)
25 | if len(self.images) == 0:
26 | raise(RuntimeError("Found 0 images in subfolders of: \
27 | " + self.root + "\n"))
28 |
29 | def __getitem__(self, index):
30 | img = Image.open(self.images[index]).convert('RGB')
31 | if self.transform is not None:
32 | img = self.transform(img)
33 | return img, os.path.basename(self.images[index])
34 |
35 | def __len__(self):
36 | return len(self.images)
37 |
38 |
39 | def get_folder_images(img_folder):
40 | img_paths = []
41 | for filename in os.listdir(img_folder):
42 | if filename.endswith(".jpg"):
43 | imgpath = os.path.join(img_folder, filename)
44 | img_paths.append(imgpath)
45 | return img_paths
46 |
47 |
48 |
49 | class Dataloder():
50 | def __init__(self, args):
51 | # the data augmentation is implemented as part of the dataloader
52 | assert(args.test)
53 | input_transform = transform.Compose([
54 | transform.ToTensor(),
55 | transform.Normalize(args.mean, args.std)])
56 | args.test_batch_size = 1
57 |
58 | assert(args.test_folder is not None)
59 | print('loading the data from: {}'.format(args.test_folder))
60 |
61 | testset = FolderLoader(args.test_folder, input_transform)
62 | kwargs = {'num_workers': args.workers, 'pin_memory': True} \
63 | if args.cuda else {}
64 | self.trainloader = None
65 | self.testloader = data.DataLoader(testset,
66 | batch_size=args.test_batch_size,
67 | shuffle=False, **kwargs)
68 |
69 | def getloader(self):
70 | return self.trainloader, self.testloader
71 |
--------------------------------------------------------------------------------
/encoding/datasets/hpw18.py:
--------------------------------------------------------------------------------
1 | # created by: Sean Liu
2 | # Amazon Lab 126
3 | from __future__ import print_function
4 |
5 | import errno
6 | import hashlib
7 | import os
8 | import sys
9 | import tarfile
10 | import numpy as np
11 | import random
12 | import math
13 |
14 | import torch.utils.data as data
15 | import PIL
16 | from PIL import Image, ImageOps
17 |
18 | from six.moves import urllib
19 |
20 |
21 | class Segmentation_HPW18(data.Dataset):
22 | CLASSES = [
23 | 'background', 'hat', 'hair', 'sunglasses', 'upper-clothes',
24 | 'skirt', 'pants', 'dress', 'belt', 'left-shoe', 'right-shoe',
25 | 'face', 'left-leg', 'right-leg', 'left-arm', 'right-arm', 'bag',
26 | 'scarf'
27 | ]
28 |
29 | URL = "/cvdata1/lliuqian/humanParsingDataset"
30 | FILE = "hpw18.tar.gz"
31 | MD5 = ''
32 | BASE_DIR = ''
33 |
34 | def __init__(self,
35 | root,
36 | train=True,
37 | transform=None,
38 | target_transform=None,
39 | download=False):
40 | self.root = root
41 | _hpw18_root = os.path.join(self.root, self.BASE_DIR)
42 | _mask_dir = os.path.join(_hpw18_root, 'SegmentationClassAug_256x384')
43 | _image_dir = os.path.join(_hpw18_root, 'JPEGImages_256x384')
44 | self.transform = transform
45 | self.target_transform = target_transform
46 | self.train = train
47 |
48 | if download:
49 | self._download()
50 |
51 | # train/val/test splits are pre-cut
52 | _splits_dir = _hpw18_root
53 | _split_f = os.path.join(_splits_dir, 'humanparsingImageMask_256x384_absPath_train.txt')
54 | if not self.train:
55 | _split_f = os.path.join(_splits_dir, 'humanparsingImageMask_256x384_absPath_val.txt')
56 |
57 | print("reading from ", _split_f)
58 |
59 | self.images = []
60 | self.masks = []
61 | with open(os.path.join(_split_f), "r") as lines:
62 | for line in lines:
63 | s = line.split()
64 | _image = s[0] # image absolute path
65 | _mask = s[1] # mask absolute path
66 | assert os.path.isfile(_image)
67 | assert os.path.isfile(_mask)
68 | self.images.append(_image)
69 | self.masks.append(_mask)
70 | assert (len(self.images) == len(self.masks))
71 |
72 | def __getitem__(self, index):
73 | _img = Image.open(self.images[index]).convert('RGB')
74 | _timg = Image.open(self.masks[index])
75 | _target = np.array(_timg, dtype=np.uint8)
76 | _target = Image.fromarray(_target)
77 |
78 | # synchronized transform
79 | if self.train:
80 | _img, _target = self._sync_transform( _img, _target)
81 |
82 | # general resize, normalize and toTensor
83 | if self.transform is not None:
84 | _img = self.transform(_img)
85 | if self.target_transform is not None:
86 | _target = self.target_transform(_target)
87 |
88 | return _img, _target
89 |
90 | def __len__(self):
91 | return len(self.images)
92 |
93 | def _sync_transform(self, img, mask):
94 | # random rotate -10~10
95 | deg = random.uniform(-10,10)
96 | img = img.rotate(deg)
97 | mask = mask.rotate(deg, PIL.Image.NEAREST)
98 |
99 | return img, mask
100 |
101 | if __name__ == '__main__':
102 | hpw18 = Segmentation_HPW18('/cvdata1/lliuqian/', train=True)
103 | print(hpw18[0])
104 | print (len(hpw18))
105 |
--------------------------------------------------------------------------------
/encoding/datasets/imagenet.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2018
5 | ##
6 | ## This source code is licensed under the MIT-style license found in the
7 | ## LICENSE file in the root directory of this source tree
8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 |
10 | import os
11 | import torchvision.transforms as transforms
12 | import torchvision.datasets as datasets
13 |
14 | import warnings
15 | warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning)
16 |
17 | class ImageNetDataset(datasets.ImageFolder):
18 | BASE_DIR = "ILSVRC2012"
19 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), transform=None,
20 | target_transform=None, train=True, **kwargs):
21 | split='train' if train == True else 'val'
22 | root = os.path.join(root, self.BASE_DIR, split)
23 | super(ImageNetDataset, self).__init__(
24 | root, transform, target_transform)
25 |
--------------------------------------------------------------------------------
/encoding/datasets/minc.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | import os
12 | from PIL import Image
13 |
14 | import torch
15 | import torch.utils.data as data
16 |
17 | class MINCDataset(data.Dataset):
18 | NUM_CLASS = 23
19 | def __init__(self, root=os.path.expanduser('~/.encoding/data/'),
20 | train=True, transform=None, download=None):
21 | split='train' if train == True else 'val'
22 | root = os.path.join(root, 'minc-2500')
23 | self.transform = transform
24 | classes, class_to_idx = find_classes(root + '/images')
25 | if split=='train':
26 | filename = os.path.join(root, 'labels/train1.txt')
27 | else:
28 | filename = os.path.join(root, 'labels/test1.txt')
29 |
30 | self.images, self.labels = make_dataset(filename, root,
31 | class_to_idx)
32 | assert (len(self.images) == len(self.labels))
33 |
34 | def __getitem__(self, index):
35 | _img = Image.open(self.images[index]).convert('RGB')
36 | _label = self.labels[index]
37 | if self.transform is not None:
38 | _img = self.transform(_img)
39 |
40 | return _img, _label
41 |
42 | def __len__(self):
43 | return len(self.images)
44 |
45 | def find_classes(dir):
46 | classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
47 | classes.sort()
48 | class_to_idx = {classes[i]: i for i in range(len(classes))}
49 | return classes, class_to_idx
50 |
51 |
52 | def make_dataset(filename, datadir, class_to_idx):
53 | images = []
54 | labels = []
55 | with open(os.path.join(filename), "r") as lines:
56 | for line in lines:
57 | _image = os.path.join(datadir, line.rstrip('\n'))
58 | _dirname = os.path.split(os.path.dirname(_image))[1]
59 | assert os.path.isfile(_image)
60 | label = class_to_idx[_dirname]
61 | images.append(_image)
62 | labels.append(label)
63 |
64 | return images, labels
65 |
66 |
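A hypothetical usage sketch (assumes the MINC-2500 images and label lists already sit under ~/.encoding/data/):

import torchvision.transforms as transforms
from torch.utils.data import DataLoader

tf = transforms.Compose([transforms.Resize(256),
                         transforms.CenterCrop(224),
                         transforms.ToTensor()])
trainset = MINCDataset(train=True, transform=tf)
img, label = trainset[0]              # 3 x 224 x 224 tensor, int class index
trainloader = DataLoader(trainset, batch_size=32, shuffle=True)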
--------------------------------------------------------------------------------
/encoding/datasets/pascal_aug.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import scipy.io
4 | import numpy as np
5 | from PIL import Image, ImageOps, ImageFilter
6 |
7 | from .base import BaseDataset
8 |
9 | class VOCAugSegmentation(BaseDataset):
10 | voc = [
11 | 'background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle',
12 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
13 | 'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
14 | 'tv'
15 | ]
16 | NUM_CLASS = 21
17 | TRAIN_BASE_DIR = 'VOCaug/dataset/'
18 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train',
19 | mode=None, transform=None, target_transform=None, **kwargs):
20 | super(VOCAugSegmentation, self).__init__(root, split, mode, transform,
21 | target_transform, **kwargs)
22 | # train/val/test splits are pre-cut
23 | _voc_root = os.path.join(root, self.TRAIN_BASE_DIR)
24 | _mask_dir = os.path.join(_voc_root, 'cls')
25 | _image_dir = os.path.join(_voc_root, 'img')
26 | if self.mode == 'train':
27 | _split_f = os.path.join(_voc_root, 'trainval.txt')
28 | elif self.mode == 'val':
29 | _split_f = os.path.join(_voc_root, 'val.txt')
30 | else:
31 | raise RuntimeError('Unknown dataset split.')
32 | self.images = []
33 | self.masks = []
34 | with open(os.path.join(_split_f), "r") as lines:
35 | for line in lines:
36 | _image = os.path.join(_image_dir, line.rstrip('\n')+".jpg")
37 | assert os.path.isfile(_image)
38 | self.images.append(_image)
39 | if self.mode != 'test':
40 | _mask = os.path.join(_mask_dir, line.rstrip('\n')+".mat")
41 | assert os.path.isfile(_mask)
42 | self.masks.append(_mask)
43 |
44 | assert (len(self.images) == len(self.masks))
45 |
46 | def __getitem__(self, index):
47 | _img = Image.open(self.images[index]).convert('RGB')
48 | if self.mode == 'test':
49 | if self.transform is not None:
50 | _img = self.transform(_img)
51 | return _img, os.path.basename(self.images[index])
52 | _target = self._load_mat(self.masks[index])
53 | # synchronized transform
54 | if self.mode == 'train':
55 | _img, _target = self._sync_transform( _img, _target)
56 | elif self.mode == 'val':
57 | _img, _target = self._val_sync_transform( _img, _target)
58 | # general resize, normalize and toTensor
59 | if self.transform is not None:
60 | _img = self.transform(_img)
61 | if self.target_transform is not None:
62 | _target = self.target_transform(_target)
63 | return _img, _target
64 |
65 | def _load_mat(self, filename):
66 | mat = scipy.io.loadmat(filename, mat_dtype=True, squeeze_me=True,
67 | struct_as_record=False)
68 | mask = mat['GTcls'].Segmentation
69 | return Image.fromarray(mask)
70 |
71 | def __len__(self):
72 | return len(self.images)
73 |
--------------------------------------------------------------------------------
/encoding/datasets/pascal_voc.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import numpy as np
4 | from PIL import Image, ImageOps, ImageFilter
5 | from tqdm import tqdm
6 |
7 | import torch
8 | from .base import BaseDataset
9 |
10 | class VOCSegmentation(BaseDataset):
11 | CLASSES = [
12 | 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
13 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
14 | 'motorbike', 'person', 'potted-plant', 'sheep', 'sofa', 'train',
15 | 'tv/monitor', 'ambigious'
16 | ]
17 | NUM_CLASS = 21
18 | BASE_DIR = 'VOCdevkit/VOC2012'
19 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train',
20 | mode=None, transform=None, target_transform=None, **kwargs):
21 | super(VOCSegmentation, self).__init__(root, split, mode, transform,
22 | target_transform, **kwargs)
23 | _voc_root = os.path.join(self.root, self.BASE_DIR)
24 | _mask_dir = os.path.join(_voc_root, 'SegmentationClass')
25 | _image_dir = os.path.join(_voc_root, 'JPEGImages')
26 | # train/val/test splits are pre-cut
27 | _splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation')
28 | if self.mode == 'train':
29 | _split_f = os.path.join(_splits_dir, 'trainval.txt')
30 | elif self.mode == 'val':
31 | _split_f = os.path.join(_splits_dir, 'val.txt')
32 | elif self.mode == 'test':
33 | _split_f = os.path.join(_splits_dir, 'test.txt')
34 | else:
35 | raise RuntimeError('Unknown dataset split.')
36 | self.images = []
37 | self.masks = []
38 | with open(os.path.join(_split_f), "r") as lines:
39 | for line in tqdm(lines):
40 | _image = os.path.join(_image_dir, line.rstrip('\n')+".jpg")
41 | assert os.path.isfile(_image)
42 | self.images.append(_image)
43 | if self.mode != 'test':
44 | _mask = os.path.join(_mask_dir, line.rstrip('\n')+".png")
45 | assert os.path.isfile(_mask)
46 | self.masks.append(_mask)
47 |
48 | if self.mode != 'test':
49 | assert (len(self.images) == len(self.masks))
50 |
51 | def __getitem__(self, index):
52 | img = Image.open(self.images[index]).convert('RGB')
53 | if self.mode == 'test':
54 | if self.transform is not None:
55 | img = self.transform(img)
56 | return img, os.path.basename(self.images[index])
57 | target = Image.open(self.masks[index])
58 | # synchronized transform
59 | if self.mode == 'train':
60 | img, target = self._sync_transform( img, target)
61 | elif self.mode == 'val':
62 | img, target = self._val_sync_transform( img, target)
63 | else:
64 | assert self.mode == 'testval'
65 | target = self._mask_transform(target)
66 | # general resize, normalize and toTensor
67 | if self.transform is not None:
68 | img = self.transform(img)
69 | if self.target_transform is not None:
70 | target = self.target_transform(target)
71 | return img, target
72 |
73 | def _mask_transform(self, mask):
74 | target = np.array(mask).astype('int32')
75 | target[target == 255] = -1
76 | return torch.from_numpy(target).long()
77 |
78 | def __len__(self):
79 | return len(self.images)
80 |
81 | @property
82 | def pred_offset(self):
83 | return 0
84 |
--------------------------------------------------------------------------------
/encoding/datasets/pcontext.py:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | # Created by: Hang Zhang
3 | # Email: zhang.hang@rutgers.edu
4 | # Copyright (c) 2017
5 | ###########################################################################
6 |
7 | from PIL import Image, ImageOps, ImageFilter
8 | import os
9 | import math
10 | import random
11 | import numpy as np
12 | from tqdm import trange
13 |
14 | import torch
15 | from .base import BaseDataset
16 |
17 | class ContextSegmentation(BaseDataset):
18 | BASE_DIR = 'VOCdevkit/VOC2010'
19 | NUM_CLASS = 59
20 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train',
21 | mode=None, transform=None, target_transform=None, **kwargs):
22 | super(ContextSegmentation, self).__init__(
23 | root, split, mode, transform, target_transform, **kwargs)
24 | from detail import Detail
25 | #from detail import mask
26 | root = os.path.join(root, self.BASE_DIR)
27 | annFile = os.path.join(root, 'trainval_merged.json')
28 | imgDir = os.path.join(root, 'JPEGImages')
29 | # training mode
30 | self.detail = Detail(annFile, imgDir, split)
31 | self.transform = transform
32 | self.target_transform = target_transform
33 | self.ids = self.detail.getImgs()
34 | # generate masks
35 | self._mapping = np.sort(np.array([
36 | 0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22,
37 | 23, 397, 25, 284, 158, 159, 416, 33, 162, 420, 454, 295, 296,
38 | 427, 44, 45, 46, 308, 59, 440, 445, 31, 232, 65, 354, 424,
39 | 68, 326, 72, 458, 34, 207, 80, 355, 85, 347, 220, 349, 360,
40 | 98, 187, 104, 105, 366, 189, 368, 113, 115]))
41 | self._key = np.array(range(len(self._mapping))).astype('uint8')
42 | mask_file = os.path.join(root, self.split+'.pth')
43 | print('mask_file:', mask_file)
44 | if os.path.exists(mask_file):
45 | self.masks = torch.load(mask_file)
46 | else:
47 | self.masks = self._preprocess(mask_file)
48 |
49 | def _class_to_index(self, mask):
50 | # assert the values
51 | values = np.unique(mask)
52 | for i in range(len(values)):
53 | assert(values[i] in self._mapping)
54 | index = np.digitize(mask.ravel(), self._mapping, right=True)
55 | return self._key[index].reshape(mask.shape)
56 |
57 | def _preprocess(self, mask_file):
58 | masks = {}
59 | tbar = trange(len(self.ids))
60 | print("Preprocessing masks, this will take a while. " + \
61 | "But don't worry, it only runs once for each split.")
62 | for i in tbar:
63 | img_id = self.ids[i]
64 | mask = Image.fromarray(self._class_to_index(
65 | self.detail.getMask(img_id)))
66 | masks[img_id['image_id']] = mask
67 | tbar.set_description("Preprocessing masks {}".format(img_id['image_id']))
68 | torch.save(masks, mask_file)
69 | return masks
70 |
71 | def __getitem__(self, index):
72 | img_id = self.ids[index]
73 | path = img_id['file_name']
74 | iid = img_id['image_id']
75 | img = Image.open(os.path.join(self.detail.img_folder, path)).convert('RGB')
76 | if self.mode == 'test':
77 | if self.transform is not None:
78 | img = self.transform(img)
79 | return img, os.path.basename(path)
80 | # convert mask to 60 categories
81 | mask = self.masks[iid]
82 | # synchronized transform
83 | if self.mode == 'train':
84 | img, mask = self._sync_transform(img, mask)
85 | elif self.mode == 'val':
86 | img, mask = self._val_sync_transform(img, mask)
87 | else:
88 | assert self.mode == 'testval'
89 | mask = self._mask_transform(mask)
90 | # general resize, normalize and toTensor
91 | if self.transform is not None:
92 | img = self.transform(img)
93 | if self.target_transform is not None:
94 | mask = self.target_transform(mask)
95 | return img, mask
96 |
97 | def _mask_transform(self, mask):
98 | target = np.array(mask).astype('int32') - 1
99 | return torch.from_numpy(target).long()
100 |
101 | def __len__(self):
102 | return len(self.ids)
103 |
104 | @property
105 | def pred_offset(self):
106 | return 1
107 |
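_class_to_index relies on np.digitize: _mapping holds the raw Detail label ids in sorted order, digitize finds each pixel's position in that array, and _key maps that position to a contiguous class index. A toy illustration with a made-up four-id mapping:

import numpy as np
mapping = np.sort(np.array([0, 9, 23, 427]))    # raw label ids, sorted
key = np.arange(len(mapping)).astype('uint8')   # contiguous ids 0..3
mask = np.array([[9, 0], [427, 23]])
index = np.digitize(mask.ravel(), mapping, right=True)
print(key[index].reshape(mask.shape))
# [[1 0]
#  [3 2]]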
--------------------------------------------------------------------------------
/encoding/functions/__init__.py:
--------------------------------------------------------------------------------
1 | """Encoding Autograd Functions"""
2 | from .encoding import *
3 | from .syncbn import *
4 | from .dist_syncbn import dist_syncbatchnorm
5 | from .customize import *
6 | from .rectify import *
7 |
--------------------------------------------------------------------------------
/encoding/functions/customize.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2018
5 | ##
6 | ## This source code is licensed under the MIT-style license found in the
7 | ## LICENSE file in the root directory of this source tree
8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 |
10 | """Customized functions"""
11 |
12 | import torch
13 | from torch.autograd import Variable, Function
14 | from .. import lib
15 |
16 | __all__ = ['NonMaxSuppression']
17 |
18 | def NonMaxSuppression(boxes, scores, threshold):
19 | r"""Non-Maximum Suppression
20 | The algorithm begins by storing the highest-scoring bounding
21 | box, and eliminating any box whose intersection-over-union (IoU)
22 | with it is too great. The procedure repeats on the surviving
23 | boxes, and so on until there are no boxes left.
24 | The stored boxes are returned.
25 |
26 | NB: The function returns a tuple (mask, indices), where
27 | indices index into the input boxes and are sorted
28 | according to score, from highest to lowest.
29 | indices[i][mask[i]] gives the indices of the surviving
30 | boxes from the ith batch, sorted by score.
31 |
32 | Args:
33 | - boxes :math:`(N, n_boxes, 4)`
34 | - scores :math:`(N, n_boxes)`
35 | - threshold (float): IoU above which to eliminate boxes
36 |
37 | Outputs:
38 | - mask: :math:`(N, n_boxes)`
39 | - indices: :math:`(N, n_boxes)`
40 |
41 | Examples::
42 |
43 | >>> boxes = torch.Tensor([[[10., 20., 20., 15.],
44 | >>> [24., 22., 50., 54.],
45 | >>> [10., 21., 20., 14.5]]])
46 | >>> scores = torch.abs(torch.randn([1, 3]))
47 | >>> mask, indices = NonMaxSuppression(boxes, scores, 0.7)
48 | >>> #indices are SORTED according to score.
49 | >>> surviving_box_indices = indices[mask]
50 | """
51 | if boxes.is_cuda:
52 | return lib.gpu.non_max_suppression(boxes, scores, threshold)
53 | else:
54 | return lib.cpu.non_max_suppression(boxes, scores, threshold)
55 |
--------------------------------------------------------------------------------
/encoding/functions/dist_syncbn.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## LICENSE file in the root directory of this source tree
7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 |
9 | import torch
10 | from torch.autograd.function import Function
11 | from .. import lib
12 |
13 | __all__ = ['dist_syncbatchnorm']
14 |
15 | class dist_syncbatchnorm_(Function):
16 | @staticmethod
17 | def forward(ctx, x, gamma, beta, running_mean, running_var, eps, momentum, training, process_group):
18 | x = x.contiguous()
19 | ctx.training = training
20 | ctx.momentum = momentum
21 | ctx.eps = eps
22 | ctx.process_group = process_group
23 |
24 | if not ctx.training:
25 | _ex, _var = running_mean.contiguous(), running_var.contiguous()
26 | _exs = _var + _ex ** 2
27 | if x.is_cuda:
28 | y = lib.gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
29 | else:
30 | y = lib.cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
31 | ctx.save_for_backward(x, _ex, _exs, gamma, beta)
32 | return y
33 |
34 | size = x.numel() // x.size(1)
35 | if size == 1:
36 | raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
37 |
38 | if x.is_cuda:
39 | _ex, _exs = lib.gpu.expectation_forward(x)
40 | else:
41 | raise NotImplementedError
42 |
43 | count = torch.Tensor([1]).to(x.device)
44 | count_all_reduce = torch.distributed.all_reduce(count, group=process_group, async_op=True)
45 | _ex_all_reduce = torch.distributed.all_reduce(_ex, group=process_group, async_op=True)
46 | _exs_all_reduce = torch.distributed.all_reduce(_exs, group=process_group, async_op=True)
47 |
48 | count_all_reduce.wait()
49 | _ex_all_reduce.wait()
50 | _exs_all_reduce.wait()
51 |
52 | _ex = _ex / count
53 | _exs = _exs / count
54 |
55 | # Update running stats
56 | _var = _exs - _ex ** 2
57 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * _ex)
58 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * _var)
59 |
60 | # Mark in-place modified tensors
61 | ctx.mark_dirty(running_mean, running_var)
62 |
63 | # BN forward + activation
64 | if x.is_cuda:
65 | y = lib.gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
66 | else:
67 | y = lib.cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
68 |
69 | ctx.save_for_backward(x, _ex, _exs, gamma, beta)
70 | return y
71 |
72 | @staticmethod
73 | def backward(ctx, dz):
74 | x, _ex, _exs, gamma, beta = ctx.saved_tensors
75 | dz = dz.contiguous()
76 |
77 | # BN backward
78 | if dz.is_cuda:
79 | dx, _dex, _dexs, dgamma, dbeta = \
80 | lib.gpu.batchnorm_backward(dz, x, _ex, _exs, gamma, beta, ctx.eps)
81 | else:
82 | raise NotImplementedError
83 |
84 | if ctx.training:
85 | process_group = ctx.process_group
86 | count = torch.Tensor([1]).to(x.device)
87 | count_all_reduce = torch.distributed.all_reduce(count, group=process_group, async_op=True)
88 | _dex_all_reduce = torch.distributed.all_reduce(_dex, group=process_group, async_op=True)
89 | _dexs_all_reduce = torch.distributed.all_reduce(_dexs, group=process_group, async_op=True)
90 |
91 | count_all_reduce.wait()
92 | _dex_all_reduce.wait()
93 | _dexs_all_reduce.wait()
94 |
95 | _dex = _dex / count
96 | _dexs = _dexs / count
97 |
98 | if x.is_cuda:
99 | dx_ = lib.gpu.expectation_backward(x, _dex, _dexs)
100 | else:
101 | raise NotImplementedError
102 | dx = dx + dx_
103 |
104 | return dx, dgamma, dbeta, None, None, None, None, None, None
105 |
106 | dist_syncbatchnorm = dist_syncbatchnorm_.apply
107 |
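The forward pass keeps per-GPU statistics as E[x] and E[x^2] so that a plain sum via all_reduce, divided by the process count, yields the global moments; the variance then follows as E[x^2] - E[x]^2. A single-process sanity check of that identity (plain PyTorch, hypothetical shapes):

import torch
x = torch.randn(8, 4, 16, 16)              # N, C, H, W
xc = x.transpose(0, 1).reshape(4, -1)      # all values per channel
ex = xc.mean(dim=1)                        # E[x]
exs = (xc ** 2).mean(dim=1)                # E[x^2]
var = exs - ex ** 2
print(torch.allclose(var, xc.var(dim=1, unbiased=False), atol=1e-5))  # True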
--------------------------------------------------------------------------------
/encoding/functions/encoding.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2018
5 | ##
6 | ## This source code is licensed under the MIT-style license found in the
7 | ## LICENSE file in the root directory of this source tree
8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 |
10 | """Functions for Encoding Layer"""
11 | import torch
12 | from torch.autograd import Function, Variable
13 | import torch.nn.functional as F
14 | from .. import lib
15 |
16 | __all__ = ['aggregate', 'scaled_l2', 'pairwise_cosine']
17 |
18 | class _aggregate(Function):
19 | @staticmethod
20 | def forward(ctx, A, X, C):
21 | # A \in(BxNxK) R \in(BxNxKxD) => E \in(BxNxD)
22 | ctx.save_for_backward(A, X, C)
23 | if A.is_cuda:
24 | E = lib.gpu.aggregate_forward(A, X, C)
25 | else:
26 | E = lib.cpu.aggregate_forward(A, X, C)
27 | return E
28 |
29 | @staticmethod
30 | def backward(ctx, gradE):
31 | A, X, C = ctx.saved_variables
32 | if A.is_cuda:
33 | gradA, gradX, gradC = lib.gpu.aggregate_backward(gradE, A, X, C)
34 | else:
35 | gradA, gradX, gradC = lib.cpu.aggregate_backward(gradE, A, X, C)
36 | return gradA, gradX, gradC
37 |
38 | def aggregate(A, X, C):
39 | r""" Aggregate operation, aggregates the residuals of the inputs (:math:`X`) with respect
40 | to the codewords (:math:`C`) with assignment weights (:math:`A`).
41 |
42 | .. math::
43 |
44 | e_{k} = \sum_{i=1}^{N} a_{ik} (x_i - d_k)
45 |
46 | Shape:
47 | - Input: :math:`A\in\mathcal{R}^{B\times N\times K}`
48 | :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}`
49 | (where :math:`B` is batch, :math:`N` is total number of features,
50 | :math:`K` is the number of codewords, :math:`D` is the feature dimension.)
51 | - Output: :math:`E\in\mathcal{R}^{B\times K\times D}`
52 |
53 | Examples:
54 | >>> B,N,K,D = 2,3,4,5
55 | >>> A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), requires_grad=True)
56 | >>> X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), requires_grad=True)
57 | >>> C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), requires_grad=True)
58 | >>> # aggregate is a plain function; call it directly
59 | >>> E = encoding.aggregate(A, X, C)
60 | """
61 | return _aggregate.apply(A, X, C)
62 |
63 | class _scaled_l2(Function):
64 | @staticmethod
65 | def forward(ctx, X, C, S):
66 | if X.is_cuda:
67 | SL = lib.gpu.scaled_l2_forward(X, C, S)
68 | else:
69 | SL = lib.cpu.scaled_l2_forward(X, C, S)
70 | ctx.save_for_backward(X, C, S, SL)
71 | return SL
72 |
73 | @staticmethod
74 | def backward(ctx, gradSL):
75 | X, C, S, SL = ctx.saved_variables
76 | if X.is_cuda:
77 | gradX, gradC, gradS = lib.gpu.scaled_l2_backward(gradSL, X, C, S, SL)
78 | else:
79 | gradX, gradC, gradS = lib.cpu.scaled_l2_backward(gradSL, X, C, S, SL)
80 | return gradX, gradC, gradS
81 |
82 | def scaled_l2(X, C, S):
83 | r""" scaled_l2 distance
84 |
85 | .. math::
86 | sl_{ik} = s_k \|x_i-c_k\|^2
87 |
88 | Shape:
89 | - Input: :math:`X\in\mathcal{R}^{B\times N\times D}`
90 | :math:`C\in\mathcal{R}^{K\times D}` :math:`S\in \mathcal{R}^K`
91 | (where :math:`B` is batch, :math:`N` is total number of features,
92 | :math:`K` is the number of codewords, :math:`D` is the feature dimension.)
93 | - Output: :math:`E\in\mathcal{R}^{B\times N\times K}`
94 | """
95 | return _scaled_l2.apply(X, C, S)
96 |
97 | # Experimental
98 | def pairwise_cosine(X, C, normalize=False):
99 | r"""Pairwise Cosine Similarity or Dot-product Similarity
100 | Shape:
101 | - Input: :math:`X\in\mathcal{R}^{B\times N\times D}`
102 | :math:`C\in\mathcal{R}^{K\times D}` :math:`S\in \mathcal{R}^K`
103 | (where :math:`B` is batch, :math:`N` is total number of features,
104 | :math:`K` is the number of codewords, :math:`D` is the feature dimension.)
105 | - Output: :math:`E\in\mathcal{R}^{B\times N\times K}`
106 | """
107 | if normalize:
108 | X = F.normalize(X, dim=2, eps=1e-8)
109 | C = F.normalize(C, dim=1, eps=1e-8)
110 | return torch.matmul(X, C.t())
111 |
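A rough sketch of how scaled_l2 and aggregate are typically chained into an encoding operation, with shapes following the docstrings above (CPU tensors are fine here since both ops have CPU kernels; assumes the package's extensions build):

import torch
from encoding.functions import scaled_l2, aggregate

B, N, K, D = 2, 3, 4, 5
X = torch.randn(B, N, D)      # input features
C = torch.randn(K, D)         # codewords
S = torch.randn(K)            # per-codeword smoothing factors
A = torch.softmax(scaled_l2(X, C, S), dim=2)   # assignments, B x N x K
E = aggregate(A, X, C)                         # encoded residuals, B x K x D
print(E.shape)                                 # torch.Size([2, 4, 5])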
--------------------------------------------------------------------------------
/encoding/functions/rectify.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## LICENSE file in the root directory of this source tree
7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 |
9 | """Rectify function"""
10 | import torch
11 | from torch.autograd import Function
12 |
13 | from .. import lib
14 |
15 | __all__ = ['rectify']
16 |
17 | class _rectify(Function):
18 | @staticmethod
19 | def forward(ctx, y, x, kernel_size, stride, padding, dilation, average):
20 | ctx.save_for_backward(x)
21 | # assuming kernel_size is 3
22 | kernel_size = [k + 2 * (d - 1) for k,d in zip(kernel_size, dilation)]
23 | ctx.kernel_size = kernel_size
24 | ctx.stride = stride
25 | ctx.padding = padding
26 | ctx.dilation = dilation
27 | ctx.average = average
28 | if x.is_cuda:
29 | lib.gpu.conv_rectify(y, x, kernel_size, stride, padding, dilation, average)
30 | else:
31 | lib.cpu.conv_rectify(y, x, kernel_size, stride, padding, dilation, average)
32 | ctx.mark_dirty(y)
33 | return y
34 |
35 | @staticmethod
36 | def backward(ctx, grad_y):
37 | x, = ctx.saved_variables
38 | if x.is_cuda:
39 | lib.gpu.conv_rectify(grad_y, x, ctx.kernel_size, ctx.stride,
40 | ctx.padding, ctx.dilation, ctx.average)
41 | else:
42 | lib.cpu.conv_rectify(grad_y, x, ctx.kernel_size, ctx.stride,
43 | ctx.padding, ctx.dilation, ctx.average)
44 | ctx.mark_dirty(grad_y)
45 | return grad_y, None, None, None, None, None, None
46 |
47 | rectify = _rectify.apply
48 |
--------------------------------------------------------------------------------
/encoding/lib/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.cpp_extension import load
4 |
5 | cwd = os.path.dirname(os.path.realpath(__file__))
6 | cpu_path = os.path.join(cwd, 'cpu')
7 | gpu_path = os.path.join(cwd, 'gpu')
8 |
9 | cpu = load('enclib_cpu', [
10 | os.path.join(cpu_path, 'operator.cpp'),
11 | os.path.join(cpu_path, 'encoding_cpu.cpp'),
12 | os.path.join(cpu_path, 'syncbn_cpu.cpp'),
13 | os.path.join(cpu_path, 'roi_align_cpu.cpp'),
14 | os.path.join(cpu_path, 'nms_cpu.cpp'),
15 | os.path.join(cpu_path, 'rectify_cpu.cpp'),
16 | ], build_directory=cpu_path, verbose=False)
17 |
18 | if torch.cuda.is_available():
19 | gpu = load('enclib_gpu', [
20 | os.path.join(gpu_path, 'operator.cpp'),
21 | os.path.join(gpu_path, 'activation_kernel.cu'),
22 | os.path.join(gpu_path, 'encoding_kernel.cu'),
23 | os.path.join(gpu_path, 'syncbn_kernel.cu'),
24 | os.path.join(gpu_path, 'roi_align_kernel.cu'),
25 | os.path.join(gpu_path, 'nms_kernel.cu'),
26 | os.path.join(gpu_path, 'rectify_cuda.cu'),
27 | os.path.join(gpu_path, 'lib_ssd.cu'),
28 | ], extra_cuda_cflags=["--expt-extended-lambda"],
29 | build_directory=gpu_path, verbose=False)
30 |
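Once this module is imported, the JIT-built extensions behave like ordinary Python modules whose functions are the pybind11 bindings from cpu/operator.cpp and gpu/operator.cpp. A rough sketch of calling one CPU binding directly (box layout (x, y, w, h) as in nms_cpu.cpp; shapes are made up):

import torch
from encoding import lib

boxes = torch.rand(1, 10, 4)    # batch with 10 boxes, (x, y, w, h)
scores = torch.rand(1, 10)
mask, indices = lib.cpu.non_max_suppression(boxes, scores, 0.7)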
--------------------------------------------------------------------------------
/encoding/lib/cpu/encoding_cpu.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | #include <vector>
3 |
4 | at::Tensor Aggregate_Forward_CPU(
5 | const at::Tensor A,
6 | const at::Tensor X,
7 | const at::Tensor C) {
8 | auto E = (A.unsqueeze(3) * (X.unsqueeze(2).expand({X.size(0), X.size(1),
9 | C.size(0), C.size(1)}) - C.unsqueeze(0).unsqueeze(0))).sum(1);
10 | return E;
11 | }
12 |
13 | std::vector<at::Tensor> Aggregate_Backward_CPU(
14 | const at::Tensor GE,
15 | const at::Tensor A,
16 | const at::Tensor X,
17 | const at::Tensor C) {
18 | auto gradA = (GE.unsqueeze(1) * (X.unsqueeze(2).expand({X.size(0), X.size(1),
19 | C.size(0), C.size(1)}) - C.unsqueeze(0).unsqueeze(0))).sum(3);
20 | auto gradX = at::bmm(A, GE);
21 | auto gradC = (-GE * A.sum(1).unsqueeze(2)).sum(0);
22 | return {gradA, gradX, gradC};
23 | }
24 |
25 | at::Tensor ScaledL2_Forward_CPU(
26 | const at::Tensor X,
27 | const at::Tensor C,
28 | const at::Tensor S) {
29 | auto SL = S.view({1, 1, C.size(0)}) * (X.unsqueeze(2).expand({X.size(0), X.size(1),
30 | C.size(0), C.size(1)}) - C.unsqueeze(0).unsqueeze(0)).pow(2).sum(3);
31 | return SL;
32 | }
33 |
34 | std::vector<at::Tensor> ScaledL2_Backward_CPU(
35 | const at::Tensor GSL,
36 | const at::Tensor X,
37 | const at::Tensor C,
38 | const at::Tensor S,
39 | const at::Tensor SL) {
40 | auto tmp = (2 * GSL * S.view({1, 1, C.size(0)})).unsqueeze(3) *
41 | (X.unsqueeze(2).expand({X.size(0), X.size(1), C.size(0), C.size(1)}) -
42 | C.unsqueeze(0).unsqueeze(0));
43 | auto GX = tmp.sum(2);
44 | auto GC = tmp.sum(0).sum(0);
45 | auto GS = (GSL * (SL / S.view({1, 1, C.size(0)}))).sum(0).sum(0);
46 | return {GX, GC, GS};
47 | }
48 |
--------------------------------------------------------------------------------
/encoding/lib/cpu/nms_cpu.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | #include <vector>
3 | #include <cmath>
4 |
5 | #ifdef _OPENMP
6 | #include <omp.h>
7 | #endif
8 |
9 | template <typename scalar>
10 | inline scalar IoU(scalar* rawInput, int idx_x, int idx_y) {
11 | scalar lr = std::fmin(rawInput[idx_x*4] + rawInput[idx_x*4+2],
12 | rawInput[idx_y*4] + rawInput[idx_y*4+2]);
13 | scalar rl = std::fmax(rawInput[idx_x*4], rawInput[idx_y*4]);
14 | scalar tb = std::fmin(rawInput[idx_x*4+1] + rawInput[idx_x*4+3],
15 | rawInput[idx_y*4+1] + rawInput[idx_y*4+3]);
16 | scalar bt = std::fmax(rawInput[idx_x*4+1], rawInput[idx_y*4+1]);
17 | scalar inter = std::fmax(0, lr-rl)*std::fmax(0, tb-bt);
18 | scalar uni = (rawInput[idx_x*4+2]*rawInput[idx_x*4+3]
19 | + rawInput[idx_y*4+2]*rawInput[idx_y*4+3] - inter);
20 | return inter/uni;
21 | }
22 |
23 |
24 | std::vector<at::Tensor> Non_Max_Suppression_CPU(
25 | const at::Tensor& input,
26 | const at::Tensor& scores,
27 | double thresh) {
28 | AT_ASSERT(input.ndimension() == 3);
29 | AT_ASSERT(scores.ndimension() == 2);
30 | AT_ASSERT(input.size(0) == scores.size(0));
31 | AT_ASSERT(input.size(1) == scores.size(1));
32 | AT_ASSERT(input.size(2) == 4);
33 | AT_ASSERT(input.is_contiguous());
34 | AT_ASSERT(scores.is_contiguous());
35 | AT_ASSERT(input.type().scalarType() == at::kFloat || input.type().scalarType() == at::kDouble);
36 | AT_ASSERT(scores.type().scalarType() == at::kFloat || scores.type().scalarType() == at::kDouble);
37 | AT_ASSERT(input.is_contiguous());
38 | AT_ASSERT(scores.is_contiguous());
39 |
40 |
41 | at::Tensor sorted_inds = std::get<1>(scores.sort(-1, true));
42 | //at::Tensor rawIdx = std::get<1>(scores.sort(-1, true));
43 |
44 | auto num_boxes = input.size(1);
45 | auto batch_size = input.size(0);
46 | auto mask = torch::zeros({batch_size, num_boxes}, input.type().toScalarType(at::kByte));
47 | //auto mask = input.type().toScalarType(at::kByte).tensor({batch_size, num_boxes});
48 | mask.fill_(1);
49 | auto *rawMask = mask.data<unsigned char>();
50 | auto *rawIdx = sorted_inds.data<int64_t>();
51 |
52 | if (input.type().scalarType() == at::kFloat)
53 | {
54 | auto *rawInput = input.data<float>();
55 |
56 | for(int batch = 0; batch < batch_size; ++batch)
57 | {
58 | int pos = batch*num_boxes;
59 |
60 | while(pos < (batch+1)*num_boxes-1)
61 | {
62 | for(int i = pos+1; i < (batch+1)*num_boxes; ++i)
63 | {
64 | if (IoU(rawInput + 4*batch*num_boxes,
65 | rawIdx[pos],
66 | rawIdx[i]) > thresh)
67 | rawMask[i] = 0;
68 | }
69 | ++pos;
70 | while(pos < (1+batch)*num_boxes-1 and (rawMask[pos] == 0))
71 | ++pos;
72 | }
73 | }
74 | }
75 | else
76 | {
77 | auto *rawInput = input.data<double>();
78 | for(int batch = 0; batch < batch_size; ++batch)
79 | {
80 | int pos = batch*num_boxes;
81 |
82 | while(pos < (batch+1)*num_boxes-1)
83 | {
84 | for(int i = pos+1; i < (batch+1)*num_boxes; ++i)
85 | {
86 | if (IoU(rawInput + 4*batch*num_boxes,
87 | rawIdx[pos],
88 | rawIdx[i]) > thresh)
89 | rawMask[i] = 0;
90 | }
91 | ++pos;
92 | while(pos < (1+batch)*num_boxes-1 and (rawMask[pos] == 0))
93 | ++pos;
94 | }
95 | }
96 | }
97 | //see ./cuda/NonMaxSuppression.cu for comment about return value.
98 | return {mask, sorted_inds};
99 | }
100 |
--------------------------------------------------------------------------------
/encoding/lib/cpu/operator.cpp:
--------------------------------------------------------------------------------
1 | #include "operator.h"
2 |
3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
4 | m.def("roi_align_forward", &ROIAlign_Forward_CPU, "ROI Align forward (CPU)");
5 | m.def("roi_align_backward", &ROIAlign_Backward_CPU, "ROI Align backward (CPU)");
6 | m.def("aggregate_forward", &Aggregate_Forward_CPU, "Aggregate forward (CPU)");
7 | m.def("aggregate_backward", &Aggregate_Backward_CPU, "Aggregate backward (CPU)");
8 | m.def("scaled_l2_forward", &ScaledL2_Forward_CPU, "ScaledL2 forward (CPU)");
9 | m.def("scaled_l2_backward", &ScaledL2_Backward_CPU, "ScaledL2 backward (CPU)");
10 | m.def("batchnorm_forward", &BatchNorm_Forward_CPU, "BatchNorm forward (CPU)");
11 | m.def("batchnorm_backward", &BatchNorm_Backward_CPU, "BatchNorm backward (CPU)");
12 | m.def("sumsquare_forward", &Sum_Square_Forward_CPU, "SumSqu forward (CPU)");
13 | m.def("sumsquare_backward", &Sum_Square_Backward_CPU, "SumSqu backward (CPU)");
14 | m.def("non_max_suppression", &Non_Max_Suppression_CPU, "NMS (CPU)");
15 | m.def("conv_rectify", &CONV_RECTIFY_CPU, "Convolution Rectifier (CPU)");
16 | // Apply fused color jitter
17 | m.def("apply_transform", &apply_transform, "apply_transform");
18 | }
19 |
--------------------------------------------------------------------------------
/encoding/lib/cpu/operator.h:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | #include <ATen/ATen.h>
3 | #include <vector>
4 |
5 | #include <pybind11/numpy.h>
6 | #include <pybind11/stl.h>
7 |
8 | at::Tensor ROIAlign_Forward_CPU(
9 | const at::Tensor& input,
10 | const at::Tensor& bottom_rois,
11 | int64_t pooled_height,
12 | int64_t pooled_width,
13 | double spatial_scale,
14 | int64_t sampling_ratio);
15 |
16 | at::Tensor ROIAlign_Backward_CPU(
17 | const at::Tensor& bottom_rois,
18 | const at::Tensor& grad_output,
19 | int64_t b_size,
20 | int64_t channels,
21 | int64_t height,
22 | int64_t width,
23 | int64_t pooled_height,
24 | int64_t pooled_width,
25 | double spatial_scale,
26 | int64_t sampling_ratio);
27 |
28 | at::Tensor Aggregate_Forward_CPU(
29 | const at::Tensor A,
30 | const at::Tensor X,
31 | const at::Tensor C);
32 |
33 | std::vector<at::Tensor> Aggregate_Backward_CPU(
34 | const at::Tensor GE,
35 | const at::Tensor A,
36 | const at::Tensor X,
37 | const at::Tensor C);
38 |
39 | at::Tensor ScaledL2_Forward_CPU(
40 | const at::Tensor X_,
41 | const at::Tensor C_,
42 | const at::Tensor S_);
43 |
44 | std::vector<at::Tensor> ScaledL2_Backward_CPU(
45 | const at::Tensor GSL_,
46 | const at::Tensor X_,
47 | const at::Tensor C_,
48 | const at::Tensor S_,
49 | const at::Tensor SL_);
50 |
51 | at::Tensor BatchNorm_Forward_CPU(
52 | const at::Tensor input_,
53 | const at::Tensor mean_,
54 | const at::Tensor std_,
55 | const at::Tensor gamma_,
56 | const at::Tensor beta_);
57 |
58 | std::vector<at::Tensor> BatchNorm_Backward_CPU(
59 | const at::Tensor gradoutput_,
60 | const at::Tensor input_,
61 | const at::Tensor mean_,
62 | const at::Tensor std_,
63 | const at::Tensor gamma_,
64 | const at::Tensor beta_,
65 | bool train);
66 |
67 | std::vector<at::Tensor> Sum_Square_Forward_CPU(
68 | const at::Tensor input_);
69 |
70 | at::Tensor Sum_Square_Backward_CPU(
71 | const at::Tensor input_,
72 | const at::Tensor gradSum_,
73 | const at::Tensor gradSquare_);
74 |
75 | std::vector<at::Tensor> Non_Max_Suppression_CPU(
76 | const at::Tensor& input,
77 | const at::Tensor& scores,
78 | double thresh);
79 |
80 | void CONV_RECTIFY_CPU(
81 | at::Tensor& output,
82 | const at::Tensor& input,
83 | at::IntArrayRef kernel_size,
84 | at::IntArrayRef stride,
85 | at::IntArrayRef padding,
86 | at::IntArrayRef dilation,
87 | bool avg_mode);
88 |
89 | // Fused color jitter application
90 | // ctm [4,4], img [H, W, C]
91 | py::array_t<float> apply_transform(int H, int W, int C, py::array_t<float> img, py::array_t<float> ctm) {
92 | auto img_buf = img.request();
93 | auto ctm_buf = ctm.request();
94 |
95 | // printf("H: %d, W: %d, C: %d\n", H, W, C);
96 | py::array_t<float> result{(unsigned long)img_buf.size};
97 | auto res_buf = result.request();
98 |
99 | float *img_ptr = (float *)img_buf.ptr;
100 | float *ctm_ptr = (float *)ctm_buf.ptr;
101 | float *res_ptr = (float *)res_buf.ptr;
102 |
103 | for (int h = 0; h < H; ++h) {
104 | for (int w = 0; w < W; ++w) {
105 | float *ptr = &img_ptr[h * W * C + w * C];
106 | float *out_ptr = &res_ptr[h * W * C + w * C];
107 | // manually unroll over C
108 | out_ptr[0] = ctm_ptr[0] * ptr[0] + ctm_ptr[1] * ptr[1] + ctm_ptr[2] * ptr[2] + ctm_ptr[3];
109 | out_ptr[1] = ctm_ptr[4] * ptr[0] + ctm_ptr[5] * ptr[1] + ctm_ptr[6] * ptr[2] + ctm_ptr[7];
110 | out_ptr[2] = ctm_ptr[8] * ptr[0] + ctm_ptr[9] * ptr[1] + ctm_ptr[10] * ptr[2] + ctm_ptr[11];
111 | }
112 | }
113 |
114 | result.resize({H, W, C});
115 |
116 | return result;
117 | }
118 |
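apply_transform multiplies each pixel's RGB vector by the top three rows of the 4x4 color-transform matrix ctm, i.e. an affine map on the color channels. A NumPy sketch of the same arithmetic (illustrative only; all names are hypothetical):

import numpy as np

H, W, C = 2, 2, 3
img = np.random.rand(H, W, C).astype(np.float32)
ctm = np.eye(4, dtype=np.float32)    # identity color-transform matrix
ctm[0, 3] = 0.1                      # e.g. brighten the red channel
rgb1 = np.concatenate([img, np.ones((H, W, 1), np.float32)], axis=2)
out = rgb1 @ ctm[:3].T               # same math as the unrolled loop above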
--------------------------------------------------------------------------------
/encoding/lib/cpu/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CppExtension
3 |
4 | setup(
5 | name='enclib_cpu',
6 | ext_modules=[
7 | CppExtension('enclib_cpu', [
8 | 'operator.cpp',
9 | 'roi_align_cpu.cpp',
10 | 'encoding_cpu.cpp',
11 | 'syncbn_cpu.cpp',
12 | 'nms_cpu.cpp',
13 | ]),
14 | ],
15 | cmdclass={
16 | 'build_ext': BuildExtension
17 | })
18 |
--------------------------------------------------------------------------------
/encoding/lib/cpu/syncbn_cpu.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | #include <ATen/ATen.h>
3 | #include <vector>
4 |
5 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) {
6 | if (x.ndimension() == 2) {
7 | return v;
8 | } else {
9 | std::vector<int64_t> broadcast_size = {1, -1};
10 | for (int64_t i = 2; i < x.ndimension(); ++i)
11 | broadcast_size.push_back(1);
12 |
13 | return v.view(broadcast_size);
14 | }
15 | }
16 |
17 | at::Tensor BatchNorm_Forward_CPU(
18 | const at::Tensor input,
19 | const at::Tensor mean,
20 | const at::Tensor std,
21 | const at::Tensor gamma,
22 | const at::Tensor beta) {
23 | auto output = (input - broadcast_to(mean, input)) / broadcast_to(std, input);
24 | output = output * broadcast_to(gamma, input) + broadcast_to(beta, input);
25 | return output;
26 | }
27 |
28 | // Not implementing CPU backward for now
29 | std::vector<at::Tensor> BatchNorm_Backward_CPU(
30 | const at::Tensor gradoutput,
31 | const at::Tensor input,
32 | const at::Tensor mean,
33 | const at::Tensor std,
34 | const at::Tensor gamma,
35 | const at::Tensor beta,
36 | bool train) {
37 | /* outputs*/
38 | at::Tensor gradinput = at::zeros_like(input);
39 | at::Tensor gradgamma = at::zeros_like(gamma);
40 | at::Tensor gradbeta = at::zeros_like(beta);
41 | at::Tensor gradMean = at::zeros_like(mean);
42 | at::Tensor gradStd = at::zeros_like(std);
43 | return {gradinput, gradMean, gradStd, gradgamma, gradbeta};
44 | }
45 |
46 | std::vector<at::Tensor> Sum_Square_Forward_CPU(
47 | const at::Tensor input) {
48 | /* outputs */
49 | at::Tensor sum = torch::zeros({input.size(1)}, input.options());
50 | at::Tensor square = torch::zeros({input.size(1)}, input.options());
51 | return {sum, square};
52 | }
53 |
54 | at::Tensor Sum_Square_Backward_CPU(
55 | const at::Tensor input,
56 | const at::Tensor gradSum,
57 | const at::Tensor gradSquare) {
58 | /* outputs */
59 | at::Tensor gradInput = at::zeros_like(input);
60 | return gradInput;
61 | }
62 |
--------------------------------------------------------------------------------
/encoding/lib/gpu/activation_kernel.cu:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | #include <ATen/cuda/CUDAContext.h>
3 | #include <vector>
4 |
5 | #include <thrust/device_ptr.h>
6 | #include <thrust/execution_policy.h>
7 | #include <thrust/transform.h>
8 | #include "common.h"
9 |
10 |
11 | namespace {
12 |
13 | template <typename T>
14 | inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
15 | // Create thrust pointers
16 | thrust::device_ptr<T> th_z = thrust::device_pointer_cast(z);
17 | thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);
18 |
19 | thrust::transform_if(th_dz, th_dz + count, th_z, th_dz,
20 | [slope] __device__ (const T& dz) { return dz * slope; },
21 | [] __device__ (const T& z) { return z < 0; });
22 | thrust::transform_if(th_z, th_z + count, th_z,
23 | [slope] __device__ (const T& z) { return z / slope; },
24 | [] __device__ (const T& z) { return z < 0; });
25 | }
26 |
27 | }
28 |
29 | void LeakyRelu_Forward_CUDA(at::Tensor z, float slope) {
30 | at::leaky_relu_(z, slope);
31 | }
32 |
33 | void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope) {
34 | int64_t count = z.numel();
35 |
36 | AT_DISPATCH_FLOATING_TYPES(z.type(), "LeakyRelu_Backward_CUDA", ([&] {
37 | leaky_relu_backward_impl<scalar_t>(z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
38 | }));
39 | /*
40 | // unstable after scaling
41 | at::leaky_relu_(z, 1.0 / slope);
42 | at::leaky_relu_backward(dz, z, slope);
43 | */
44 | }
45 |
--------------------------------------------------------------------------------
/encoding/lib/gpu/device_tensor.h:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 |
3 | template <typename DType, int Dim>
4 | struct DeviceTensor {
5 | public:
6 | inline __device__ __host__ DeviceTensor(DType *p, const int *size)
7 | : dptr_(p) {
8 | for (int i = 0; i < Dim; ++i) {
9 | size_[i] = size ? size[i] : 0;
10 | }
11 | }
12 |
13 | inline __device__ __host__ unsigned getSize(const int i) const {
14 | assert(i < Dim);
15 | return size_[i];
16 | }
17 |
18 | inline __device__ __host__ int numElements() const {
19 | int n = 1;
20 | for (int i = 0; i < Dim; ++i) {
21 | n *= size_[i];
22 | }
23 | return n;
24 | }
25 |
26 | inline __device__ __host__ DeviceTensor<DType, Dim-1> select(const size_t x) const {
27 | assert(Dim > 1);
28 | int offset = x;
29 | for (int i = 1; i < Dim; ++i) {
30 | offset *= size_[i];
31 | }
32 | DeviceTensor<DType, Dim-1> tensor(dptr_ + offset, nullptr);
33 | for (int i = 0; i < Dim - 1; ++i) {
34 | tensor.size_[i] = this->size_[i+1];
35 | }
36 | return tensor;
37 | }
38 |
39 | inline __device__ __host__ DeviceTensor<DType, Dim-1> operator[](const size_t x) const {
40 | assert(Dim > 1);
41 | int offset = x;
42 | for (int i = 1; i < Dim; ++i) {
43 | offset *= size_[i];
44 | }
45 | DeviceTensor<DType, Dim-1> tensor(dptr_ + offset, nullptr);
46 | for (int i = 0; i < Dim - 1; ++i) {
47 | tensor.size_[i] = this->size_[i+1];
48 | }
49 | return tensor;
50 | }
51 |
52 | inline __device__ __host__ size_t InnerSize() const {
53 | assert(Dim >= 3);
54 | size_t sz = 1;
55 | for (size_t i = 2; i < Dim; ++i) {
56 | sz *= size_[i];
57 | }
58 | return sz;
59 | }
60 |
61 | inline __device__ __host__ size_t ChannelCount() const {
62 | assert(Dim >= 3);
63 | return size_[1];
64 | }
65 |
66 | inline __device__ __host__ DType* data_ptr() const {
67 | return dptr_;
68 | }
69 |
70 | DType *dptr_;
71 | int size_[Dim];
72 | };
73 |
74 | template <typename DType>
75 | struct DeviceTensor<DType, 1> {
76 | inline __device__ __host__ DeviceTensor(DType *p, const int *size)
77 | : dptr_(p) {
78 | size_[0] = size ? size[0] : 0;
79 | }
80 |
81 | inline __device__ __host__ unsigned getSize(const int i) const {
82 | assert(i == 0);
83 | return size_[0];
84 | }
85 |
86 | inline __device__ __host__ int numElements() const {
87 | return size_[0];
88 | }
89 |
90 | inline __device__ __host__ DType &operator[](const size_t x) const {
91 | return *(dptr_ + x);
92 | }
93 |
94 | inline __device__ __host__ DType* data_ptr() const {
95 | return dptr_;
96 | }
97 |
98 | DType *dptr_;
99 | int size_[1];
100 | };
101 |
102 | template <typename DType, int Dim>
103 | static DeviceTensor<DType, Dim> devicetensor(const at::Tensor &blob) {
104 | DType *data = blob.data<DType>();
105 | DeviceTensor<DType, Dim> tensor(data, nullptr);
106 | for (int i = 0; i < Dim; ++i) {
107 | tensor.size_[i] = blob.size(i);
108 | }
109 | return tensor;
110 | }
111 |
--------------------------------------------------------------------------------
/encoding/lib/gpu/operator.cpp:
--------------------------------------------------------------------------------
1 | #include "operator.h"
2 |
3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
4 | m.def("roi_align_forward", &ROIAlign_Forward_CUDA, "ROI Align forward (CUDA)");
5 | m.def("roi_align_backward", &ROIAlign_Backward_CUDA, "ROI Align backward (CUDA)");
6 | m.def("non_max_suppression", &Non_Max_Suppression_CUDA, "NMS (CUDA)");
7 | m.def("aggregate_forward", &Aggregate_Forward_CUDA, "Aggregate forward (CUDA)");
8 | m.def("aggregate_backward", &Aggregate_Backward_CUDA, "Aggregate backward (CUDA)");
9 | m.def("scaled_l2_forward", &ScaledL2_Forward_CUDA, "ScaledL2 forward (CUDA)");
10 | m.def("scaled_l2_backward", &ScaledL2_Backward_CUDA, "ScaledL2 backward (CUDA)");
11 | m.def("batchnorm_forward", &BatchNorm_Forward_CUDA, "BatchNorm forward (CUDA)");
12 | m.def("batchnorm_inp_forward", &BatchNorm_Forward_Inp_CUDA, "BatchNorm forward (CUDA)");
13 | m.def("batchnorm_backward", &BatchNorm_Backward_CUDA, "BatchNorm backward (CUDA)");
14 | m.def("batchnorm_inp_backward", &BatchNorm_Inp_Backward_CUDA, "BatchNorm backward (CUDA)");
15 | m.def("expectation_forward", &Expectation_Forward_CUDA, "Expectation forward (CUDA)");
16 | m.def("expectation_backward", &Expectation_Backward_CUDA, "Expectation backward (CUDA)");
17 | m.def("expectation_inp_backward", &Expectation_Inp_Backward_CUDA,
18 | "Inplace Expectation backward (CUDA)");
19 | m.def("leaky_relu_forward", &LeakyRelu_Forward_CUDA, "Leaky ReLU forward (CUDA)");
20 | m.def("leaky_relu_backward", &LeakyRelu_Backward_CUDA, "Leaky ReLU backward (CUDA)");
21 | m.def("conv_rectify", &CONV_RECTIFY_CUDA, "Convolution Rectifier (CUDA)");
22 | // batched box encoder
23 | m.def("box_encoder", &box_encoder, "box_encoder");
24 | m.def("random_horiz_flip", &random_horiz_flip, "random_horiz_flip");
25 | }
26 |
--------------------------------------------------------------------------------
/encoding/lib/gpu/operator.h:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | #include <ATen/ATen.h>
3 | #include <vector>
4 |
5 | std::vector<at::Tensor> box_encoder(
6 | const int N_img,
7 | const at::Tensor& bbox_input,
8 | const at::Tensor& bbox_offsets,
9 | const at::Tensor& labels_input,
10 | const at::Tensor& dbox,
11 | const float criteria = 0.5);
12 |
13 | std::vector<at::Tensor> random_horiz_flip(
14 | at::Tensor& img,
15 | at::Tensor& bboxes,
16 | const at::Tensor& bbox_offsets,
17 | const float p,
18 | const bool nhwc);
19 |
20 | at::Tensor ROIAlign_Forward_CUDA(
21 | const at::Tensor input,
22 | const at::Tensor rois,
23 | int64_t pooled_height,
24 | int64_t pooled_width,
25 | double spatial_scale,
26 | int64_t sample_ratio);
27 |
28 | at::Tensor ROIAlign_Backward_CUDA(
29 | const at::Tensor rois,
30 | const at::Tensor grad_output,
31 | int64_t b_size,
32 | int64_t channels,
33 | int64_t height,
34 | int64_t width,
35 | int64_t pooled_height,
36 | int64_t pooled_width,
37 | double spatial_scale,
38 | int64_t sampling_ratio);
39 |
40 | std::vector<at::Tensor> Non_Max_Suppression_CUDA(
41 | const at::Tensor& input,
42 | const at::Tensor& scores,
43 | double thresh);
44 |
45 | at::Tensor Aggregate_Forward_CUDA(
46 | const at::Tensor A_,
47 | const at::Tensor X_,
48 | const at::Tensor C_);
49 |
50 | std::vector<at::Tensor> Aggregate_Backward_CUDA(
51 | const at::Tensor GE_,
52 | const at::Tensor A_,
53 | const at::Tensor X_,
54 | const at::Tensor C_);
55 |
56 | at::Tensor ScaledL2_Forward_CUDA(
57 | const at::Tensor X_,
58 | const at::Tensor C_,
59 | const at::Tensor S_);
60 |
61 | std::vector<at::Tensor> ScaledL2_Backward_CUDA(
62 | const at::Tensor GSL_,
63 | const at::Tensor X_,
64 | const at::Tensor C_,
65 | const at::Tensor S_,
66 | const at::Tensor SL_);
67 |
68 | at::Tensor BatchNorm_Forward_CUDA(
69 | const at::Tensor input_,
70 | const at::Tensor mean_,
71 | const at::Tensor std_,
72 | const at::Tensor gamma_,
73 | const at::Tensor beta_,
74 | float eps);
75 |
76 | at::Tensor BatchNorm_Forward_Inp_CUDA(
77 | const at::Tensor input_,
78 | const at::Tensor ex_,
79 | const at::Tensor exs_,
80 | const at::Tensor gamma_,
81 | const at::Tensor beta_,
82 | float eps);
83 |
84 | std::vector<at::Tensor> BatchNorm_Backward_CUDA(
85 | const at::Tensor gradoutput_,
86 | const at::Tensor input_,
87 | const at::Tensor ex_,
88 | const at::Tensor exs_,
89 | const at::Tensor gamma_,
90 | const at::Tensor beta_,
91 | float eps);
92 |
93 | std::vector<at::Tensor> BatchNorm_Inp_Backward_CUDA(
94 | const at::Tensor gradoutput_,
95 | const at::Tensor output_,
96 | const at::Tensor ex_,
97 | const at::Tensor exs_,
98 | const at::Tensor gamma_,
99 | const at::Tensor beta_,
100 | float eps);
101 |
102 | std::vector<at::Tensor> Expectation_Forward_CUDA(
103 | const at::Tensor input_);
104 |
105 | at::Tensor Expectation_Backward_CUDA(
106 | const at::Tensor input_,
107 | const at::Tensor gradEx_,
108 | const at::Tensor gradExs_);
109 |
110 | at::Tensor Expectation_Inp_Backward_CUDA(
111 | const at::Tensor gradInput_,
112 | const at::Tensor output_,
113 | const at::Tensor gradEx_,
114 | const at::Tensor gradExs_,
115 | const at::Tensor ex_,
116 | const at::Tensor exs_,
117 | const at::Tensor gamma_,
118 | const at::Tensor beta_,
119 | float eps);
120 |
121 | void LeakyRelu_Forward_CUDA(at::Tensor z, float slope);
122 |
123 | void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope);
124 |
125 | void CONV_RECTIFY_CUDA(
126 | at::Tensor& output,
127 | const at::Tensor& input,
128 | at::IntArrayRef kernel_size,
129 | at::IntArrayRef stride,
130 | at::IntArrayRef padding,
131 | at::IntArrayRef dilation,
132 | bool avg_mode);
133 |
--------------------------------------------------------------------------------
/encoding/lib/gpu/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
3 |
4 | setup(
5 | name='enclib_gpu',
6 | ext_modules=[
7 | CUDAExtension('enclib_gpu', [
8 | 'operator.cpp',
9 | 'activation_kernel.cu',
10 | 'encoding_kernel.cu',
11 | 'syncbn_kernel.cu',
12 | 'roi_align_kernel.cu',
13 | 'nms_kernel.cu',
14 | 'rectify_cuda.cu',
15 | ]),
16 | ],
17 | cmdclass={
18 | 'build_ext': BuildExtension
19 | })
20 |
--------------------------------------------------------------------------------
/encoding/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .model_zoo import get_model
2 | from .model_zoo import model_list
3 | from .model_store import get_model_file, pretrained_model_list
4 |
5 | from .sseg import get_segmentation_model, MultiEvalModule
6 |
--------------------------------------------------------------------------------
/encoding/models/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import *
2 | from .resnest import *
3 | from .resnext import *
4 | from .resnet_variants import *
5 | from .wideresnet import *
6 | from .xception import *
7 |
--------------------------------------------------------------------------------
/encoding/models/backbone/resnest.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## LICENSE file in the root directory of this source tree
7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 | """ResNeSt models"""
9 |
10 | import torch
11 | from .resnet import ResNet, Bottleneck
12 | from ..model_store import get_model_file
13 |
14 | __all__ = ['resnest50', 'resnest101', 'resnest200', 'resnest269']
15 |
16 | _url_format = 'https://hangzh.s3.amazonaws.com/encoding/models/{}-{}.pth'
17 |
18 |
19 | def resnest50(pretrained=False, root='~/.encoding/models', **kwargs):
20 | model = ResNet(Bottleneck, [3, 4, 6, 3],
21 | radix=2, groups=1, bottleneck_width=64,
22 | deep_stem=True, stem_width=32, avg_down=True,
23 | avd=True, avd_first=False, **kwargs)
24 | if pretrained:
25 | model.load_state_dict(torch.load(
26 | get_model_file('resnest50', root=root)), strict=True)
27 | return model
28 |
29 | def resnest101(pretrained=False, root='~/.encoding/models', **kwargs):
30 | model = ResNet(Bottleneck, [3, 4, 23, 3],
31 | radix=2, groups=1, bottleneck_width=64,
32 | deep_stem=True, stem_width=64, avg_down=True,
33 | avd=True, avd_first=False, **kwargs)
34 | if pretrained:
35 | model.load_state_dict(torch.load(
36 | get_model_file('resnest101', root=root)), strict=True)
37 | return model
38 |
39 | def resnest200(pretrained=False, root='~/.encoding/models', **kwargs):
40 | model = ResNet(Bottleneck, [3, 24, 36, 3],
41 | radix=2, groups=1, bottleneck_width=64,
42 | deep_stem=True, stem_width=64, avg_down=True,
43 | avd=True, avd_first=False, **kwargs)
44 | if pretrained:
45 | model.load_state_dict(torch.load(
46 | get_model_file('resnest200', root=root)), strict=False)
47 | return model
48 |
49 | def resnest269(pretrained=False, root='~/.encoding/models', **kwargs):
50 | model = ResNet(Bottleneck, [3, 30, 48, 8],
51 | radix=2, groups=1, bottleneck_width=64,
52 | deep_stem=True, stem_width=64, avg_down=True,
53 | avd=True, avd_first=False, **kwargs)
54 | if pretrained:
55 | model.load_state_dict(torch.load(
56 | get_model_file('resnest269', root=root)), strict=True)
57 | return model
58 |
59 | def resnest50_fast(pretrained=False, root='~/.encoding/models', **kwargs):
60 | model = ResNet(Bottleneck, [3, 4, 6, 3],
61 | radix=2, groups=1, bottleneck_width=64,
62 | deep_stem=True, stem_width=32, avg_down=True,
63 | avd=True, avd_first=True, **kwargs)
64 | if pretrained:
65 | model.load_state_dict(torch.load(
66 | get_model_file('resnest50fast', root=root)), strict=True)
67 | return model
68 |
69 | def resnest101_fast(pretrained=False, root='~/.encoding/models', **kwargs):
70 | model = ResNet(Bottleneck, [3, 4, 23, 3],
71 | radix=2, groups=1, bottleneck_width=64,
72 | deep_stem=True, stem_width=64, avg_down=True,
73 | avd=True, avd_first=True, **kwargs)
74 | if pretrained:
75 | model.load_state_dict(torch.load(
76 | get_model_file('resnest101fast', root=root)), strict=True)
77 | return model
78 |
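A minimal usage sketch for these constructors (assumes the package and its extensions build; the 1000-class ImageNet head is the ResNet default):

import torch
from encoding.models.backbone import resnest50

model = resnest50(pretrained=False)
model.eval()
with torch.no_grad():
    out = model(torch.randn(1, 3, 224, 224))
print(out.shape)   # torch.Size([1, 1000])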
--------------------------------------------------------------------------------
/encoding/models/backbone/resnet_variants.py:
--------------------------------------------------------------------------------
1 | """ResNet variants"""
2 |
3 | import torch
4 | from .resnet import ResNet, Bottleneck
5 | from ..model_store import get_model_file
6 |
7 | __all__ = ['resnet50s', 'resnet101s', 'resnet152s',
8 | 'resnet50d']
9 |
10 | # pspnet version of ResNet
11 | def resnet50s(pretrained=False, root='~/.encoding/models', **kwargs):
12 | """Constructs a ResNetS-50 model as in PSPNet.
13 |
14 | Args:
15 | pretrained (bool): If True, returns a model pre-trained on ImageNet
16 | """
17 | kwargs['deep_stem'] = True
18 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
19 | if pretrained:
20 | model.load_state_dict(torch.load(
21 | get_model_file('resnet50s', root=root)), strict=False)
22 | return model
23 |
24 | def resnet101s(pretrained=False, root='~/.encoding/models', **kwargs):
25 | """Constructs a ResNetS-101 model as in PSPNet.
26 |
27 | Args:
28 | pretrained (bool): If True, returns a model pre-trained on ImageNet
29 | """
30 | kwargs['deep_stem'] = True
31 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
32 | if pretrained:
33 | model.load_state_dict(torch.load(
34 | get_model_file('resnet101s', root=root)), strict=False)
35 | return model
36 |
37 | def resnet152s(pretrained=False, root='~/.encoding/models', **kwargs):
38 | """Constructs a ResNetS-152 model as in PSPNet.
39 |
40 | Args:
41 | pretrained (bool): If True, returns a model pre-trained on ImageNet
42 | """
43 | kwargs['deep_stem'] = True
44 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
45 | if pretrained:
46 | model.load_state_dict(torch.load(
47 | get_model_file('resnet152s', root=root)), strict=False)
48 | return model
49 |
50 | # ResNet-D
51 | def resnet50d(pretrained=False, root='~/.encoding/models', **kwargs):
52 | model = ResNet(Bottleneck, [3, 4, 6, 3],
53 | deep_stem=True, stem_width=32,
54 | avg_down=True, **kwargs)
55 | if pretrained:
56 | model.load_state_dict(torch.load(
57 | get_model_file('resnet50d', root=root)), strict=False)
58 | return model
59 |
--------------------------------------------------------------------------------
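A brief sketch contrasting the two variant families (not part of the repository), assuming torch-encoding is installed; `pretrained=False` skips any download:

from encoding.models.backbone import resnet50s, resnet50d

# ResNet-50s: the PSPNet-style trunk, i.e. only deep_stem=True is set.
model_s = resnet50s(pretrained=False)
# ResNet-50d: deep stem with stem_width=32 plus average-pool shortcut downsampling.
model_d = resnet50d(pretrained=False)
print(sum(p.numel() for p in model_s.parameters()),
      sum(p.numel() for p in model_d.parameters()))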
/encoding/models/backbone/resnext.py:
--------------------------------------------------------------------------------
1 | """ResNeXt models"""
2 | import torch
3 | from .resnet import ResNet, Bottleneck
4 | from ..model_store import get_model_file
5 |
6 | __all__ = ['resnext50_32x4d', 'resnext101_32x8d']
7 |
8 | def resnext50_32x4d(pretrained=False, root='~/.encoding/models', **kwargs):
9 | r"""ResNeXt-50 32x4d model from
10 |     `"Aggregated Residual Transformations for Deep Neural Networks" `_
11 |
12 | Args:
13 | pretrained (bool): If True, returns a model pre-trained on ImageNet
14 | progress (bool): If True, displays a progress bar of the download to stderr
15 | """
16 | kwargs['groups'] = 32
17 | kwargs['bottleneck_width'] = 4
18 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
19 | if pretrained:
20 | model.load_state_dict(torch.load(
21 | get_model_file('resnext50_32x4d', root=root)), strict=False)
22 | return model
23 |
24 | def resnext101_32x8d(pretrained=False, root='~/.encoding/models', **kwargs):
25 | r"""ResNeXt-101 32x8d model from
26 |     `"Aggregated Residual Transformations for Deep Neural Networks" `_
27 |
28 | Args:
29 | pretrained (bool): If True, returns a model pre-trained on ImageNet
30 | progress (bool): If True, displays a progress bar of the download to stderr
31 | """
32 | kwargs['groups'] = 32
33 | kwargs['bottleneck_width'] = 8
34 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
35 | if pretrained:
36 | model.load_state_dict(torch.load(
37 | get_model_file('resnext101_32x8d', root=root)), strict=False)
38 | return model
39 |
40 |
--------------------------------------------------------------------------------
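A short construction sketch (not part of the repository), assuming torch-encoding is installed; it only illustrates how the "32x4d" naming maps onto the ResNet kwargs:

from encoding.models.backbone import resnext50_32x4d

# "32x4d" = cardinality of 32 groups with a bottleneck width of 4 channels per
# group; the helper sets groups=32 and bottleneck_width=4 before building ResNet.
model = resnext50_32x4d(pretrained=False)
print(sum(p.numel() for p in model.parameters()))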
/encoding/models/deepten.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | import torch
12 | import torch.nn as nn
13 |
14 | from ..nn import Encoding, View, Normalize
15 | from .backbone import resnet50s, resnet101s, resnet152s
16 |
17 | __all__ = ['DeepTen', 'get_deepten', 'get_deepten_resnet50_minc']
18 |
19 | class DeepTen(nn.Module):
20 | def __init__(self, nclass, backbone):
21 | super(DeepTen, self).__init__()
22 | self.backbone = backbone
23 | # copying modules from pretrained models
24 | if self.backbone == 'resnet50':
25 | self.pretrained = resnet50s(pretrained=True, dilated=False)
26 | elif self.backbone == 'resnet101':
27 | self.pretrained = resnet101s(pretrained=True, dilated=False)
28 | elif self.backbone == 'resnet152':
29 | self.pretrained = resnet152s(pretrained=True, dilated=False)
30 | else:
31 | raise RuntimeError('unknown backbone: {}'.format(self.backbone))
32 | n_codes = 32
33 | self.head = nn.Sequential(
34 | nn.Conv2d(2048, 128, 1),
35 | nn.BatchNorm2d(128),
36 | nn.ReLU(inplace=True),
37 | Encoding(D=128,K=n_codes),
38 | View(-1, 128*n_codes),
39 | Normalize(),
40 | nn.Linear(128*n_codes, nclass),
41 | )
42 |
43 | def forward(self, x):
44 | _, _, h, w = x.size()
45 | x = self.pretrained.conv1(x)
46 | x = self.pretrained.bn1(x)
47 | x = self.pretrained.relu(x)
48 | x = self.pretrained.maxpool(x)
49 | x = self.pretrained.layer1(x)
50 | x = self.pretrained.layer2(x)
51 | x = self.pretrained.layer3(x)
52 | x = self.pretrained.layer4(x)
53 | return self.head(x)
54 |
55 | def get_deepten(dataset='pascal_voc', backbone='resnet50', pretrained=False,
56 | root='~/.encoding/models', **kwargs):
57 | r"""DeepTen model from the paper `"Deep TEN: Texture Encoding Network"
58 | `_
59 | Parameters
60 | ----------
61 | dataset : str, default pascal_voc
62 |         The dataset the model is trained on, e.g. minc.
63 | pretrained : bool, default False
64 | Whether to load the pretrained weights for model.
65 | root : str, default '~/.encoding/models'
66 | Location for keeping the model parameters.
67 | Examples
68 | --------
69 | >>> model = get_deepten(dataset='minc', backbone='resnet50', pretrained=False)
70 | >>> print(model)
71 | """
72 | from ..datasets import datasets, acronyms
73 | model = DeepTen(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, **kwargs)
74 | if pretrained:
75 | from .model_store import get_model_file
76 | model.load_state_dict(torch.load(
77 | get_model_file('deepten_%s_%s'%(backbone, acronyms[dataset]), root=root)))
78 | return model
79 |
80 | def get_deepten_resnet50_minc(pretrained=False, root='~/.encoding/models', **kwargs):
81 | r"""DeepTen model from the paper `"Deep TEN: Texture Encoding Network"
82 | `_
83 | Parameters
84 | ----------
85 | pretrained : bool, default False
86 | Whether to load the pretrained weights for model.
87 | root : str, default '~/.encoding/models'
88 | Location for keeping the model parameters.
89 |
90 |
91 | Examples
92 | --------
93 | >>> model = get_deepten_resnet50_minc(pretrained=True)
94 | >>> print(model)
95 | """
96 | return get_deepten(dataset='minc', backbone='resnet50', pretrained=pretrained,
97 | root=root, **kwargs)
98 |
--------------------------------------------------------------------------------
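A usage sketch for Deep TEN (not part of the repository), assuming torch-encoding and its compiled extensions are installed; note that the DeepTen constructor always loads an ImageNet-pretrained ResNet backbone, so building the model triggers a one-time weight download:

import torch
import encoding

# Deep TEN with a ResNet-50s backbone and a 32-codeword encoding head,
# sized for the MINC material-recognition classes.
model = encoding.models.get_model('deepten_resnet50_minc', pretrained=False)
model.eval()
with torch.no_grad():
    logits = model(torch.rand(1, 3, 224, 224))
print(logits.shape)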
/encoding/models/model_zoo.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=wildcard-import, unused-wildcard-import
2 |
3 | from .backbone import *
4 | from .sseg import *
5 | from .deepten import *
6 |
7 | __all__ = ['model_list', 'get_model']
8 |
9 | models = {
10 | # resnet
11 | 'resnet50': resnet50,
12 | 'resnet101': resnet101,
13 | 'resnet152': resnet152,
14 | # resnest
15 | 'resnest50': resnest50,
16 | 'resnest101': resnest101,
17 | 'resnest200': resnest200,
18 | 'resnest269': resnest269,
19 | # resnet other variants
20 | 'resnet50s': resnet50s,
21 | 'resnet101s': resnet101s,
22 | 'resnet152s': resnet152s,
23 | 'resnet50d': resnet50d,
24 | 'resnext50_32x4d': resnext50_32x4d,
25 | 'resnext101_32x8d': resnext101_32x8d,
26 | # other segmentation backbones
27 | 'xception65': xception65,
28 | 'wideresnet38': wideresnet38,
29 | 'wideresnet50': wideresnet50,
30 | # deepten paper
31 | 'deepten_resnet50_minc': get_deepten_resnet50_minc,
32 | # segmentation resnet models
33 | 'encnet_resnet101s_coco': get_encnet_resnet101_coco,
34 | 'fcn_resnet50s_pcontext': get_fcn_resnet50_pcontext,
35 | 'encnet_resnet50s_pcontext': get_encnet_resnet50_pcontext,
36 | 'encnet_resnet101s_pcontext': get_encnet_resnet101_pcontext,
37 | 'encnet_resnet50s_ade': get_encnet_resnet50_ade,
38 | 'encnet_resnet101s_ade': get_encnet_resnet101_ade,
39 | 'fcn_resnet50s_ade': get_fcn_resnet50_ade,
40 | 'psp_resnet50s_ade': get_psp_resnet50_ade,
41 | # segmentation resnest models
42 | 'fcn_resnest50_ade': get_fcn_resnest50_ade,
43 | 'deeplab_resnest50_ade': get_deeplab_resnest50_ade,
44 | 'deeplab_resnest101_ade': get_deeplab_resnest101_ade,
45 | 'deeplab_resnest200_ade': get_deeplab_resnest200_ade,
46 | 'deeplab_resnest269_ade': get_deeplab_resnest269_ade,
47 | 'fcn_resnest50_pcontext': get_fcn_resnest50_pcontext,
48 | 'deeplab_resnest50_pcontext': get_deeplab_resnest50_pcontext,
49 | 'deeplab_resnest101_pcontext': get_deeplab_resnest101_pcontext,
50 | 'deeplab_resnest200_pcontext': get_deeplab_resnest200_pcontext,
51 | 'deeplab_resnest269_pcontext': get_deeplab_resnest269_pcontext,
52 | }
53 |
54 | model_list = list(models.keys())
55 |
56 | def get_model(name, **kwargs):
57 | """Returns a pre-defined model by name
58 |
59 | Parameters
60 | ----------
61 | name : str
62 | Name of the model.
63 | pretrained : bool
64 | Whether to load the pretrained weights for model.
65 | root : str, default '~/.encoding/models'
66 | Location for keeping the model parameters.
67 |
68 | Returns
69 | -------
70 | Module:
71 | The model.
72 | """
73 | name = name.lower()
74 | if name not in models:
75 | raise ValueError('%s\n\t%s' % (str(name), '\n\t'.join(sorted(models.keys()))))
76 | net = models[name](**kwargs)
77 | return net
78 |
--------------------------------------------------------------------------------
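A minimal lookup sketch (not part of the repository), assuming torch-encoding is installed; get_model is the same entry point the demo scripts use:

from encoding.models.model_zoo import get_model, model_list

print(len(model_list), model_list[:5])   # all registered model names
model = get_model('resnest50', pretrained=False)
# Unknown names raise a ValueError that lists every available model.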
/encoding/models/sseg/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import *
2 | from .fcn import *
3 | from .psp import *
4 | from .fcfpn import *
5 | from .atten import *
6 | from .encnet import *
7 | from .deeplab import *
8 | from .upernet import *
9 | from .dran import *
10 | from .danet import *
11 |
12 | def get_segmentation_model(name, **kwargs):
13 | models = {
14 | 'fcn': get_fcn,
15 | 'psp': get_psp,
16 | 'fcfpn': get_fcfpn,
17 | 'atten': get_atten,
18 | 'encnet': get_encnet,
19 | 'upernet': get_upernet,
20 | 'deeplab': get_deeplab,
21 |         'dran': get_dran,
22 | 'danet': get_danet
23 | }
24 | return models[name.lower()](**kwargs)
25 |
--------------------------------------------------------------------------------
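A dispatch sketch (not part of the repository), assuming torch-encoding is installed; the segmentation factories typically download an ImageNet-pretrained backbone when the model is built:

from encoding.models.sseg import get_segmentation_model

# The name selects the factory (get_psp here); the remaining kwargs are passed
# through to it, e.g. dataset picks the number of output classes.
model = get_segmentation_model('psp', dataset='ade20k', backbone='resnet50s')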
/encoding/models/sseg/psp.py:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | # Created by: Hang Zhang
3 | # Email: zhang.hang@rutgers.edu
4 | # Copyright (c) 2017
5 | ###########################################################################
6 | from __future__ import division
7 | import os
8 | import numpy as np
9 | import torch
10 | import torch.nn as nn
11 | from torch.nn.functional import interpolate
12 |
13 | from .base import BaseNet
14 | from .fcn import FCNHead
15 | from ...nn import PyramidPooling
16 |
17 | class PSP(BaseNet):
18 | def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs):
19 | super(PSP, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs)
20 | self.head = PSPHead(2048, nclass, norm_layer, self._up_kwargs)
21 | if aux:
22 | self.auxlayer = FCNHead(1024, nclass, norm_layer)
23 |
24 | def forward(self, x):
25 | _, _, h, w = x.size()
26 | _, _, c3, c4 = self.base_forward(x)
27 |
28 | outputs = []
29 | x = self.head(c4)
30 | x = interpolate(x, (h,w), **self._up_kwargs)
31 | outputs.append(x)
32 | if self.aux:
33 | auxout = self.auxlayer(c3)
34 | auxout = interpolate(auxout, (h,w), **self._up_kwargs)
35 | outputs.append(auxout)
36 | return tuple(outputs)
37 |
38 |
39 | class PSPHead(nn.Module):
40 | def __init__(self, in_channels, out_channels, norm_layer, up_kwargs):
41 | super(PSPHead, self).__init__()
42 | inter_channels = in_channels // 4
43 | self.conv5 = nn.Sequential(PyramidPooling(in_channels, norm_layer, up_kwargs),
44 | nn.Conv2d(in_channels * 2, inter_channels, 3, padding=1, bias=False),
45 | norm_layer(inter_channels),
46 | nn.ReLU(True),
47 | nn.Dropout(0.1, False),
48 | nn.Conv2d(inter_channels, out_channels, 1))
49 |
50 | def forward(self, x):
51 | return self.conv5(x)
52 |
53 | def get_psp(dataset='pascal_voc', backbone='resnet50s', pretrained=False,
54 | root='~/.encoding/models', **kwargs):
55 | # infer number of classes
56 | from ...datasets import datasets, acronyms
57 | model = PSP(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs)
58 | if pretrained:
59 | from ..model_store import get_model_file
60 | model.load_state_dict(torch.load(
61 | get_model_file('psp_%s_%s'%(backbone, acronyms[dataset]), root=root)))
62 | return model
63 |
64 | def get_psp_resnet50_ade(pretrained=False, root='~/.encoding/models', **kwargs):
65 | r"""PSP model from the paper `"Context Encoding for Semantic Segmentation"
66 | `_
67 |
68 | Parameters
69 | ----------
70 | pretrained : bool, default False
71 | Whether to load the pretrained weights for model.
72 | root : str, default '~/.encoding/models'
73 | Location for keeping the model parameters.
74 |
75 |
76 | Examples
77 | --------
78 | >>> model = get_psp_resnet50_ade(pretrained=True)
79 | >>> print(model)
80 | """
81 | return get_psp('ade20k', 'resnet50s', pretrained, root=root, **kwargs)
82 |
--------------------------------------------------------------------------------
/encoding/models/sseg/upernet.py:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | # Created by: Hang Zhang
3 | # Email: zhang.hang@rutgers.edu
4 | # Copyright (c) 2017
5 | ###########################################################################
6 | from __future__ import division
7 | import os
8 | import numpy as np
9 | import torch
10 | import torch.nn as nn
11 | from torch.nn.functional import upsample
12 |
13 | from .base import BaseNet
14 | from .fcfpn import FCFPNHead
15 | from ...nn import PyramidPooling
16 |
17 | torch_ver = torch.__version__[:3]
18 |
19 | __all__ = ['UperNet', 'get_upernet', 'get_upernet_50_ade']
20 |
21 | class UperNet(BaseNet):
22 |     r"""Unified Perceptual Parsing network (UperNet) for semantic segmentation
23 |
24 | Parameters
25 | ----------
26 | nclass : int
27 | Number of categories for the training dataset.
28 | backbone : string
29 | Pre-trained dilated backbone network type (default:'resnet50s'; 'resnet50s',
30 | 'resnet101s' or 'resnet152s').
31 | norm_layer : object
32 |         Normalization layer used in backbone network (default: :class:`torch.nn.BatchNorm2d`).
33 |
34 |
35 | Reference:
36 |
37 |         Xiao, Tete, et al. "Unified Perceptual Parsing for Scene
38 |         Understanding." *ECCV*, 2018
39 |
40 | Examples
41 | --------
42 | >>> model = UperNet(nclass=21, backbone='resnet50s')
43 | >>> print(model)
44 | """
45 | def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs):
46 |         super(UperNet, self).__init__(nclass, backbone, aux, se_loss, dilated=False, norm_layer=norm_layer, **kwargs)
47 | self.head = UperNetHead(nclass, norm_layer, up_kwargs=self._up_kwargs)
48 | assert not aux, "UperNet does not support aux loss"
49 |
50 | def forward(self, x):
51 | imsize = x.size()[2:]
52 | features = self.base_forward(x)
53 |
54 | x = list(self.head(*features))
55 | x[0] = upsample(x[0], imsize, **self._up_kwargs)
56 | return tuple(x)
57 |
58 |
59 | class UperNetHead(FCFPNHead):
60 | def __init__(self, out_channels, norm_layer=None, fpn_inchannels=[256, 512, 1024, 2048],
61 | fpn_dim=256, up_kwargs=None):
62 | fpn_inchannels[-1] = fpn_inchannels[-1] * 2
63 | super(UperNetHead, self).__init__(out_channels, norm_layer, fpn_inchannels,
64 | fpn_dim, up_kwargs)
65 | self.extramodule = PyramidPooling(fpn_inchannels[-1] // 2, norm_layer, up_kwargs)
66 |
67 |
68 | def get_upernet(dataset='pascal_voc', backbone='resnet50s', pretrained=False,
69 | root='~/.encoding/models', **kwargs):
70 | r"""UperNet model from the paper `"Unified Perceptual Parsing for Scene Understanding"
71 | `_
72 | Parameters
73 | ----------
74 | dataset : str, default pascal_voc
75 | The dataset that model pretrained on. (pascal_voc, ade20k)
76 | pretrained : bool, default False
77 | Whether to load the pretrained weights for model.
78 | root : str, default '~/.encoding/models'
79 | Location for keeping the model parameters.
80 | Examples
81 | --------
82 | >>> model = get_upernet(dataset='pascal_voc', backbone='resnet50s', pretrained=False)
83 | >>> print(model)
84 | """
85 | acronyms = {
86 | 'pascal_voc': 'voc',
87 | 'pascal_aug': 'voc',
88 | 'ade20k': 'ade',
89 | }
90 | # infer number of classes
91 | from ...datasets import datasets, VOCSegmentation, VOCAugSegmentation, ADE20KSegmentation
92 | model = UperNet(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, **kwargs)
93 | if pretrained:
94 | from ..model_store import get_model_file
95 | model.load_state_dict(torch.load(
96 | get_model_file('upernet_%s_%s'%(backbone, acronyms[dataset]), root=root)))
97 | return model
98 |
99 |
100 | def get_upernet_50_ade(pretrained=False, root='~/.encoding/models', **kwargs):
101 | r"""UperNet model from the paper `"Unified Perceptual Parsing for Scene Understanding"
102 | `_
103 |
104 | Parameters
105 | ----------
106 | pretrained : bool, default False
107 | Whether to load the pretrained weights for model.
108 | root : str, default '~/.encoding/models'
109 | Location for keeping the model parameters.
110 |
111 |
112 | Examples
113 | --------
114 | >>> model = get_upernet_50_ade(pretrained=True)
115 | >>> print(model)
116 | """
117 |     return get_upernet('ade20k', 'resnet50s', pretrained, root=root, **kwargs)
118 |
--------------------------------------------------------------------------------
/encoding/nn/__init__.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | """Encoding NN Modules"""
12 | from .encoding import *
13 | from .syncbn import *
14 | from .customize import *
15 | from .attention import *
16 | from .loss import *
17 | from .rectify import *
18 | from .splat import SplAtConv2d
19 | from .dropblock import *
20 | from .dran_att import *
21 | from .da_att import *
22 |
--------------------------------------------------------------------------------
/encoding/nn/da_att.py:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | # Created by: CASIA IVA
3 | # Email: jliu@nlpr.ia.ac.cn
4 | # Copyright (c) 2018
5 | ###########################################################################
6 |
7 | import numpy as np
8 | import torch
9 | import math
10 | from torch.nn import Module, Sequential, Conv2d, ReLU,AdaptiveMaxPool2d, AdaptiveAvgPool2d, \
11 | NLLLoss, BCELoss, CrossEntropyLoss, AvgPool2d, MaxPool2d, Parameter, Linear, Sigmoid, Softmax, Dropout, Embedding
12 | from torch.nn import functional as F
13 | from torch.autograd import Variable
14 | torch_ver = torch.__version__[:3]
15 |
16 | __all__ = ['PAM_Module', 'CAM_Module']
17 |
18 |
19 | class PAM_Module(Module):
20 | """ Position attention module"""
21 | #Ref from SAGAN
22 | def __init__(self, in_dim):
23 | super(PAM_Module, self).__init__()
24 | self.chanel_in = in_dim
25 |
26 | self.query_conv = Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1)
27 | self.key_conv = Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1)
28 | self.value_conv = Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)
29 | self.gamma = Parameter(torch.zeros(1))
30 |
31 | self.softmax = Softmax(dim=-1)
32 | def forward(self, x):
33 | """
34 | inputs :
35 |                 x : input feature maps (B x C x H x W)
36 | returns :
37 | out : attention value + input feature
38 | attention: B X (HxW) X (HxW)
39 | """
40 | m_batchsize, C, height, width = x.size()
41 | proj_query = self.query_conv(x).view(m_batchsize, -1, width*height).permute(0, 2, 1)
42 | proj_key = self.key_conv(x).view(m_batchsize, -1, width*height)
43 | energy = torch.bmm(proj_query, proj_key)
44 | attention = self.softmax(energy)
45 | proj_value = self.value_conv(x).view(m_batchsize, -1, width*height)
46 |
47 | out = torch.bmm(proj_value, attention.permute(0, 2, 1))
48 | out = out.view(m_batchsize, C, height, width)
49 |
50 | out = self.gamma*out + x
51 | return out
52 |
53 |
54 | class CAM_Module(Module):
55 | """ Channel attention module"""
56 | def __init__(self, in_dim):
57 | super(CAM_Module, self).__init__()
58 | self.chanel_in = in_dim
59 |
60 |
61 | self.gamma = Parameter(torch.zeros(1))
62 | self.softmax = Softmax(dim=-1)
63 | def forward(self,x):
64 | """
65 | inputs :
66 |                 x : input feature maps (B x C x H x W)
67 | returns :
68 | out : attention value + input feature
69 | attention: B X C X C
70 | """
71 | m_batchsize, C, height, width = x.size()
72 | proj_query = x.view(m_batchsize, C, -1)
73 | proj_key = x.view(m_batchsize, C, -1).permute(0, 2, 1)
74 | energy = torch.bmm(proj_query, proj_key)
75 | energy_new = torch.max(energy, -1, keepdim=True)[0].expand_as(energy)-energy
76 | attention = self.softmax(energy_new)
77 | proj_value = x.view(m_batchsize, C, -1)
78 |
79 | out = torch.bmm(attention, proj_value)
80 | out = out.view(m_batchsize, C, height, width)
81 |
82 | out = self.gamma*out + x
83 | return out
84 |
85 |
--------------------------------------------------------------------------------
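A shape-level sketch of the two attention blocks (not part of the repository), assuming torch-encoding and its compiled extensions are importable; both modules return a residually re-weighted map with the same shape as their input:

import torch
from encoding.nn import PAM_Module, CAM_Module

x = torch.rand(2, 64, 16, 16)
# Position attention builds an (HW x HW) affinity, channel attention a (C x C) one.
print(PAM_Module(64)(x).shape)  # torch.Size([2, 64, 16, 16])
print(CAM_Module(64)(x).shape)  # torch.Size([2, 64, 16, 16])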
/encoding/nn/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | import torch.nn as nn
4 | from torch.autograd import Variable
5 |
6 | __all__ = ['LabelSmoothing', 'NLLMultiLabelSmooth', 'SegmentationLosses']
7 |
8 | class LabelSmoothing(nn.Module):
9 | """
10 | NLL loss with label smoothing.
11 | """
12 | def __init__(self, smoothing=0.1):
13 | """
14 | Constructor for the LabelSmoothing module.
15 | :param smoothing: label smoothing factor
16 | """
17 | super(LabelSmoothing, self).__init__()
18 | self.confidence = 1.0 - smoothing
19 | self.smoothing = smoothing
20 |
21 | def forward(self, x, target):
22 | logprobs = torch.nn.functional.log_softmax(x, dim=-1)
23 |
24 | nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
25 | nll_loss = nll_loss.squeeze(1)
26 | smooth_loss = -logprobs.mean(dim=-1)
27 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss
28 | return loss.mean()
29 |
30 | class NLLMultiLabelSmooth(nn.Module):
31 | def __init__(self, smoothing = 0.1):
32 | super(NLLMultiLabelSmooth, self).__init__()
33 | self.confidence = 1.0 - smoothing
34 | self.smoothing = smoothing
35 |
36 | def forward(self, x, target):
37 | if self.training:
38 | x = x.float()
39 | target = target.float()
40 | logprobs = torch.nn.functional.log_softmax(x, dim = -1)
41 |
42 | nll_loss = -logprobs * target
43 | nll_loss = nll_loss.sum(-1)
44 |
45 | smooth_loss = -logprobs.mean(dim=-1)
46 |
47 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss
48 |
49 | return loss.mean()
50 | else:
51 | return torch.nn.functional.cross_entropy(x, target)
52 |
53 | class SegmentationLosses(nn.CrossEntropyLoss):
54 |     """2D Cross Entropy Loss with Auxiliary Loss"""
55 | def __init__(self, se_loss=False, se_weight=0.2, nclass=-1,
56 | aux=False, aux_weight=0.4, weight=None,
57 | ignore_index=-1):
58 | super(SegmentationLosses, self).__init__(weight, None, ignore_index)
59 | self.se_loss = se_loss
60 | self.aux = aux
61 | self.nclass = nclass
62 | self.se_weight = se_weight
63 | self.aux_weight = aux_weight
64 | self.bceloss = nn.BCELoss(weight)
65 |
66 | def forward(self, *inputs):
67 | if not self.se_loss and not self.aux:
68 | return super(SegmentationLosses, self).forward(*inputs)
69 | elif not self.se_loss:
70 | pred1, pred2, target = tuple(inputs)
71 | loss1 = super(SegmentationLosses, self).forward(pred1, target)
72 | loss2 = super(SegmentationLosses, self).forward(pred2, target)
73 | return loss1 + self.aux_weight * loss2
74 | elif not self.aux:
75 | pred, se_pred, target = tuple(inputs)
76 | se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred)
77 | loss1 = super(SegmentationLosses, self).forward(pred, target)
78 | loss2 = self.bceloss(torch.sigmoid(se_pred), se_target)
79 | return loss1 + self.se_weight * loss2
80 | else:
81 | pred1, se_pred, pred2, target = tuple(inputs)
82 | se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred1)
83 | loss1 = super(SegmentationLosses, self).forward(pred1, target)
84 | loss2 = super(SegmentationLosses, self).forward(pred2, target)
85 | loss3 = self.bceloss(torch.sigmoid(se_pred), se_target)
86 | return loss1 + self.aux_weight * loss2 + self.se_weight * loss3
87 |
88 | @staticmethod
89 | def _get_batch_label_vector(target, nclass):
90 | # target is a 3D Variable BxHxW, output is 2D BxnClass
91 | batch = target.size(0)
92 | tvect = Variable(torch.zeros(batch, nclass))
93 | for i in range(batch):
94 | hist = torch.histc(target[i].cpu().data.float(),
95 | bins=nclass, min=0,
96 | max=nclass-1)
97 | vect = hist>0
98 | tvect[i] = vect
99 | return tvect
100 |
--------------------------------------------------------------------------------
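A small sketch of how SegmentationLosses unpacks its inputs when only the auxiliary head is enabled (not part of the repository), assuming torch-encoding and its compiled extensions are installed:

import torch
from encoding.nn import SegmentationLosses

B, nclass, H, W = 2, 21, 32, 32
pred1 = torch.rand(B, nclass, H, W)            # main head logits
pred2 = torch.rand(B, nclass, H, W)            # auxiliary FCN head logits
target = torch.randint(0, nclass, (B, H, W))   # integer label map

# aux=True, se_loss=False -> forward expects (pred1, pred2, target)
criterion = SegmentationLosses(aux=True, aux_weight=0.4, nclass=nclass)
loss = criterion(pred1, pred2, target)
print(loss.item())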
/encoding/nn/rectify.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## LICENSE file in the root directory of this source tree
7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 |
9 | """Rectify Module"""
10 | import warnings
11 |
12 | import torch
13 | from torch.nn import Conv2d
14 | import torch.nn.functional as F
15 | from torch.nn.modules.utils import _pair
16 |
17 | from ..functions import rectify
18 |
19 | __all__ = ['RFConv2d']
20 |
21 |
22 | class RFConv2d(Conv2d):
23 | """Rectified Convolution
24 | """
25 | def __init__(self, in_channels, out_channels, kernel_size, stride=1,
26 | padding=0, dilation=1, groups=1,
27 | bias=True, padding_mode='zeros',
28 | average_mode=False):
29 | kernel_size = _pair(kernel_size)
30 | stride = _pair(stride)
31 | padding = _pair(padding)
32 | dilation = _pair(dilation)
33 | self.rectify = average_mode or (padding[0] > 0 or padding[1] > 0)
34 | self.average = average_mode
35 |
36 | super(RFConv2d, self).__init__(
37 | in_channels, out_channels, kernel_size, stride=stride,
38 | padding=padding, dilation=dilation, groups=groups,
39 | bias=bias, padding_mode=padding_mode)
40 |
41 | def _conv_forward(self, input, weight):
42 | if self.padding_mode != 'zeros':
43 | return F.conv2d(F.pad(input, self._padding_repeated_twice, mode=self.padding_mode),
44 | weight, self.bias, self.stride,
45 | _pair(0), self.dilation, self.groups)
46 | return F.conv2d(input, weight, self.bias, self.stride,
47 | self.padding, self.dilation, self.groups)
48 |
49 | def forward(self, input):
50 | output = self._conv_forward(input, self.weight)
51 | if self.rectify:
52 | output = rectify(output, input, self.kernel_size, self.stride,
53 | self.padding, self.dilation, self.average)
54 | return output
55 |
56 | def extra_repr(self):
57 | return super().extra_repr() + ', rectify={}, average_mode={}'. \
58 | format(self.rectify, self.average)
59 |
--------------------------------------------------------------------------------
/encoding/nn/splat.py:
--------------------------------------------------------------------------------
1 | """Split-Attention"""
2 |
3 | import torch
4 | from torch import nn
5 | import torch.nn.functional as F
6 | from torch.nn import Conv2d, Module, Linear, BatchNorm2d, ReLU
7 | from torch.nn.modules.utils import _pair
8 |
9 | from .rectify import RFConv2d
10 | from .dropblock import DropBlock2D
11 |
12 | __all__ = ['SplAtConv2d']
13 |
14 | class SplAtConv2d(Module):
15 | """Split-Attention Conv2d
16 | """
17 | def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0),
18 | dilation=(1, 1), groups=1, bias=True,
19 | radix=2, reduction_factor=4,
20 | rectify=False, rectify_avg=False, norm_layer=None,
21 | dropblock_prob=0.0, **kwargs):
22 | super(SplAtConv2d, self).__init__()
23 | padding = _pair(padding)
24 | self.rectify = rectify and (padding[0] > 0 or padding[1] > 0)
25 | self.rectify_avg = rectify_avg
26 | inter_channels = max(in_channels*radix//reduction_factor, 32)
27 | self.radix = radix
28 | self.cardinality = groups
29 | self.channels = channels
30 | self.dropblock_prob = dropblock_prob
31 | if self.rectify:
32 | self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation,
33 | groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs)
34 | else:
35 | self.conv = Conv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation,
36 | groups=groups*radix, bias=bias, **kwargs)
37 | self.use_bn = norm_layer is not None
38 |         self.bn0 = norm_layer(channels*radix) if self.use_bn else None
39 | self.relu = ReLU(inplace=True)
40 | self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality)
41 |         self.bn1 = norm_layer(inter_channels) if self.use_bn else None
42 | self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality)
43 | if dropblock_prob > 0.0:
44 | self.dropblock = DropBlock2D(dropblock_prob, 3)
45 | self.rsoftmax = rSoftMax(radix, groups)
46 |
47 | def forward(self, x):
48 | x = self.conv(x)
49 | if self.use_bn:
50 | x = self.bn0(x)
51 | if self.dropblock_prob > 0.0:
52 | x = self.dropblock(x)
53 | x = self.relu(x)
54 |
55 | batch, channel = x.shape[:2]
56 | if self.radix > 1:
57 | splited = torch.split(x, channel//self.radix, dim=1)
58 | gap = sum(splited)
59 | else:
60 | gap = x
61 | gap = F.adaptive_avg_pool2d(gap, 1)
62 | gap = self.fc1(gap)
63 |
64 | if self.use_bn:
65 | gap = self.bn1(gap)
66 | gap = self.relu(gap)
67 |
68 | atten = self.fc2(gap)
69 | atten = self.rsoftmax(atten).view(batch, -1, 1, 1)
70 |
71 | if self.radix > 1:
72 | atten = torch.split(atten, channel//self.radix, dim=1)
73 | out = sum([att*split for (att, split) in zip(atten, splited)])
74 | else:
75 | out = atten * x
76 | return out.contiguous()
77 |
78 | class rSoftMax(nn.Module):
79 | def __init__(self, radix, cardinality):
80 | super().__init__()
81 | self.radix = radix
82 | self.cardinality = cardinality
83 |
84 | def forward(self, x):
85 | batch = x.size(0)
86 | if self.radix > 1:
87 | x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2)
88 | x = F.softmax(x, dim=1)
89 | x = x.reshape(batch, -1)
90 | else:
91 | x = torch.sigmoid(x)
92 | return x
93 |
--------------------------------------------------------------------------------
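A shape sketch for the split-attention convolution (not part of the repository), assuming torch-encoding and its compiled extensions are installed:

import torch
from torch import nn
from encoding.nn import SplAtConv2d

# radix=2 splits the 64*2 conv channels into two groups, pools their sum, and
# re-weights each split with the r-SoftMax attention before summing them back.
conv = SplAtConv2d(64, 64, kernel_size=3, padding=1, radix=2,
                   norm_layer=nn.BatchNorm2d)
out = conv(torch.rand(2, 64, 32, 32))
print(out.shape)  # torch.Size([2, 64, 32, 32]): same spatial size, 64 channels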
/encoding/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | from .transforms import *
2 | from .get_transform import get_transform
3 |
--------------------------------------------------------------------------------
/encoding/transforms/get_transform.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## LICENSE file in the root directory of this source tree
7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 | import torch
9 | from torchvision.transforms import *
10 | from .transforms import *
11 |
12 | def get_transform(dataset, base_size=None, crop_size=224, rand_aug=False, etrans=True, **kwargs):
13 | normalize = Normalize(mean=[0.485, 0.456, 0.406],
14 | std=[0.229, 0.224, 0.225])
15 | base_size = base_size if base_size is not None else int(1.0 * crop_size / 0.875)
16 | if dataset == 'imagenet':
17 | train_transforms = []
18 | val_transforms = []
19 | if rand_aug:
20 | from .autoaug import RandAugment
21 | train_transforms.append(RandAugment(2, 12))
22 | if etrans:
23 | train_transforms.extend([
24 | ERandomCrop(crop_size),
25 | ])
26 | val_transforms.extend([
27 | ECenterCrop(crop_size),
28 | ])
29 |
30 | else:
31 | train_transforms.extend([
32 | RandomResizedCrop(crop_size),
33 | ])
34 | val_transforms.extend([
35 | Resize(base_size),
36 | CenterCrop(crop_size),
37 | ])
38 | train_transforms.extend([
39 | RandomHorizontalFlip(),
40 | ColorJitter(0.4, 0.4, 0.4),
41 | ToTensor(),
42 | Lighting(0.1, _imagenet_pca['eigval'], _imagenet_pca['eigvec']),
43 | normalize,
44 | ])
45 | val_transforms.extend([
46 | ToTensor(),
47 | normalize,
48 | ])
49 | transform_train = Compose(train_transforms)
50 | transform_val = Compose(val_transforms)
51 | elif dataset == 'minc':
52 | transform_train = Compose([
53 | Resize(base_size),
54 | RandomResizedCrop(crop_size),
55 | RandomHorizontalFlip(),
56 | ColorJitter(0.4, 0.4, 0.4),
57 | ToTensor(),
58 | Lighting(0.1, _imagenet_pca['eigval'], _imagenet_pca['eigvec']),
59 | normalize,
60 | ])
61 | transform_val = Compose([
62 | Resize(base_size),
63 | CenterCrop(crop_size),
64 | ToTensor(),
65 | normalize,
66 | ])
67 | elif dataset == 'cifar10':
68 | transform_train = Compose([
69 | RandomCrop(32, padding=4),
70 | RandomHorizontalFlip(),
71 | ToTensor(),
72 | Normalize((0.4914, 0.4822, 0.4465),
73 | (0.2023, 0.1994, 0.2010)),
74 | ])
75 | transform_val = Compose([
76 | ToTensor(),
77 | Normalize((0.4914, 0.4822, 0.4465),
78 | (0.2023, 0.1994, 0.2010)),
79 | ])
80 | return transform_train, transform_val
81 |
82 | _imagenet_pca = {
83 | 'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]),
84 | 'eigvec': torch.Tensor([
85 | [-0.5675, 0.7192, 0.4009],
86 | [-0.5808, -0.0045, -0.8140],
87 | [-0.5836, -0.6948, 0.4203],
88 | ])
89 | }
90 |
--------------------------------------------------------------------------------
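A quick sketch (not part of the repository), assuming torch-encoding is installed; the cifar10 branch is shown because it relies only on plain torchvision transforms:

from encoding.transforms import get_transform

# CIFAR-10: random crop + horizontal flip for training, normalization only
# for validation; the function returns both pipelines as a pair.
transform_train, transform_val = get_transform('cifar10')
print(transform_train)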
/encoding/utils/__init__.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | """Encoding Util Tools"""
12 | from .lr_scheduler import *
13 | from .metrics import *
14 | from .pallete import get_mask_pallete
15 | from .train_helper import *
16 | from .presets import load_image
17 | from .files import *
18 | from .misc import *
19 | from .dist_helper import *
20 |
--------------------------------------------------------------------------------
/encoding/utils/dist_helper.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | import torch
12 |
13 | __all__ = ['torch_dist_sum']
14 |
15 | def torch_dist_sum(gpu, *args):
16 | process_group = torch.distributed.group.WORLD
17 | tensor_args = []
18 | pending_res = []
19 | for arg in args:
20 | if isinstance(arg, torch.Tensor):
21 | tensor_arg = arg.clone().reshape(-1).detach().cuda(gpu)
22 | else:
23 | tensor_arg = torch.tensor(arg).reshape(-1).cuda(gpu)
24 | tensor_args.append(tensor_arg)
25 | pending_res.append(torch.distributed.all_reduce(tensor_arg, group=process_group, async_op=True))
26 | for res in pending_res:
27 | res.wait()
28 | return tensor_args
29 |
--------------------------------------------------------------------------------
/encoding/utils/files.py:
--------------------------------------------------------------------------------
1 | import os
2 | import requests
3 | import errno
4 | import shutil
5 | import hashlib
6 | from tqdm import tqdm
7 | import torch
8 |
9 | __all__ = ['save_checkpoint', 'download', 'mkdir', 'check_sha1']
10 |
11 | def save_checkpoint(state, args, is_best, filename='checkpoint.pth.tar'):
12 | """Saves checkpoint to disk"""
13 | if hasattr(args, 'backbone'):
14 | directory = "runs/%s/%s/%s/%s/"%(args.dataset, args.model, args.backbone, args.checkname)
15 | else:
16 | directory = "runs/%s/%s/%s/"%(args.dataset, args.model, args.checkname)
17 | if not os.path.exists(directory):
18 | os.makedirs(directory)
19 | filename = directory + filename
20 | torch.save(state, filename)
21 | if is_best:
22 | shutil.copyfile(filename, directory + 'model_best.pth.tar')
23 |
24 |
25 | def download(url, path=None, overwrite=False, sha1_hash=None):
26 |     """Download a given URL
27 | Parameters
28 | ----------
29 | url : str
30 | URL to download
31 | path : str, optional
32 | Destination path to store downloaded file. By default stores to the
33 | current directory with same name as in url.
34 | overwrite : bool, optional
35 | Whether to overwrite destination file if already exists.
36 | sha1_hash : str, optional
37 | Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified
38 | but doesn't match.
39 | Returns
40 | -------
41 | str
42 | The file path of the downloaded file.
43 | """
44 | if path is None:
45 | fname = url.split('/')[-1]
46 | else:
47 | path = os.path.expanduser(path)
48 | if os.path.isdir(path):
49 | fname = os.path.join(path, url.split('/')[-1])
50 | else:
51 | fname = path
52 |
53 | if overwrite or not os.path.exists(fname) or (sha1_hash and not check_sha1(fname, sha1_hash)):
54 | dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname)))
55 | if not os.path.exists(dirname):
56 | os.makedirs(dirname)
57 |
58 | print('Downloading %s from %s...'%(fname, url))
59 | r = requests.get(url, stream=True)
60 | if r.status_code != 200:
61 | raise RuntimeError("Failed downloading url %s"%url)
62 | total_length = r.headers.get('content-length')
63 | with open(fname, 'wb') as f:
64 | if total_length is None: # no content length header
65 | for chunk in r.iter_content(chunk_size=1024):
66 | if chunk: # filter out keep-alive new chunks
67 | f.write(chunk)
68 | else:
69 | total_length = int(total_length)
70 | for chunk in tqdm(r.iter_content(chunk_size=1024),
71 | total=int(total_length / 1024. + 0.5),
72 | unit='KB', unit_scale=False, dynamic_ncols=True):
73 | f.write(chunk)
74 |
75 | if sha1_hash and not check_sha1(fname, sha1_hash):
76 | raise UserWarning('File {} is downloaded but the content hash does not match. ' \
77 | 'The repo may be outdated or download may be incomplete. ' \
78 | 'If the "repo_url" is overridden, consider switching to ' \
79 | 'the default repo.'.format(fname))
80 |
81 | return fname
82 |
83 |
84 | def check_sha1(filename, sha1_hash):
85 | """Check whether the sha1 hash of the file content matches the expected hash.
86 | Parameters
87 | ----------
88 | filename : str
89 | Path to the file.
90 | sha1_hash : str
91 | Expected sha1 hash in hexadecimal digits.
92 | Returns
93 | -------
94 | bool
95 | Whether the file content matches the expected hash.
96 | """
97 | sha1 = hashlib.sha1()
98 | with open(filename, 'rb') as f:
99 | while True:
100 | data = f.read(1048576)
101 | if not data:
102 | break
103 | sha1.update(data)
104 |
105 | return sha1.hexdigest() == sha1_hash
106 |
107 |
108 | def mkdir(path):
109 |     """Make a directory, ignoring the error if it already exists."""
110 | try:
111 | os.makedirs(path)
112 | except OSError as exc: # Python >2.5
113 | if exc.errno == errno.EEXIST and os.path.isdir(path):
114 | pass
115 | else:
116 | raise
117 |
--------------------------------------------------------------------------------
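An offline sketch of the file helpers (not part of the repository), assuming torch-encoding is installed; the file name and directory are throw-away examples:

import hashlib
from encoding.utils import check_sha1, mkdir

mkdir('tmp_demo')                      # no error if the directory already exists
path = 'tmp_demo/hello.txt'
with open(path, 'w') as f:
    f.write('hello')
expected = hashlib.sha1(b'hello').hexdigest()
print(check_sha1(path, expected))      # True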
/encoding/utils/lr_scheduler.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | import math
12 |
13 | __all__ = ['LR_Scheduler', 'LR_Scheduler_Head']
14 |
15 | class LR_Scheduler(object):
16 | """Learning Rate Scheduler
17 |
18 |     Step mode: ``lr = baselr * 0.1 ^ floor(epoch / lr_step)``
19 |
20 |     Cosine mode: ``lr = baselr * 0.5 * (1 + cos(pi * iter / maxiter))``
21 |
22 |     Poly mode: ``lr = baselr * (1 - iter / maxiter) ^ 0.9``
23 |
24 | Args:
25 |         args: :attr:`args.lr_scheduler` lr scheduler mode (`cos`, `poly`, `step`),
26 | :attr:`args.lr` base learning rate, :attr:`args.epochs` number of epochs,
27 | :attr:`args.lr_step`
28 |
29 | iters_per_epoch: number of iterations per epoch
30 | """
31 | def __init__(self, mode, base_lr, num_epochs, iters_per_epoch=0,
32 | lr_step=0, warmup_epochs=0, quiet=False):
33 | self.mode = mode
34 | self.quiet = quiet
35 | if not quiet:
36 | print('Using {} LR scheduler with warm-up epochs of {}!'.format(self.mode, warmup_epochs))
37 | if mode == 'step':
38 | assert lr_step
39 | self.base_lr = base_lr
40 | self.lr_step = lr_step
41 | self.iters_per_epoch = iters_per_epoch
42 | self.epoch = -1
43 | self.warmup_iters = warmup_epochs * iters_per_epoch
44 | self.total_iters = (num_epochs - warmup_epochs) * iters_per_epoch
45 |
46 | def __call__(self, optimizer, i, epoch, best_pred):
47 | T = epoch * self.iters_per_epoch + i
48 | # warm up lr schedule
49 | if self.warmup_iters > 0 and T < self.warmup_iters:
50 | lr = self.base_lr * 1.0 * T / self.warmup_iters
51 | elif self.mode == 'cos':
52 | T = T - self.warmup_iters
53 | lr = 0.5 * self.base_lr * (1 + math.cos(1.0 * T / self.total_iters * math.pi))
54 | elif self.mode == 'poly':
55 | T = T - self.warmup_iters
56 | lr = self.base_lr * pow((1 - 1.0 * T / self.total_iters), 0.9)
57 | elif self.mode == 'step':
58 | lr = self.base_lr * (0.1 ** (epoch // self.lr_step))
59 | else:
60 |             raise NotImplementedError
61 | if epoch > self.epoch and (epoch == 0 or best_pred > 0.0):
62 | if not self.quiet:
63 | print('\n=>Epoch %i, learning rate = %.4f, \
64 | previous best = %.4f' % (epoch, lr, best_pred))
65 | self.epoch = epoch
66 | assert lr >= 0
67 | self._adjust_learning_rate(optimizer, lr)
68 |
69 | def _adjust_learning_rate(self, optimizer, lr):
70 | for i in range(len(optimizer.param_groups)):
71 | optimizer.param_groups[i]['lr'] = lr
72 |
73 | class LR_Scheduler_Head(LR_Scheduler):
74 |     """Increase the LR of the additional head to 10 times the base LR"""
75 | def _adjust_learning_rate(self, optimizer, lr):
76 | if len(optimizer.param_groups) == 1:
77 | optimizer.param_groups[0]['lr'] = lr
78 | else:
79 | # enlarge the lr at the head
80 | optimizer.param_groups[0]['lr'] = lr
81 | for i in range(1, len(optimizer.param_groups)):
82 | optimizer.param_groups[i]['lr'] = lr * 10
83 |
--------------------------------------------------------------------------------
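A minimal training-loop sketch for the scheduler (not part of the repository), assuming torch-encoding is installed; the tiny linear model and the epoch/iteration counts are only illustrative:

import torch
from encoding.utils import LR_Scheduler

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
# Cosine decay over 2 epochs of 5 iterations each, no warm-up.
scheduler = LR_Scheduler('cos', base_lr=0.1, num_epochs=2, iters_per_epoch=5)
for epoch in range(2):
    for i in range(5):
        scheduler(optimizer, i, epoch, best_pred=0.0)
        # ... forward / backward / optimizer.step() would go here ...
print(optimizer.param_groups[0]['lr'])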
/encoding/utils/misc.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## LICENSE file in the root directory of this source tree
7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 | import warnings
9 |
10 | __all__ = ['AverageMeter', 'EncodingDeprecationWarning']
11 |
12 | class AverageMeter(object):
13 | """Computes and stores the average and current value"""
14 | def __init__(self):
15 | self.reset()
16 |
17 | def reset(self):
18 | #self.val = 0
19 | self.sum = 0
20 | self.count = 0
21 |
22 | def update(self, val, n=1):
23 | #self.val = val
24 | self.sum += val * n
25 | self.count += n
26 |
27 | @property
28 | def avg(self):
29 | avg = 0 if self.count == 0 else self.sum / self.count
30 | return avg
31 |
32 | class EncodingDeprecationWarning(DeprecationWarning):
33 | pass
34 |
35 | warnings.simplefilter('once', EncodingDeprecationWarning)
36 |
--------------------------------------------------------------------------------
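A tiny sketch of the running-average helper (not part of the repository), assuming torch-encoding is installed:

from encoding.utils import AverageMeter

top1 = AverageMeter()
top1.update(0.75, n=32)   # batch accuracy 0.75 over 32 samples
top1.update(0.50, n=16)
print(top1.avg)           # weighted by n: (0.75*32 + 0.5*16) / 48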
/encoding/utils/precise_bn.py:
--------------------------------------------------------------------------------
1 | # Adapted from https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/precise_bn.py
2 | import itertools
3 | from typing import Any, Iterable, List, Tuple, Type
4 |
5 | import torch
6 | from torch import nn
7 | from ..nn import DistSyncBatchNorm, SyncBatchNorm
8 |
9 | BN_MODULE_TYPES: Tuple[Type[nn.Module], ...] = (
10 | torch.nn.BatchNorm1d,
11 | torch.nn.BatchNorm2d,
12 | torch.nn.BatchNorm3d,
13 | torch.nn.SyncBatchNorm,
14 | DistSyncBatchNorm,
15 | SyncBatchNorm,
16 | )
17 |
18 |
19 | @torch.no_grad()
20 | def update_bn_stats(
21 | model: nn.Module, data_loader: Iterable[Any], num_iters: int = 200 # pyre-ignore
22 | ) -> None:
23 | """
24 | Recompute and update the batch norm stats to make them more precise. During
25 | training both BN stats and the weight are changing after every iteration, so
26 | the running average can not precisely reflect the actual stats of the
27 | current model.
28 | In this function, the BN stats are recomputed with fixed weights, to make
29 | the running average more precise. Specifically, it computes the true average
30 | of per-batch mean/variance instead of the running average.
31 | Args:
32 | model (nn.Module): the model whose bn stats will be recomputed.
33 | Note that:
34 | 1. This function will not alter the training mode of the given model.
35 |             Users are responsible for setting the layers that need
36 | precise-BN to training mode, prior to calling this function.
37 | 2. Be careful if your models contain other stateful layers in
38 | addition to BN, i.e. layers whose state can change in forward
39 | iterations. This function will alter their state. If you wish
40 | them unchanged, you need to either pass in a submodule without
41 | those layers, or backup the states.
42 | data_loader (iterator): an iterator. Produce data as inputs to the model.
43 | num_iters (int): number of iterations to compute the stats.
44 | """
45 | bn_layers = get_bn_modules(model)
46 |
47 | if len(bn_layers) == 0:
48 | return
49 |
50 | # In order to make the running stats only reflect the current batch, the
51 | # momentum is disabled.
52 | # bn.running_mean = (1 - momentum) * bn.running_mean + momentum * batch_mean
53 | # Setting the momentum to 1.0 to compute the stats without momentum.
54 | momentum_actual = [bn.momentum for bn in bn_layers] # pyre-ignore
55 | for bn in bn_layers:
56 | bn.momentum = 1.0
57 |
58 | # Note that running_var actually means "running average of variance"
59 | running_mean = [
60 | torch.zeros_like(bn.running_mean) for bn in bn_layers # pyre-ignore
61 | ]
62 | running_var = [torch.zeros_like(bn.running_var) for bn in bn_layers] # pyre-ignore
63 |
64 | ind = -1
65 | for ind, inputs in enumerate(itertools.islice(data_loader, num_iters)):
66 | inputs=inputs.cuda()
67 | with torch.no_grad(): # No need to backward
68 | model(inputs)
69 |
70 | for i, bn in enumerate(bn_layers):
71 | # Accumulates the bn stats.
72 | running_mean[i] += (bn.running_mean - running_mean[i]) / (ind + 1)
73 | running_var[i] += (bn.running_var - running_var[i]) / (ind + 1)
74 | # We compute the "average of variance" across iterations.
75 | assert ind == num_iters - 1, (
76 | "update_bn_stats is meant to run for {} iterations, "
77 | "but the dataloader stops at {} iterations.".format(num_iters, ind)
78 | )
79 |
80 | for i, bn in enumerate(bn_layers):
81 | # Sets the precise bn stats.
82 | bn.running_mean = running_mean[i]
83 | bn.running_var = running_var[i]
84 | bn.momentum = momentum_actual[i]
85 |
86 |
87 | def get_bn_modules(model: nn.Module) -> List[nn.Module]:
88 | """
89 | Find all BatchNorm (BN) modules that are in training mode. See
90 | fvcore.precise_bn.BN_MODULE_TYPES for a list of all modules that are
91 | included in this search.
92 | Args:
93 | model (nn.Module): a model possibly containing BN modules.
94 | Returns:
95 | list[nn.Module]: all BN modules in the model.
96 | """
97 | # Finds all the bn layers.
98 | bn_layers = [
99 | m for m in model.modules() if m.training and isinstance(m, BN_MODULE_TYPES)
100 | ]
101 | return bn_layers
102 |
--------------------------------------------------------------------------------
/encoding/utils/presets.py:
--------------------------------------------------------------------------------
1 | """Preset Transforms for Demos"""
2 | from PIL import Image
3 | import numpy as np
4 | import torch
5 | import torchvision.transforms as transform
6 |
7 | __all__ = ['load_image']
8 |
9 | input_transform = transform.Compose([
10 | transform.ToTensor(),
11 | transform.Normalize([.485, .456, .406], [.229, .224, .225])])
12 |
13 | def load_image(filename, size=None, scale=None, keep_asp=True, transform=input_transform):
14 | """Load the image for demos"""
15 | img = Image.open(filename).convert('RGB')
16 | if size is not None:
17 | if keep_asp:
18 | size2 = int(size * 1.0 / img.size[0] * img.size[1])
19 | img = img.resize((size, size2), Image.ANTIALIAS)
20 | else:
21 | img = img.resize((size, size), Image.ANTIALIAS)
22 | elif scale is not None:
23 | img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS)
24 |
25 | if transform:
26 | img = transform(img)
27 | return img
28 |
--------------------------------------------------------------------------------
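A tiny round-trip sketch (not part of the repository), assuming torch-encoding is installed and a Pillow version that still provides Image.ANTIALIAS (it was removed in Pillow 10); the dummy image is created just so the example has something to load:

import numpy as np
from PIL import Image
from encoding.utils import load_image

# Write a throw-away 96x64 RGB image to disk.
Image.fromarray(np.zeros((64, 96, 3), dtype=np.uint8)).save('dummy.jpg')
img = load_image('dummy.jpg', size=48)  # keep_asp=True resizes 96x64 -> 48x32
print(img.shape)                        # torch.Size([3, 32, 48]) after ToTensor + normalize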
/encoding/utils/train_helper.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | import numpy as np
12 | import torch
13 | import torch.nn as nn
14 |
15 | #from ..nn import SyncBatchNorm
16 | from torch.nn.modules.batchnorm import _BatchNorm
17 |
18 | __all__ = ['MixUpWrapper', 'get_selabel_vector']
19 |
20 | class MixUpWrapper(object):
21 | def __init__(self, alpha, num_classes, dataloader, device):
22 | self.alpha = alpha
23 | self.dataloader = dataloader
24 | self.num_classes = num_classes
25 | self.device = device
26 |
27 | def mixup_loader(self, loader):
28 | def mixup(alpha, num_classes, data, target):
29 | with torch.no_grad():
30 | bs = data.size(0)
31 | c = np.random.beta(alpha, alpha)
32 | perm = torch.randperm(bs).cuda()
33 |
34 | md = c * data + (1-c) * data[perm, :]
35 | mt = c * target + (1-c) * target[perm, :]
36 | return md, mt
37 |
38 | for input, target in loader:
39 | input, target = input.cuda(self.device), target.cuda(self.device)
40 | target = torch.nn.functional.one_hot(target, self.num_classes)
41 | i, t = mixup(self.alpha, self.num_classes, input, target)
42 | yield i, t
43 |
44 | def __len__(self):
45 | return len(self.dataloader)
46 |
47 | def __iter__(self):
48 | return self.mixup_loader(self.dataloader)
49 |
50 |
51 | def get_selabel_vector(target, nclass):
52 | r"""Get SE-Loss Label in a batch
53 |     Args:
54 |         target: label 3D tensor (BxHxW)
55 |         nclass: number of categories (int)
56 |
57 |     Output:
58 |         2D tensor (BxnClass)
59 | """
60 | batch = target.size(0)
61 | tvect = torch.zeros(batch, nclass)
62 | for i in range(batch):
63 | hist = torch.histc(target[i].data.float(),
64 | bins=nclass, min=0,
65 | max=nclass-1)
66 | vect = hist>0
67 | tvect[i] = vect
68 | return tvect
69 |
--------------------------------------------------------------------------------
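A CPU-only sketch of the SE-loss label helper (not part of the repository), assuming torch-encoding is installed; the label map is random and purely illustrative:

import torch
from encoding.utils import get_selabel_vector

# Which of the nclass categories appear in each segmentation mask?
target = torch.randint(0, 21, (2, 32, 32))      # BxHxW integer label maps
se_label = get_selabel_vector(target, nclass=21)
print(se_label.shape)                           # torch.Size([2, 21]), 1.0 where a class is present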
/experiments/recognition/README.md:
--------------------------------------------------------------------------------
1 | - [Link to the EncNet CIFAR experiments and pre-trained models](http://hangzh.com/PyTorch-Encoding/experiments/cifar.html)
2 |
3 | - [Link to the Deep TEN experiments and pre-trained models](http://hangzh.com/PyTorch-Encoding/experiments/texture.html)
4 |
--------------------------------------------------------------------------------
/experiments/recognition/resnet50_baseline.sh:
--------------------------------------------------------------------------------
1 | # baseline
2 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_check --lr 0.025 --batch-size 64
3 |
4 | # rectify
5 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt --lr 0.1 --batch-size 256 --rectify
6 |
7 | # warmup
8 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt_warm --lr 0.1 --batch-size 256 --warmup-epochs 5 --rectify
9 |
10 | # no-bn-wd
11 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt_nobnwd_warm --lr 0.1 --batch-size 256 --no-bn-wd --warmup-epochs 5 --rectify
12 |
13 | # LS
14 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt_ls --lr 0.1 --batch-size 256 --label-smoothing 0.1 --rectify
15 |
16 | # Mixup + LS
17 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 200 --checkname resnet50_rt_ls_mixup --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --rectify
18 |
19 | # last-gamma
20 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt_gamma --lr 0.1 --batch-size 256 --last-gamma --rectify
21 |
22 | # BoTs
23 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 200 --checkname resnet50_rt_bots --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --last-gamma --no-bn-wd --warmup-epochs 5 --rectify
24 |
25 | # resnet50d
26 | python train_dist.py --dataset imagenet --model resnet50d --lr-scheduler cos --epochs 200 --checkname resnet50d_rt_bots --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --last-gamma --no-bn-wd --warmup-epochs 5 --rectify
27 |
28 | # dropblock
29 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 200 --checkname resnet50_rt_dropblock --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --dropblock-prob 0.1 --rectify
30 |
31 | # resnest50
32 | python train_dist.py --dataset imagenet --model resnest50 --lr-scheduler cos --epochs 270 --checkname resnest50_rt_bots --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --last-gamma --no-bn-wd --warmup-epochs 5 --dropblock-prob 0.1 --rectify
33 |
--------------------------------------------------------------------------------
/experiments/recognition/test_flops.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## This source code is licensed under the MIT-style license found in the
7 | ## LICENSE file in the root directory of this source tree
8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 |
10 | import argparse
11 | import torch
12 |
13 | from thop import profile, clever_format
14 |
15 | import encoding
16 |
17 | def get_args():
18 | # data settings
19 | parser = argparse.ArgumentParser(description='Deep Encoding')
20 | parser.add_argument('--crop-size', type=int, default=224,
21 | help='crop image size')
22 | # model params
23 | parser.add_argument('--model', type=str, default='densenet',
24 | help='network model type (default: densenet)')
25 | parser.add_argument('--rectify', action='store_true',
26 | default=False, help='rectify convolution')
27 |     parser.add_argument('--rectify-avg', action='store_true',
28 |                         default=False, help='average mode for rectified convolution')
31 |
32 | args = parser.parse_args()
33 | return args
34 |
35 | def main():
36 | args = get_args()
37 |
38 | model_kwargs = {}
39 | if args.rectify:
40 | model_kwargs['rectified_conv'] = True
41 | model_kwargs['rectify_avg'] = args.rectify_avg
42 |
43 | model = encoding.models.get_model(args.model, **model_kwargs)
44 | print(model)
45 |
46 | dummy_images = torch.rand(1, 3, args.crop_size, args.crop_size)
47 |
48 | #count_ops(model, dummy_images, verbose=False)
49 | macs, params = profile(model, inputs=(dummy_images, ))
50 | macs, params = clever_format([macs, params], "%.3f")
51 |
52 | print(f"macs: {macs}, params: {params}")
53 |
54 | if __name__ == '__main__':
55 | main()
56 |
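As a quick cross-check of the parameter count that does not depend on thop, the trainable parameters can be summed directly; the model name below is only an example of a name registered in `encoding.models`:

```python
import encoding

# parameter count without thop (the model name is an example)
model = encoding.models.get_model('resnet50')
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('trainable parameters: %.2fM' % (n_params / 1e6))
```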
--------------------------------------------------------------------------------
/experiments/segmentation/demo.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import encoding
3 |
4 | # Get the model
5 | model = encoding.models.get_model('fcn_resnet50s_ade', pretrained=True).cuda()
6 | model.eval()
7 |
8 | # Prepare the image
9 | url = 'https://github.com/zhanghang1989/image-data/blob/master/' + \
10 | 'encoding/segmentation/ade20k/ADE_val_00001142.jpg?raw=true'
11 | filename = 'example.jpg'
12 | img = encoding.utils.load_image(
13 | encoding.utils.download(url, filename)).cuda().unsqueeze(0)
14 |
15 | # Make prediction
16 | output = model.evaluate(img)
17 | predict = torch.max(output, 1)[1].cpu().numpy() + 1
18 |
19 | # Get color pallete for visualization
20 | mask = encoding.utils.get_mask_pallete(predict, 'ade20k')
21 | mask.save('output.png')
22 |
--------------------------------------------------------------------------------
/experiments/segmentation/model_mapping.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch
4 |
5 |
6 | # CLGD_key_map = {'head.gamma': 'head.clgd.gamma', 'head.skipconv.0.weight': 'head.clgd.skipconv.0.weight',
7 | # 'head.skipconv.1.weight': 'head.clgd.skipconv.1.weight', 'head.skipconv.1.bias': 'head.clgd.skipconv.1.bias',
8 | # 'head.skipconv.1.running_mean': 'head.clgd.skipconv.1.running_mean', 'head.skipconv.1.running_var': 'head.clgd.skipconv.1.running_var',
9 | # 'head.skipconv.1.num_batches_tracked': 'head.clgd.skipconv.1.num_batches_tracked', 'head.fusion.0.weight': 'head.clgd.fusion.0.weight',
10 | # 'head.fusion.1.weight': 'head.clgd.fusion.1.weight', 'head.fusion.1.bias': 'head.clgd.fusion.1.bias',
11 | # 'head.fusion.1.running_mean': 'head.clgd.fusion.1.running_mean', 'head.fusion.1.running_var': 'head.clgd.fusion.1.running_var',
12 | # 'head.fusion.1.num_batches_tracked': 'head.clgd.fusion.1.num_batches_tracked', 'head.fusion2.0.weight': 'head.clgd.fusion2.0.weight',
13 | # 'head.fusion2.1.weight': 'head.clgd.fusion2.1.weight', 'head.fusion2.1.bias': 'head.clgd.fusion2.1.bias',
14 | # 'head.fusion2.1.running_mean': 'head.clgd.fusion2.1.running_mean', 'head.fusion2.1.running_var': 'head.clgd.fusion2.1.running_var',
15 | # 'head.fusion2.1.num_batches_tracked': 'head.clgd.fusion2.1.num_batches_tracked', 'head.att.0.weight': 'head.clgd.att.0.weight',
16 | # 'head.att.0.bias': 'head.clgd.att.0.bias'}
17 |
18 | del_keys = ["auxlayer.conv5.0.weight", "auxlayer.conv5.1.bias", "auxlayer.conv5.1.num_batches_tracked", \
19 | "auxlayer.conv5.1.running_mean", "auxlayer.conv5.1.running_var", "auxlayer.conv5.1.weight", \
20 | "auxlayer.conv5.4.bias", "auxlayer.conv5.4.weight"]
21 |
22 | def _rename_glgd_weights(layer_keys):
23 |
24 | layer_keys = [k.replace("head.skipconv", "head.clgd.conv_low") for k in layer_keys]
25 | layer_keys = [k.replace("head.fusion2", "head.clgd.conv_out") for k in layer_keys]
26 | layer_keys = [k.replace("head.fusion", "head.clgd.conv_cat") for k in layer_keys]
27 | layer_keys = [k.replace("head.att", "head.clgd.conv_att") for k in layer_keys]
28 | layer_keys = [k.replace("head.gamma", "head.clgd.gamma") for k in layer_keys]
29 |
30 | return layer_keys
31 |
32 | def _rename_dran_weights(layer_keys):
33 |
34 | layer_keys = [k.replace("head.conv5_s", "head.conv_cpam_b") for k in layer_keys]
35 | layer_keys = [k.replace("head.conv5_c", "head.conv_ccam_b") for k in layer_keys]
36 | layer_keys = [k.replace("head.conv51_c", "head.ccam_enc") for k in layer_keys]
37 | layer_keys = [k.replace("head.conv52", "head.conv_cpam_e") for k in layer_keys]
38 | layer_keys = [k.replace("head.conv51", "head.conv_ccam_e") for k in layer_keys]
39 | layer_keys = [k.replace("head.conv_f", "head.conv_cat") for k in layer_keys]
40 | layer_keys = [k.replace("head.conv6", "cls_seg") for k in layer_keys]
41 | layer_keys = [k.replace("head.conv7", "cls_aux") for k in layer_keys]
42 |
43 | layer_keys = [k.replace("head.en_s", "head.cpam_enc") for k in layer_keys]
44 | layer_keys = [k.replace("head.de_s", "head.cpam_dec") for k in layer_keys]
45 | layer_keys = [k.replace("head.de_c", "head.ccam_dec") for k in layer_keys]
46 |
47 | return layer_keys
48 |
49 | def _rename_cpam_weights(layer_keys):
50 |
51 | layer_keys = [k.replace("head.cpam_dec.query_conv2", "head.cpam_dec.conv_query") for k in layer_keys]
52 | layer_keys = [k.replace("head.cpam_dec.key_conv2", "head.cpam_dec.conv_key") for k in layer_keys]
53 | layer_keys = [k.replace("head.cpam_dec.value2", "head.cpam_dec.conv_value") for k in layer_keys]
54 |
55 | return layer_keys
56 |
57 | def rename_weight_for_head(weights):
58 |
59 |     original_keys = sorted(weights.keys())
60 |     layer_keys = sorted(weights.keys())
61 | 
62 |     # build the old-name -> new-name mapping once (the rename helpers are idempotent)
63 |     layer_keys = _rename_glgd_weights(layer_keys)
64 |     layer_keys = _rename_dran_weights(layer_keys)
65 |     layer_keys = _rename_cpam_weights(layer_keys)
66 |     key_map = dict(zip(original_keys, layer_keys))
67 | 
68 |     new_weights = OrderedDict()
69 |     for k in original_keys:
70 |         new_weights[key_map.get(k, k)] = weights[k]
71 | 
72 |     # drop stale auxiliary-head weights that are not used by the new model
73 | for keys in del_keys:
74 | del new_weights[keys]
75 | return new_weights
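A usage sketch for the remapping above; the checkpoint path matches the one used in test.sh, while the 'state_dict' key layout is an assumption about how the old checkpoints were saved:

```python
import torch
from model_mapping import rename_weight_for_head

# load an old checkpoint on CPU and translate its head keys to the new layout
checkpoint = torch.load('models/dran101.pth.tar', map_location='cpu')
state_dict = rename_weight_for_head(checkpoint['state_dict'])
# the remapped weights can then be loaded into the current model definition:
# model.load_state_dict(state_dict, strict=False)
```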
--------------------------------------------------------------------------------
/experiments/segmentation/test.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=4,5,6,7 python test.py --dataset citys --model dran --backbone resnet101 --resume models/dran101.pth.tar --eval --base-size 2048 --crop-size 768 --workers 1 --multi-grid --multi-dilation 4 8 16 --os 8 --aux
2 |
--------------------------------------------------------------------------------
/experiments/segmentation/test_danet.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=4,5,6,7 python test.py --dataset citys --model danet --backbone resnet101 --resume models/DANet101.pth.tar --eval --base-size 2048 --crop-size 768 --workers 1 --multi-grid --multi-dilation 4 8 16 --os 8 --aux --no-deepstem
2 |
--------------------------------------------------------------------------------
/experiments/segmentation/test_models.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | import torch
3 | import encoding
4 | from option import Options
5 | from torch.autograd import Variable
6 |
7 | if __name__ == "__main__":
8 | args = Options().parse()
9 | model = encoding.models.get_segmentation_model(args.model, dataset=args.dataset, aux=args.aux,
10 | backbone=args.backbone,
11 | se_loss=args.se_loss, norm_layer=torch.nn.BatchNorm2d)
12 | print('Creating the model:')
13 |
14 | print(model)
15 | model.cuda()
16 | model.eval()
17 |     x = torch.randn(4, 3, 480, 480).cuda()
18 | with torch.no_grad():
19 | out = model(x)
20 | for y in out:
21 | print(y.size())
22 |
--------------------------------------------------------------------------------
/img/overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/img/overview.jpg
--------------------------------------------------------------------------------
/img/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/img/overview.png
--------------------------------------------------------------------------------
/img/tab3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/img/tab3.jpg
--------------------------------------------------------------------------------
/img/tab3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/img/tab3.png
--------------------------------------------------------------------------------
/scripts/prepare_ade20k.py:
--------------------------------------------------------------------------------
1 | """Prepare ADE20K dataset"""
2 | import os
3 | import shutil
4 | import argparse
5 | import zipfile
6 | from encoding.utils import download, mkdir
7 |
8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data')
9 |
10 | def parse_args():
11 | parser = argparse.ArgumentParser(
12 | description='Initialize ADE20K dataset.',
13 | epilog='Example: python prepare_ade20k.py',
14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
15 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk')
16 | args = parser.parse_args()
17 | return args
18 |
19 | def download_ade(path, overwrite=False):
20 | _AUG_DOWNLOAD_URLS = [
21 | ('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip', '219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'),
22 | ('http://data.csail.mit.edu/places/ADEchallenge/release_test.zip', 'e05747892219d10e9243933371a497e905a4860c'),]
23 | download_dir = os.path.join(path, 'downloads')
24 | mkdir(download_dir)
25 | for url, checksum in _AUG_DOWNLOAD_URLS:
26 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
27 | # extract
28 | with zipfile.ZipFile(filename,"r") as zip_ref:
29 | zip_ref.extractall(path=path)
30 |
31 |
32 | if __name__ == '__main__':
33 | args = parse_args()
34 | mkdir(os.path.expanduser('~/.encoding/data'))
35 | if args.download_dir is not None:
36 | if os.path.isdir(_TARGET_DIR):
37 | os.remove(_TARGET_DIR)
38 | # make symlink
39 | os.symlink(args.download_dir, _TARGET_DIR)
40 | else:
41 | download_ade(_TARGET_DIR, overwrite=False)
42 |
--------------------------------------------------------------------------------
/scripts/prepare_citys.py:
--------------------------------------------------------------------------------
1 | """Prepare Cityscapes dataset"""
2 | import os
3 | import shutil
4 | import argparse
5 | import zipfile
6 | from encoding.utils import check_sha1, download, mkdir
7 |
8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data')
9 |
10 | def parse_args():
11 | parser = argparse.ArgumentParser(
12 |         description='Initialize Cityscapes dataset.',
13 | epilog='Example: python prepare_cityscapes.py',
14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
15 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk')
16 | args = parser.parse_args()
17 | return args
18 |
19 | def download_city(path, overwrite=False):
20 | _CITY_DOWNLOAD_URLS = [
21 | ('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'),
22 | ('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404')]
23 | download_dir = os.path.join(path, 'downloads')
24 | mkdir(download_dir)
25 |     # the Cityscapes zips must be downloaded manually (registration is required at
26 |     # https://www.cityscapes-dataset.com/) and placed in the current working directory
27 |     for filename, checksum in _CITY_DOWNLOAD_URLS:
28 |         if not check_sha1(filename, checksum):
29 |             raise UserWarning('File {} is present but its content hash does not match. ' \
30 |                 'The download may be incomplete or corrupted, please re-download it.'.format(filename))
31 | # extract
32 | with zipfile.ZipFile(filename,"r") as zip_ref:
33 | zip_ref.extractall(path=path)
34 | print("Extracted", filename)
35 |
36 | if __name__ == '__main__':
37 | args = parse_args()
38 | mkdir(os.path.expanduser('~/.encoding/data'))
39 | if args.download_dir is not None:
40 | if os.path.isdir(_TARGET_DIR):
41 | os.remove(_TARGET_DIR)
42 | # make symlink
43 | os.symlink(args.download_dir, _TARGET_DIR)
44 | else:
45 | download_city(_TARGET_DIR, overwrite=False)
46 |
--------------------------------------------------------------------------------
/scripts/prepare_coco.py:
--------------------------------------------------------------------------------
1 | """Prepare MS COCO datasets"""
2 | import os
3 | import shutil
4 | import argparse
5 | import zipfile
6 | from encoding.utils import download, mkdir
7 |
8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data')
9 |
10 | def parse_args():
11 | parser = argparse.ArgumentParser(
12 | description='Initialize MS COCO dataset.',
13 | epilog='Example: python mscoco.py --download-dir ~/mscoco',
14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
15 | parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk')
16 | args = parser.parse_args()
17 | return args
18 |
19 | def download_coco(path, overwrite=False):
20 | _DOWNLOAD_URLS = [
21 | ('http://images.cocodataset.org/zips/train2017.zip',
22 | '10ad623668ab00c62c096f0ed636d6aff41faca5'),
23 | ('http://images.cocodataset.org/zips/val2017.zip',
24 | '4950dc9d00dbe1c933ee0170f5797584351d2a41'),
25 | ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
26 | '8551ee4bb5860311e79dace7e79cb91e432e78b3'),
27 | #('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip',
28 | # '46cdcf715b6b4f67e980b529534e79c2edffe084'),
29 | #('http://images.cocodataset.org/zips/test2017.zip',
30 | # '99813c02442f3c112d491ea6f30cecf421d0e6b3'),
31 | ('https://hangzh.s3.amazonaws.com/encoding/data/coco/train_ids.pth',
32 | '12cd266f97c8d9ea86e15a11f11bcb5faba700b6'),
33 | ('https://hangzh.s3.amazonaws.com/encoding/data/coco/val_ids.pth',
34 | '4ce037ac33cbf3712fd93280a1c5e92dae3136bb'),
35 | ]
36 | mkdir(path)
37 | for url, checksum in _DOWNLOAD_URLS:
38 | filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum)
39 | # extract
40 | if os.path.splitext(filename)[1] == '.zip':
41 | with zipfile.ZipFile(filename) as zf:
42 | zf.extractall(path=path)
43 | else:
44 | shutil.move(filename, os.path.join(path, 'annotations/'+os.path.basename(filename)))
45 |
46 |
47 | def install_coco_api():
48 | repo_url = "https://github.com/cocodataset/cocoapi"
49 | os.system("git clone " + repo_url)
50 | os.system("cd cocoapi/PythonAPI/ && python setup.py install")
51 | shutil.rmtree('cocoapi')
52 | try:
53 | import pycocotools
54 | except Exception:
55 | print("Installing COCO API failed, please install it manually %s"%(repo_url))
56 |
57 |
58 | if __name__ == '__main__':
59 | args = parse_args()
60 | mkdir(os.path.expanduser('~/.encoding/data'))
61 | if args.download_dir is not None:
62 | if os.path.isdir(_TARGET_DIR):
63 | os.remove(_TARGET_DIR)
64 | # make symlink
65 | os.symlink(args.download_dir, _TARGET_DIR)
66 | else:
67 | download_coco(_TARGET_DIR, overwrite=False)
68 | install_coco_api()
69 |
--------------------------------------------------------------------------------
/scripts/prepare_imagenet.py:
--------------------------------------------------------------------------------
1 | """Prepare the ImageNet dataset"""
2 | import os
3 | import argparse
4 | import tarfile
5 | import pickle
6 | import gzip
7 | import subprocess
8 | from tqdm import tqdm
10 | from encoding.utils import check_sha1, download, mkdir
11 |
12 | _TARGET_DIR = os.path.expanduser('~/.encoding/data/ILSVRC2012')
13 | _TRAIN_TAR = 'ILSVRC2012_img_train.tar'
14 | _TRAIN_TAR_SHA1 = '43eda4fe35c1705d6606a6a7a633bc965d194284'
15 | _VAL_TAR = 'ILSVRC2012_img_val.tar'
16 | _VAL_TAR_SHA1 = '5f3f73da3395154b60528b2b2a2caf2374f5f178'
17 |
18 | def parse_args():
19 | parser = argparse.ArgumentParser(
20 | description='Setup the ImageNet dataset.',
21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
22 | parser.add_argument('--download-dir', required=True,
23 | help="The directory that contains downloaded tar files")
24 | parser.add_argument('--target-dir', default=_TARGET_DIR,
25 | help="The directory to store extracted images")
26 | parser.add_argument('--checksum', action='store_true',
27 | help="If check integrity before extracting.")
28 | parser.add_argument('--with-rec', action='store_true',
29 | help="If build image record files.")
30 | parser.add_argument('--num-thread', type=int, default=1,
31 | help="Number of threads to use when building image record file.")
32 | args = parser.parse_args()
33 | return args
34 |
35 | def check_file(filename, checksum, sha1):
36 | if not os.path.exists(filename):
37 | raise ValueError('File not found: '+filename)
38 | if checksum and not check_sha1(filename, sha1):
39 | raise ValueError('Corrupted file: '+filename)
40 |
41 | def extract_train(tar_fname, target_dir, with_rec=False, num_thread=1):
42 | mkdir(target_dir)
43 | with tarfile.open(tar_fname) as tar:
44 | print("Extracting "+tar_fname+"...")
45 | # extract each class one-by-one
46 | pbar = tqdm(total=len(tar.getnames()))
47 | for class_tar in tar:
48 | pbar.set_description('Extract '+class_tar.name)
49 | tar.extract(class_tar, target_dir)
50 | class_fname = os.path.join(target_dir, class_tar.name)
51 | class_dir = os.path.splitext(class_fname)[0]
52 | os.mkdir(class_dir)
53 | with tarfile.open(class_fname) as f:
54 | f.extractall(class_dir)
55 | os.remove(class_fname)
56 | pbar.update(1)
57 | pbar.close()
58 |
59 | def extract_val(tar_fname, target_dir, with_rec=False, num_thread=1):
60 | mkdir(target_dir)
61 | print('Extracting ' + tar_fname)
62 | with tarfile.open(tar_fname) as tar:
63 | tar.extractall(target_dir)
64 | # build rec file before images are moved into subfolders
65 | # move images to proper subfolders
66 | subprocess.call(["wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash"],
67 | cwd=target_dir, shell=True)
68 |
69 |
70 | def main():
71 | args = parse_args()
72 |
73 | target_dir = os.path.expanduser(args.target_dir)
74 | #if os.path.exists(target_dir):
75 | # raise ValueError('Target dir ['+target_dir+'] exists. Remove it first')
76 |
77 | download_dir = os.path.expanduser(args.download_dir)
78 | train_tar_fname = os.path.join(download_dir, _TRAIN_TAR)
79 | check_file(train_tar_fname, args.checksum, _TRAIN_TAR_SHA1)
80 | val_tar_fname = os.path.join(download_dir, _VAL_TAR)
81 | check_file(val_tar_fname, args.checksum, _VAL_TAR_SHA1)
82 |
83 | build_rec = args.with_rec
84 | if build_rec:
85 | os.makedirs(os.path.join(target_dir, 'rec'))
86 | extract_train(train_tar_fname, os.path.join(target_dir, 'train'), build_rec, args.num_thread)
87 | extract_val(val_tar_fname, os.path.join(target_dir, 'val'), build_rec, args.num_thread)
88 |
89 | if __name__ == '__main__':
90 | main()
91 |
--------------------------------------------------------------------------------
/scripts/prepare_minc.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import argparse
4 | import tarfile
5 | from encoding.utils import download, mkdir
6 |
7 | _TARGET_DIR = os.path.expanduser('~/.encoding/data')
8 |
9 | def parse_args():
10 | parser = argparse.ArgumentParser(
11 | description='Initialize MINC dataset.',
12 | epilog='Example: python prepare_minc.py',
13 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
14 | parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk')
15 | parser.add_argument('--no-download', action='store_true', help='disable automatic download if set')
16 | parser.add_argument('--overwrite', action='store_true',
17 |                         help='overwrite downloaded files if set, in case they are corrupted')
18 | args = parser.parse_args()
19 | return args
20 |
21 | def download_minc(path, overwrite=False):
22 | _AUG_DOWNLOAD_URLS = [
23 | ('http://opensurfaces.cs.cornell.edu/static/minc/minc-2500.tar.gz', 'bcccbb3b1ab396ef540f024a5ba23eff54f7fe31')]
24 | download_dir = os.path.join(path, 'downloads')
25 | mkdir(download_dir)
26 | for url, checksum in _AUG_DOWNLOAD_URLS:
27 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
28 | # extract
29 | with tarfile.open(filename) as tar:
30 | tar.extractall(path=path)
31 |
32 | if __name__ == '__main__':
33 | args = parse_args()
34 |     mkdir(os.path.expanduser('~/.encoding/data'))
35 | if args.download_dir is not None:
36 | if os.path.isdir(_TARGET_DIR):
37 | os.remove(_TARGET_DIR)
38 | os.symlink(args.download_dir, _TARGET_DIR)
39 | else:
40 | download_minc(_TARGET_DIR, overwrite=False)
41 |
--------------------------------------------------------------------------------
/scripts/prepare_pascal.py:
--------------------------------------------------------------------------------
1 | """Prepare PASCAL VOC datasets"""
2 | import os
3 | import shutil
4 | import argparse
5 | import tarfile
6 | from encoding.utils import download, mkdir
7 |
8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data')
9 |
10 |
11 | def parse_args():
12 | parser = argparse.ArgumentParser(
13 | description='Initialize PASCAL VOC dataset.',
14 | epilog='Example: python prepare_pascal.py',
15 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
16 | parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk')
17 | parser.add_argument('--no-download', action='store_true', help='disable automatic download if set')
18 |     parser.add_argument('--overwrite', action='store_true', help='overwrite downloaded files if set, in case they are corrupted')
19 | args = parser.parse_args()
20 | return args
21 |
22 |
23 | def download_voc(path, overwrite=False):
24 | _DOWNLOAD_URLS = [
25 | ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar',
26 | '4e443f8a2eca6b1dac8a6c57641b67dd40621a49')]
27 | download_dir = os.path.join(path, 'downloads')
28 | mkdir(download_dir)
29 | for url, checksum in _DOWNLOAD_URLS:
30 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
31 | # extract
32 | with tarfile.open(filename) as tar:
33 | tar.extractall(path=path)
34 |
35 |
36 | def download_aug(path, overwrite=False):
37 | _AUG_DOWNLOAD_URLS = [
38 | ('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz', '7129e0a480c2d6afb02b517bb18ac54283bfaa35')]
39 | download_dir = os.path.join(path, 'downloads')
40 | mkdir(download_dir)
41 | for url, checksum in _AUG_DOWNLOAD_URLS:
42 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
43 | # extract
44 | with tarfile.open(filename) as tar:
45 | tar.extractall(path=path)
46 | shutil.move(os.path.join(path, 'benchmark_RELEASE'),
47 | os.path.join(path, 'VOCaug'))
48 | filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt']
49 | # generate trainval.txt
50 | with open(os.path.join(path, 'VOCaug/dataset/trainval.txt'), 'w') as outfile:
51 | for fname in filenames:
52 | fname = os.path.join(path, fname)
53 | with open(fname) as infile:
54 | for line in infile:
55 | outfile.write(line)
56 |
57 |
58 | if __name__ == '__main__':
59 | args = parse_args()
60 |     mkdir(os.path.expanduser('~/.encoding/data'))
61 | if args.download_dir is not None:
62 | if os.path.isdir(_TARGET_DIR):
63 | os.remove(_TARGET_DIR)
64 | os.symlink(args.download_dir, _TARGET_DIR)
65 | else:
66 | download_voc(_TARGET_DIR, overwrite=False)
67 | download_aug(_TARGET_DIR, overwrite=False)
68 |
--------------------------------------------------------------------------------
/scripts/prepare_pcontext.py:
--------------------------------------------------------------------------------
1 | """Prepare PASCAL Context dataset"""
2 | import os
3 | import shutil
4 | import argparse
5 | import tarfile
6 | from encoding.utils import download, mkdir
7 |
8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data')
9 | PASD_URL="https://codalabuser.blob.core.windows.net/public/%s"
10 |
11 | def parse_args():
12 | parser = argparse.ArgumentParser(
13 | description='Initialize PASCAL Context dataset.',
14 | epilog='Example: python prepare_pcontext.py',
15 | formatter_class=argparse.ArgumentDefaultsHelpFormatter)
16 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk')
17 | args = parser.parse_args()
18 | return args
19 |
20 | def download_pcontext(path, overwrite=False):
21 | _AUG_DOWNLOAD_URLS = [
22 | ('http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar',
23 | 'bf9985e9f2b064752bf6bd654d89f017c76c395a'),
24 | ('https://codalabuser.blob.core.windows.net/public/trainval_merged.json',
25 | '169325d9f7e9047537fedca7b04de4dddf10b881'),
26 | ('https://hangzh.s3.amazonaws.com/encoding/data/pcontext/train.pth',
27 | '4bfb49e8c1cefe352df876c9b5434e655c9c1d07'),
28 | ('https://hangzh.s3.amazonaws.com/encoding/data/pcontext/val.pth',
29 | 'ebedc94247ec616c57b9a2df15091784826a7b0c'),
30 | ]
31 | download_dir = os.path.join(path, 'downloads')
32 | mkdir(download_dir)
33 | for url, checksum in _AUG_DOWNLOAD_URLS:
34 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum)
35 | # extract
36 | if os.path.splitext(filename)[1] == '.tar':
37 | with tarfile.open(filename) as tar:
38 | tar.extractall(path=path)
39 | else:
40 | shutil.move(filename, os.path.join(path, 'VOCdevkit/VOC2010/'+os.path.basename(filename)))
41 |
42 | def install_pcontext_api():
43 | repo_url = "https://github.com/zhanghang1989/detail-api"
44 | os.system("git clone " + repo_url)
45 | os.system("cd detail-api/PythonAPI/ && python setup.py install")
46 | shutil.rmtree('detail-api')
47 | try:
48 | import detail
49 | except Exception:
50 | print("Installing PASCAL Context API failed, please install it manually %s"%(repo_url))
51 |
52 |
53 | if __name__ == '__main__':
54 | args = parse_args()
55 | mkdir(os.path.expanduser('~/.encoding/data'))
56 | if args.download_dir is not None:
57 | if os.path.isdir(_TARGET_DIR):
58 | os.remove(_TARGET_DIR)
59 | # make symlink
60 | os.symlink(args.download_dir, _TARGET_DIR)
61 | else:
62 |         download_pcontext(_TARGET_DIR, overwrite=False)
63 | install_pcontext_api()
64 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | import io
12 | import os
13 | import subprocess
14 |
15 | from setuptools import setup, find_packages
16 |
17 | cwd = os.path.dirname(os.path.abspath(__file__))
18 |
19 | version = '1.2.2'
20 | try:
21 | if not os.getenv('RELEASE'):
22 | from datetime import date
23 | today = date.today()
24 | day = today.strftime("b%Y%m%d")
25 | version += day
26 | except Exception:
27 | pass
28 |
29 | def create_version_file():
30 | global version, cwd
31 | print('-- Building version ' + version)
32 | version_path = os.path.join(cwd, 'encoding', 'version.py')
33 | with open(version_path, 'w') as f:
34 | f.write('"""This is encoding version file."""\n')
35 | f.write("__version__ = '{}'\n".format(version))
36 |
37 | requirements = [
38 | 'numpy',
39 | 'tqdm',
40 | 'nose',
41 | 'portalocker',
42 | 'torch>=1.4.0',
43 | 'torchvision>=0.5.0',
44 | 'Pillow',
45 | 'scipy',
46 | 'requests',
47 | ]
48 |
49 | if __name__ == '__main__':
50 | create_version_file()
51 | setup(
52 | name="torch-encoding",
53 | version=version,
54 | author="Hang Zhang",
55 | author_email="zhanghang0704@gmail.com",
56 | url="https://github.com/zhanghang1989/PyTorch-Encoding",
57 | description="PyTorch Encoding Package",
58 | long_description=open('README.md').read(),
59 | long_description_content_type='text/markdown',
60 | license='MIT',
61 | install_requires=requirements,
62 | packages=find_packages(exclude=["tests", "experiments"]),
63 | package_data={ 'encoding': [
64 | 'LICENSE',
65 | 'lib/cpu/*.h',
66 | 'lib/cpu/*.cpp',
67 | 'lib/gpu/*.h',
68 | 'lib/gpu/*.cpp',
69 | 'lib/gpu/*.cu',
70 | ]},
71 | )
72 |
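For reference, create_version_file() writes an encoding/version.py of the following form (the date suffix shown is just an example of a non-release build):

```python
"""This is encoding version file."""
__version__ = '1.2.2b20200814'
```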
--------------------------------------------------------------------------------
/tests/unit_test/test_dataset.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | from tqdm import tqdm
12 | from torch.utils import data
13 | import torchvision.transforms as transform
14 | from encoding.datasets import get_segmentation_dataset
15 |
16 | def test_ade_dataset():
17 |
18 | def test_dataset(dataset_name):
19 | input_transform = transform.Compose([
20 | transform.ToTensor(),
21 | transform.Normalize([.485, .456, .406], [.229, .224, .225])])
22 | trainset = get_segmentation_dataset(dataset_name, split='val', mode='train',
23 | transform=input_transform)
24 | trainloader = data.DataLoader(trainset, batch_size=16,
25 | drop_last=True, shuffle=True)
26 | tbar = tqdm(trainloader)
27 | max_label = -10
28 | for i, (image, target) in enumerate(tbar):
29 | tmax = target.max().item()
30 | tmin = target.min().item()
31 | assert(tmin >= -1)
32 | if tmax > max_label:
33 | max_label = tmax
34 | assert(max_label < trainset.NUM_CLASS)
35 | tbar.set_description("Batch %d, max label %d"%(i, max_label))
36 | test_dataset('ade20k')
37 |
38 | if __name__ == "__main__":
39 | import nose
40 | nose.runmodule()
41 |
--------------------------------------------------------------------------------
/tests/unit_test/test_function.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | import numpy as np
12 | import torch
13 | from torch.autograd import Variable, gradcheck
14 | import encoding
15 |
16 | EPS = 1e-3
17 | ATOL = 1e-3
18 |
19 | def _assert_tensor_close(a, b, atol=ATOL, rtol=EPS):
20 | npa, npb = a.cpu().numpy(), b.cpu().numpy()
21 | assert np.allclose(npa, npb, rtol=rtol, atol=atol), \
22 | 'Tensor close check failed\n{}\n{}\nadiff={}, rdiff={}'.format(
23 | a, b, np.abs(npa - npb).max(), np.abs((npa - npb) / np.fmax(npa, 1e-5)).max())
24 |
25 | def test_aggregate():
26 | B,N,K,D = 2,3,4,5
27 | A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5),
28 | requires_grad=True)
29 | X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5),
30 | requires_grad=True)
31 | C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5),
32 | requires_grad=True)
33 | input = (A, X, C)
34 | test = gradcheck(encoding.functions.aggregate, input, eps=EPS, atol=ATOL)
35 | print('Testing aggregate(): {}'.format(test))
36 |
37 | def test_scaled_l2():
38 | B,N,K,D = 2,3,4,5
39 | X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5),
40 | requires_grad=True)
41 | C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5),
42 | requires_grad=True)
43 | S = Variable(torch.cuda.DoubleTensor(K).uniform_(-0.5,0.5),
44 | requires_grad=True)
45 | input = (X, C, S)
46 | test = gradcheck(encoding.functions.scaled_l2, input, eps=EPS, atol=ATOL)
47 | print('Testing scaled_l2(): {}'.format(test))
48 |
49 |
50 | def test_moments():
51 | B,C,H = 2,3,4
52 | X = Variable(torch.cuda.DoubleTensor(B,C,H).uniform_(-0.5,0.5),
53 | requires_grad=True)
54 | input = (X,)
55 | test = gradcheck(encoding.functions.moments, input, eps=EPS, atol=ATOL)
56 | print('Testing moments(): {}'.format(test))
57 |
58 | def test_non_max_suppression():
59 | def _test_nms(cuda):
60 | # check a small test case
61 | boxes = torch.Tensor([
62 | [[10.2, 23., 50., 20.],
63 | [11.3, 23., 52., 20.1],
64 | [23.2, 102.3, 23.3, 50.3],
65 | [101.2, 32.4, 70.6, 70.],
66 | [100.2, 30.9, 70.7, 69.]],
67 | [[200.3, 234., 530., 320.],
68 | [110.3, 223., 152., 420.1],
69 | [243.2, 240.3, 50.3, 30.3],
70 | [243.2, 236.4, 48.6, 30.],
71 | [100.2, 310.9, 170.7, 691.]]])
72 |
73 | scores = torch.Tensor([
74 | [0.9, 0.7, 0.11, 0.23, 0.8],
75 | [0.13, 0.89, 0.45, 0.23, 0.3]])
76 |
77 | if cuda:
78 | boxes = boxes.cuda()
79 | scores = scores.cuda()
80 |
81 | expected_output = (
82 | torch.ByteTensor(
83 | [[1, 1, 0, 0, 1], [1, 1, 1, 0, 1]]),
84 | torch.LongTensor(
85 | [[0, 4, 1, 3, 2], [1, 2, 4, 3, 0]])
86 | )
87 |
88 | mask, inds = encoding.functions.NonMaxSuppression(boxes, scores, 0.7)
89 | _assert_tensor_close(mask, expected_output[0])
90 | _assert_tensor_close(inds, expected_output[1])
91 |
92 | _test_nms(False)
93 | _test_nms(True)
94 |
95 | if __name__ == '__main__':
96 | import nose
97 | nose.runmodule()
98 |
--------------------------------------------------------------------------------
/tests/unit_test/test_model.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## This source code is licensed under the MIT-style license found in the
7 | ## LICENSE file in the root directory of this source tree
8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 |
10 | import torch
11 | import encoding
12 |
13 | def test_model_inference():
14 | x = torch.rand(1, 3, 224, 224)
15 | for model_name in encoding.models.pretrained_model_list():
16 | print('Doing: ', model_name)
17 | if 'wideresnet' in model_name: continue # need multi-gpu
18 | model = encoding.models.get_model(model_name, pretrained=True)
19 | model.eval()
20 | y = model(x)
21 |
22 | if __name__ == "__main__":
23 | import nose
24 | nose.runmodule()
25 |
--------------------------------------------------------------------------------
/tests/unit_test/test_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from encoding.utils.metrics import *
4 |
5 | def test_segmentation_metrics():
6 | # check torch evaluation metrics
7 | rows, cols = 640, 480
8 | nclass = 30
9 | # numpy data
10 | im_lab = np.matrix(np.random.randint(0, nclass, size=(rows, cols)))
11 | mask = np.random.random((nclass, rows, cols))
12 | im_pred = mask.argmax(axis=0)
13 | # torch data
14 | tim_lab = torch.from_numpy(im_lab).unsqueeze(0).long()
15 | tim_pred = torch.from_numpy(mask).unsqueeze(0)
16 | # numpy prediction
17 | pixel_correct, pixel_labeled = pixel_accuracy(im_pred, im_lab)
18 | area_inter, area_union = intersection_and_union(im_pred, im_lab, nclass)
19 | pixAcc = 1.0 * pixel_correct / (np.spacing(1) + pixel_labeled)
20 | IoU = 1.0 * area_inter / (np.spacing(1) + area_union)
21 | mIoU = IoU.mean()
22 |     print('numpy prediction is:', pixAcc, mIoU)
23 | # torch metric prediction
24 | pixel_correct, pixel_labeled = batch_pix_accuracy(tim_pred, tim_lab)
25 | area_inter, area_union = batch_intersection_union(tim_pred, tim_lab, nclass)
26 | batch_pixAcc = 1.0 * pixel_correct / (np.spacing(1) + pixel_labeled)
27 | IoU = 1.0 * area_inter / (np.spacing(1) + area_union)
28 | batch_mIoU = IoU.mean()
29 |     print('torch prediction is:', batch_pixAcc, batch_mIoU)
30 |     assert abs(batch_pixAcc - pixAcc) < 1e-3
31 |     assert abs(batch_mIoU - mIoU) < 1e-3
32 |
--------------------------------------------------------------------------------
/torch_encoding.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 2.1
2 | Name: torch-encoding
3 | Version: 1.2.2b20200814
4 | Summary: PyTorch Encoding Package
5 | Home-page: https://github.com/zhanghang1989/PyTorch-Encoding
6 | Author: Hang Zhang
7 | Author-email: zhanghang0704@gmail.com
8 | License: MIT
9 | Description: [](https://pypi.python.org/pypi/torch-encoding)
10 | [](https://pypi.org/project/torch-encoding/#history)
11 | [](https://github.com/zhanghang1989/PyTorch-Encoding/actions)
12 | [](http://pepy.tech/project/torch-encoding)
13 | [](https://opensource.org/licenses/MIT)
14 | [](https://github.com/zhanghang1989/PyTorch-Encoding/actions)
15 | [](https://github.com/zhanghang1989/PyTorch-Encoding/actions)
16 |
17 | [](https://paperswithcode.com/sota/semantic-segmentation-on-ade20k?p=resnest-split-attention-networks)
18 | [](https://paperswithcode.com/sota/semantic-segmentation-on-pascal-context?p=resnest-split-attention-networks)
19 |
20 | # PyTorch-Encoding
21 |
22 | created by [Hang Zhang](http://hangzh.com/)
23 |
24 | ## [Documentation](http://hangzh.com/PyTorch-Encoding/)
25 |
26 | - Please visit the [**Docs**](http://hangzh.com/PyTorch-Encoding/) for detail instructions of installation and usage.
27 |
28 | - Please visit the [link](http://hangzh.com/PyTorch-Encoding/model_zoo/imagenet.html) to image classification models.
29 |
30 | - Please visit the [link](http://hangzh.com/PyTorch-Encoding/model_zoo/segmentation.html) to semantic segmentation models.
31 |
32 | ## Citations
33 |
34 | **ResNeSt: Split-Attention Networks** [[arXiv]]()
35 | [Hang Zhang](http://hangzh.com/), Chongruo Wu, Zhongyue Zhang, Yi Zhu, Zhi Zhang, Haibin Lin, Yue Sun, Tong He, Jonas Muller, R. Manmatha, Mu Li and Alex Smola
36 | ```
37 | @article{zhang2020resnest,
38 | title={ResNeSt: Split-Attention Networks},
39 | author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
40 | journal={arXiv preprint},
41 | year={2020}
42 | }
43 | ```
44 |
45 | **Context Encoding for Semantic Segmentation** [[arXiv]](https://arxiv.org/pdf/1803.08904.pdf)
46 | [Hang Zhang](http://hangzh.com/), [Kristin Dana](http://eceweb1.rutgers.edu/vision/dana.html), [Jianping Shi](http://shijianping.me/), [Zhongyue Zhang](http://zhongyuezhang.com/), [Xiaogang Wang](http://www.ee.cuhk.edu.hk/~xgwang/), [Ambrish Tyagi](https://scholar.google.com/citations?user=GaSWCoUAAAAJ&hl=en), [Amit Agrawal](http://www.amitkagrawal.com/)
47 | ```
48 | @InProceedings{Zhang_2018_CVPR,
49 | author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit},
50 | title = {Context Encoding for Semantic Segmentation},
51 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
52 | month = {June},
53 | year = {2018}
54 | }
55 | ```
56 |
57 | **Deep TEN: Texture Encoding Network** [[arXiv]](https://arxiv.org/pdf/1612.02844.pdf)
58 | [Hang Zhang](http://hangzh.com/), [Jia Xue](http://jiaxueweb.com/), [Kristin Dana](http://eceweb1.rutgers.edu/vision/dana.html)
59 | ```
60 | @InProceedings{Zhang_2017_CVPR,
61 | author = {Zhang, Hang and Xue, Jia and Dana, Kristin},
62 | title = {Deep TEN: Texture Encoding Network},
63 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
64 | month = {July},
65 | year = {2017}
66 | }
67 | ```
68 |
69 | Platform: UNKNOWN
70 | Description-Content-Type: text/markdown
71 |
--------------------------------------------------------------------------------
/torch_encoding.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | README.md
2 | setup.cfg
3 | setup.py
4 | encoding/__init__.py
5 | encoding/parallel.py
6 | encoding/version.py
7 | encoding/datasets/__init__.py
8 | encoding/datasets/ade20k.py
9 | encoding/datasets/base.py
10 | encoding/datasets/cityscapes.py
11 | encoding/datasets/cityscapes_v0.py
12 | encoding/datasets/cityscapescoarse.py
13 | encoding/datasets/coco.py
14 | encoding/datasets/folder.py
15 | encoding/datasets/hpw18.py
16 | encoding/datasets/imagenet.py
17 | encoding/datasets/minc.py
18 | encoding/datasets/pascal_aug.py
19 | encoding/datasets/pascal_voc.py
20 | encoding/datasets/pcontext.py
21 | encoding/functions/__init__.py
22 | encoding/functions/customize.py
23 | encoding/functions/dist_syncbn.py
24 | encoding/functions/encoding.py
25 | encoding/functions/rectify.py
26 | encoding/functions/syncbn.py
27 | encoding/lib/__init__.py
28 | encoding/lib/cpu/encoding_cpu.cpp
29 | encoding/lib/cpu/nms_cpu.cpp
30 | encoding/lib/cpu/operator.cpp
31 | encoding/lib/cpu/operator.h
32 | encoding/lib/cpu/rectify_cpu.cpp
33 | encoding/lib/cpu/roi_align_cpu.cpp
34 | encoding/lib/cpu/syncbn_cpu.cpp
35 | encoding/lib/gpu/activation_kernel.cu
36 | encoding/lib/gpu/common.h
37 | encoding/lib/gpu/device_tensor.h
38 | encoding/lib/gpu/encoding_kernel.cu
39 | encoding/lib/gpu/lib_ssd.cu
40 | encoding/lib/gpu/nms_kernel.cu
41 | encoding/lib/gpu/operator.cpp
42 | encoding/lib/gpu/operator.h
43 | encoding/lib/gpu/rectify_cuda.cu
44 | encoding/lib/gpu/roi_align_kernel.cu
45 | encoding/lib/gpu/syncbn_kernel.cu
46 | encoding/models/__init__.py
47 | encoding/models/deepten.py
48 | encoding/models/model_store.py
49 | encoding/models/model_zoo.py
50 | encoding/models/backbone/__init__.py
51 | encoding/models/backbone/resnest.py
52 | encoding/models/backbone/resnet.py
53 | encoding/models/backbone/resnet_variants.py
54 | encoding/models/backbone/resnext.py
55 | encoding/models/backbone/wideresnet.py
56 | encoding/models/backbone/xception.py
57 | encoding/models/sseg/__init__.py
58 | encoding/models/sseg/atten.py
59 | encoding/models/sseg/base.py
60 | encoding/models/sseg/danet.py
61 | encoding/models/sseg/deeplab.py
62 | encoding/models/sseg/dran.py
63 | encoding/models/sseg/encnet.py
64 | encoding/models/sseg/fcfpn.py
65 | encoding/models/sseg/fcn.py
66 | encoding/models/sseg/psp.py
67 | encoding/models/sseg/upernet.py
68 | encoding/nn/__init__.py
69 | encoding/nn/attention.py
70 | encoding/nn/customize.py
71 | encoding/nn/da_att.py
72 | encoding/nn/dran_att.py
73 | encoding/nn/dropblock.py
74 | encoding/nn/encoding.py
75 | encoding/nn/loss.py
76 | encoding/nn/rectify.py
77 | encoding/nn/splat.py
78 | encoding/nn/syncbn.py
79 | encoding/transforms/__init__.py
80 | encoding/transforms/autoaug.py
81 | encoding/transforms/get_transform.py
82 | encoding/transforms/transforms.py
83 | encoding/utils/__init__.py
84 | encoding/utils/dist_helper.py
85 | encoding/utils/files.py
86 | encoding/utils/lr_scheduler.py
87 | encoding/utils/metrics.py
88 | encoding/utils/misc.py
89 | encoding/utils/pallete.py
90 | encoding/utils/precise_bn.py
91 | encoding/utils/presets.py
92 | encoding/utils/train_helper.py
93 | torch_encoding.egg-info/PKG-INFO
94 | torch_encoding.egg-info/SOURCES.txt
95 | torch_encoding.egg-info/dependency_links.txt
96 | torch_encoding.egg-info/requires.txt
97 | torch_encoding.egg-info/top_level.txt
--------------------------------------------------------------------------------
/torch_encoding.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/torch_encoding.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | tqdm
3 | nose
4 | portalocker
5 | torch>=1.4.0
6 | torchvision>=0.5.0
7 | Pillow
8 | scipy
9 | requests
10 |
--------------------------------------------------------------------------------
/torch_encoding.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | encoding
2 |
--------------------------------------------------------------------------------