├── .editorconfig ├── .github └── workflows │ ├── pypi.yml │ ├── pypi_release.yml │ ├── sphix_build_master.yml │ ├── sphix_build_pr.yml │ └── unit_test.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── dist ├── torch_encoding-1.2.1b20200708-py3.6.egg ├── torch_encoding-1.2.2b20200707-py3.7.egg ├── torch_encoding-1.2.2b20200708-py3.6.egg ├── torch_encoding-1.2.2b20200708-py3.7.egg ├── torch_encoding-1.2.2b20200709-py3.6.egg ├── torch_encoding-1.2.2b20200725-py3.6.egg ├── torch_encoding-1.2.2b20200725-py3.7.egg ├── torch_encoding-1.2.2b20200801-py3.6.egg ├── torch_encoding-1.2.2b20200802-py3.6.egg └── torch_encoding-1.2.2b20200814-py3.6.egg ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── _static │ ├── css │ │ └── encoding.css │ ├── img │ │ ├── EncNet32k128d.svg │ │ ├── cvpr17.svg │ │ ├── deep_ten_curve.svg │ │ ├── favicon.png │ │ ├── figure1.jpg │ │ ├── icon.png │ │ ├── myimage.gif │ │ └── upconv.png │ ├── js │ │ └── hidebib.js │ └── theme_overrides.css │ ├── _templates │ └── layout.html │ ├── conf.py │ ├── custom_directives.py │ ├── index.rst │ ├── model_zoo │ ├── imagenet.rst │ └── segmentation.rst │ ├── nn.rst │ ├── notes │ └── compile.rst │ ├── parallel.rst │ ├── tutorials │ ├── style.rst │ ├── syncbn.rst │ └── texture.rst │ └── utils.rst ├── encoding ├── __init__.py ├── datasets │ ├── __init__.py │ ├── ade20k.py │ ├── base.py │ ├── cityscapes.py │ ├── cityscapes_v0.py │ ├── cityscapescoarse.py │ ├── coco.py │ ├── folder.py │ ├── hpw18.py │ ├── imagenet.py │ ├── minc.py │ ├── pascal_aug.py │ ├── pascal_voc.py │ └── pcontext.py ├── functions │ ├── __init__.py │ ├── customize.py │ ├── dist_syncbn.py │ ├── encoding.py │ ├── rectify.py │ └── syncbn.py ├── lib │ ├── __init__.py │ ├── cpu │ │ ├── encoding_cpu.cpp │ │ ├── nms_cpu.cpp │ │ ├── operator.cpp │ │ ├── operator.h │ │ ├── rectify_cpu.cpp │ │ ├── roi_align_cpu.cpp │ │ ├── setup.py │ │ └── syncbn_cpu.cpp │ └── gpu │ │ ├── activation_kernel.cu │ │ ├── common.h │ │ ├── device_tensor.h │ │ ├── encoding_kernel.cu │ │ ├── lib_ssd.cu │ │ ├── nms_kernel.cu │ │ ├── operator.cpp │ │ ├── operator.h │ │ ├── rectify_cuda.cu │ │ ├── roi_align_kernel.cu │ │ ├── setup.py │ │ └── syncbn_kernel.cu ├── models │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── resnest.py │ │ ├── resnet.py │ │ ├── resnet_variants.py │ │ ├── resnext.py │ │ ├── wideresnet.py │ │ └── xception.py │ ├── deepten.py │ ├── model_store.py │ ├── model_zoo.py │ └── sseg │ │ ├── __init__.py │ │ ├── atten.py │ │ ├── base.py │ │ ├── danet.py │ │ ├── deeplab.py │ │ ├── dran.py │ │ ├── encnet.py │ │ ├── fcfpn.py │ │ ├── fcn.py │ │ ├── psp.py │ │ └── upernet.py ├── nn │ ├── __init__.py │ ├── attention.py │ ├── customize.py │ ├── da_att.py │ ├── dran_att.py │ ├── dropblock.py │ ├── encoding.py │ ├── loss.py │ ├── rectify.py │ ├── splat.py │ └── syncbn.py ├── parallel.py ├── transforms │ ├── __init__.py │ ├── autoaug.py │ ├── get_transform.py │ └── transforms.py └── utils │ ├── __init__.py │ ├── dist_helper.py │ ├── files.py │ ├── lr_scheduler.py │ ├── metrics.py │ ├── misc.py │ ├── pallete.py │ ├── precise_bn.py │ ├── presets.py │ └── train_helper.py ├── experiments ├── recognition │ ├── README.md │ ├── resnet50_baseline.sh │ ├── test_flops.py │ ├── train_dist.py │ └── verify.py └── segmentation │ ├── demo.py │ ├── model_mapping.py │ ├── test.py │ ├── test.sh │ ├── test_danet.sh │ ├── test_models.py │ ├── train.py │ └── train_dist.py ├── img ├── overview.jpg ├── overview.png ├── tab3.jpg └── tab3.png ├── scripts ├── prepare_ade20k.py ├── 
prepare_citys.py ├── prepare_coco.py ├── prepare_imagenet.py ├── prepare_minc.py ├── prepare_pascal.py └── prepare_pcontext.py ├── setup.cfg ├── setup.py ├── tests ├── lint.py ├── pylintrc └── unit_test │ ├── test_dataset.py │ ├── test_function.py │ ├── test_model.py │ ├── test_module.py │ └── test_utils.py └── torch_encoding.egg-info ├── PKG-INFO ├── SOURCES.txt ├── dependency_links.txt ├── requires.txt └── top_level.txt /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | schedule: 8 | - cron: "0 12 * * *" 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-18.04 14 | 15 | steps: 16 | - uses: actions/checkout@master 17 | - name: Set up Python 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: '3.7' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine pypandoc 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets. PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets. PYPI_PASSWORD }} 29 | run: | 30 | python setup.py sdist bdist_wheel 31 | twine upload dist/* --verbose 32 | -------------------------------------------------------------------------------- /.github/workflows/pypi_release.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Pypi Release 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-18.04 14 | 15 | steps: 16 | - uses: actions/checkout@master 17 | - name: Set up Python 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: '3.7' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine pypandoc 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.pypi_username }} 28 | TWINE_PASSWORD: ${{ secrets.pypi_password }} 29 | RELEASE: 1 30 | run: | 31 | python setup.py sdist bdist_wheel 32 | twine upload dist/* --verbose 33 | -------------------------------------------------------------------------------- /.github/workflows/sphix_build_master.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Build Docs 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | 10 | jobs: 11 | docs: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v1 15 | - uses: seanmiddleditch/gha-setup-ninja@master 16 | 17 | - name: Set up Python 18 | uses: 
actions/setup-python@v1 19 | with: 20 | python-version: 3.7 21 | 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip 25 | pip install numpy -I 26 | pip install pytest torch 27 | 28 | - name: Install package 29 | run: | 30 | pip install -e . 31 | 32 | - name: Install Sphix Dependencies 33 | run: | 34 | cd docs/ 35 | pip install -r requirements.txt 36 | 37 | - name: Build Sphinx docs 38 | run: | 39 | cd docs/ 40 | make html 41 | touch build/html/.nojekyll 42 | 43 | # https://github.com/marketplace/actions/github-pages 44 | - name: Deploy 45 | if: success() 46 | uses: crazy-max/ghaction-github-pages@v1 47 | env: 48 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 49 | with: 50 | build_dir: docs/build/html/ 51 | target_branch: gh-pages 52 | -------------------------------------------------------------------------------- /.github/workflows/sphix_build_pr.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Build Docs 5 | 6 | on: 7 | pull_request: 8 | branches: [ master ] 9 | 10 | jobs: 11 | docs: 12 | runs-on: self-hosted 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: seanmiddleditch/gha-setup-ninja@master 16 | 17 | - name: Set PR Number 18 | uses: actions/github-script@0.3.0 19 | with: 20 | github-token: ${{github.token}} 21 | script: | 22 | const core = require('@actions/core') 23 | const prNumber = context.payload.number; 24 | core.exportVariable('PULL_NUMBER', prNumber); 25 | core.exportVariable("PATH", "/home/ubuntu/anaconda3/bin:/usr/local/bin:/usr/bin/:/bin:$PATH") 26 | 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install numpy -I 31 | pip install pytest torch 32 | 33 | - name: Install package 34 | run: | 35 | pip install -e . 
36 | 37 | - name: Install Sphix Dependencies 38 | run: | 39 | cd docs/ 40 | pip install -r requirements.txt 41 | 42 | - name: Build Sphinx docs 43 | run: | 44 | cd docs/ 45 | make html 46 | touch build/html/.nojekyll 47 | aws s3 sync build/html/ s3://hangzh/encoding/docs/${{ env.PULL_NUMBER }}/ --acl public-read --follow-symlinks --delete 48 | 49 | - name: Comment 50 | if: success() 51 | uses: thollander/actions-comment-pull-request@master 52 | with: 53 | message: "The docs are uploaded and can be previewed at http://hangzh.s3.amazonaws.com/encoding/docs/${{ env.PULL_NUMBER }}/index.html" 54 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 55 | -------------------------------------------------------------------------------- /.github/workflows/unit_test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Unit Test 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: self-hosted 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - uses: seanmiddleditch/gha-setup-ninja@master 20 | 21 | - name: Set up Python 22 | uses: actions/github-script@0.3.0 23 | with: 24 | github-token: ${{github.token}} 25 | script: | 26 | const core = require('@actions/core') 27 | core.exportVariable("PATH", "/home/ubuntu/anaconda3/bin:/usr/local/bin:/usr/bin/:/bin:$PATH") 28 | 29 | - name: Install package 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install -e . 33 | 34 | - name: Run pytest 35 | run: | 36 | pip install nose 37 | nosetests -v tests/unit_test/ 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.swp 3 | *.pyc 4 | version.py 5 | build/ 6 | data/ 7 | docs/src/ 8 | docs/html/ 9 | encoding/_ext/ 10 | encoding.egg-info/ 11 | *.o 12 | *.so 13 | *.ninja* 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017- Hang Zhang. All rights reserved. 4 | Copyright (c) 2018- Amazon.com, Inc. or its affiliates. All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR = $(CURDIR) 2 | 3 | lint: cpplint pylint 4 | 5 | cpplint: 6 | tests/lint.py encoding cpp src kernel 7 | 8 | pylint: 9 | pylint --rcfile=$(ROOTDIR)/tests/pylintrc --ignore-patterns=".*\.so$$,.*\.dll$$,.*\.dylib$$" encoding --ignore=_ext 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [Dual Attention Network for Scene Segmentation (CVPR2019)](https://arxiv.org/pdf/1809.02983.pdf) 2 | 3 | [Jun Fu](https://scholar.google.com/citations?user=h3vzrgkAAAAJ&hl=zh-CN), [Jing Liu](http://www.nlpr.ia.ac.cn/iva/liujing/index.html), [Haijie Tian](https://github.com/tianhaijie), [Yong Li](http://www.foreverlee.net/), Yongjun Bao, Zhiwei Fang, and Hanqing Lu 4 | 5 | ## Introduction 6 | 7 | We propose a Dual Attention Network (DANet) to adaptively integrate local features with their global dependencies based on the self-attention mechanism. We achieve new state-of-the-art segmentation performance on three challenging scene segmentation datasets: Cityscapes, PASCAL Context, and COCO Stuff-10k. 8 | 9 | ![image](img/overview.png) 10 | 11 | ## Cityscapes testing set result 12 | 13 | We train our DANet-101 with only fine annotated data and submit our test results to the official evaluation server. 14 | 15 | ![image](img/tab3.png) 16 | 17 | ## Updates 18 | 19 | **2020/9**: **Code renewed**; it now supports **PyTorch 1.4.0** or later! 20 | 21 | 2020/8: The new TNNLS version, DRANet, achieves [**82.9%**](https://www.cityscapes-dataset.com/method-details/?submissionID=4792) on the Cityscapes test set (result submitted in August 2019), which is new state-of-the-art performance using only the fine annotated dataset and ResNet-101. The code will be released in [DRANet](). 22 | 23 | 2020/7: DANet is supported in [MMSegmentation](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/danet), where it achieves **80.47%** with single-scale testing and **82.02%** with multi-scale testing on the Cityscapes val set. 24 | 25 | 2018/9: DANet released. The trained model with ResNet-101 achieves 81.5% on the Cityscapes test set. 26 | 27 | ## Usage 28 | 29 | 1. Install PyTorch 30 | 31 | - The code is tested with Python 3.6 and PyTorch 1.4.0. 32 | - The code is modified from [PyTorch-Encoding](https://github.com/zhanghang1989/PyTorch-Encoding). 33 | 34 | 2. Clone the repository 35 | 36 | ```shell 37 | git clone https://github.com/junfu1115/DANet.git 38 | cd DANet 39 | python setup.py install 40 | ``` 41 | 42 | 3. Dataset 43 | - Download the [Cityscapes](https://www.cityscapes-dataset.com/) dataset. 44 | - Please put the dataset in the folder `./datasets` 45 | 46 | 4. Evaluation for DANet 47 | 48 | - Download the trained model [DANet101](https://drive.google.com/open?id=1XmpFEF-tbPH0Rmv4eKRxYJngr3pTbj6p) and put it in the folder `./experiments/segmentation/models/` 49 | 50 | - `cd ./experiments/segmentation/` 51 | 52 | - For single-scale testing, please run (a minimal Python sketch of the corresponding test-time preprocessing is given at the end of this README): 53 | 54 | - ```shell 55 | CUDA_VISIBLE_DEVICES=0,1,2,3 python test.py --dataset citys --model danet --backbone resnet101 --resume models/DANet101.pth.tar --eval --base-size 2048 --crop-size 768 --workers 1 --multi-grid --multi-dilation 4 8 16 --os 8 --aux --no-deepstem 56 | ``` 57 | 58 | - Evaluation Result 59 | 60 | The expected scores are as follows: DANet101 on the Cityscapes val set (mIoU/pAcc): **79.93/95.97** (ss) 61 | 62 | 5. Evaluation for DRANet 63 | 64 | - Download the trained model [DRANet101](https://drive.google.com/file/d/1xCl2N0b0rVFH4y30HCGfy7RY3-ars7Ce/view?usp=sharing) and put it in the folder `./experiments/segmentation/models/` 65 | 66 | - The evaluation code is in the folder `./experiments/segmentation/` 67 | 68 | - `cd ./experiments/segmentation/` 69 | 70 | - For single-scale testing, please run: 71 | 72 | - ```shell 73 | CUDA_VISIBLE_DEVICES=0,1,2,3 python test.py --dataset citys --model dran --backbone resnet101 --resume models/dran101.pth.tar --eval --base-size 2048 --crop-size 768 --workers 1 --multi-grid --multi-dilation 4 8 16 --os 8 --aux 74 | ``` 75 | 76 | - Evaluation Result 77 | 78 | The expected scores are as follows: DRANet101 on the Cityscapes val set (mIoU/pAcc): **81.63/96.62** (ss) 79 | 80 | ## Citation 81 | 82 | If you find DANet or DRANet useful in your research, please consider citing: 83 | 84 | ``` 85 | @article{fu2020scene, 86 | title={Scene Segmentation With Dual Relation-Aware Attention Network}, 87 | author={Fu, Jun and Liu, Jing and Jiang, Jie and Li, Yong and Bao, Yongjun and Lu, Hanqing}, 88 | journal={IEEE Transactions on Neural Networks and Learning Systems}, 89 | year={2020}, 90 | publisher={IEEE} 91 | } 92 | ``` 93 | 94 | ``` 95 | @inproceedings{fu2019dual, 96 | title={Dual attention network for scene segmentation}, 97 | author={Fu, Jun and Liu, Jing and Tian, Haijie and Li, Yong and Bao, Yongjun and Fang, Zhiwei and Lu, Hanqing}, 98 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 99 | pages={3146--3154}, 100 | year={2019} 101 | } 102 | ``` 103 | 104 | 105 | 106 | ## Acknowledgement 107 | 108 | Thanks to [PyTorch-Encoding](https://github.com/zhanghang1989/PyTorch-Encoding), especially for the Synchronized BN!
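
## Test-time preprocessing sketch

The `--base-size`/`--crop-size` options used in the evaluation commands above correspond to the validation-time resize-and-center-crop implemented in `encoding/datasets/base.py` (`BaseDataset._val_sync_transform`). The snippet below is only an illustrative, standalone re-implementation of that preprocessing with plain PIL/torchvision, not the repository's exact API; the input file name `example.png`, the helper name `val_resize_center_crop`, and the ImageNet normalization statistics are assumptions made for the sake of the example.

```python
import torch
from PIL import Image
from torchvision import transforms

def val_resize_center_crop(img, crop_size=768):
    # Resize so the short side equals crop_size (keeping aspect ratio), as in
    # BaseDataset._val_sync_transform, then take a crop_size x crop_size center crop.
    w, h = img.size
    if w > h:
        oh = crop_size
        ow = int(1.0 * w * oh / h)
    else:
        ow = crop_size
        oh = int(1.0 * h * ow / w)
    img = img.resize((ow, oh), Image.BILINEAR)
    w, h = img.size
    x1 = int(round((w - crop_size) / 2.0))
    y1 = int(round((h - crop_size) / 2.0))
    return img.crop((x1, y1, x1 + crop_size, y1 + crop_size))

# Normalization statistics below are the usual ImageNet values (an assumption,
# not taken from this repository).
to_tensor = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

img = Image.open('example.png').convert('RGB')  # hypothetical input image
batch = to_tensor(val_resize_center_crop(img)).unsqueeze(0)
print(batch.shape)  # torch.Size([1, 3, 768, 768])
```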
109 | -------------------------------------------------------------------------------- /dist/torch_encoding-1.2.1b20200708-py3.6.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.1b20200708-py3.6.egg -------------------------------------------------------------------------------- /dist/torch_encoding-1.2.2b20200707-py3.7.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200707-py3.7.egg -------------------------------------------------------------------------------- /dist/torch_encoding-1.2.2b20200708-py3.6.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200708-py3.6.egg -------------------------------------------------------------------------------- /dist/torch_encoding-1.2.2b20200708-py3.7.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200708-py3.7.egg -------------------------------------------------------------------------------- /dist/torch_encoding-1.2.2b20200709-py3.6.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200709-py3.6.egg -------------------------------------------------------------------------------- /dist/torch_encoding-1.2.2b20200725-py3.6.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200725-py3.6.egg -------------------------------------------------------------------------------- /dist/torch_encoding-1.2.2b20200725-py3.7.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200725-py3.7.egg -------------------------------------------------------------------------------- /dist/torch_encoding-1.2.2b20200801-py3.6.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200801-py3.6.egg -------------------------------------------------------------------------------- /dist/torch_encoding-1.2.2b20200802-py3.6.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200802-py3.6.egg -------------------------------------------------------------------------------- /dist/torch_encoding-1.2.2b20200814-py3.6.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/dist/torch_encoding-1.2.2b20200814-py3.6.egg -------------------------------------------------------------------------------- /docs/Makefile: 
-------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = Encoding 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | docset: html 16 | doc2dash --name $(SPHINXPROJ) --icon $(SOURCEDIR)/_static/img/favicon.png --enable-js --online-redirect-url http://hangzh.com/PyTorch-Encoding/ --force $(BUILDDIR)/html/ 17 | 18 | # Manually fix because Zeal doesn't deal well with `icon.png`-only at 2x resolution. 19 | cp $(SPHINXPROJ).docset/icon.png $(SPHINXPROJ).docset/icon@2x.png 20 | convert $(SPHINXPROJ).docset/icon@2x.png -resize 16x16 $(SPHINXPROJ).docset/icon.png 21 | 22 | .PHONY: help Makefile docset 23 | 24 | # Catch-all target: route all unknown targets to Sphinx using the new 25 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 26 | %: Makefile 27 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 28 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=Encoding 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx-gallery 3 | sphinxcontrib-googleanalytics 4 | -e git://github.com/zhanghang1989/autorch_sphinx_theme.git#egg=autorch_sphinx_theme 5 | -------------------------------------------------------------------------------- /docs/source/_static/css/encoding.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif; 3 | } 4 | 5 | /* Default header fonts are ugly */ 6 | h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption { 7 | font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif; 8 | } 9 | 10 | /* Use white for docs background */ 11 | .wy-side-nav-search { 12 | background-color: #a0e2ff; 13 | } 14 | 15 | .wy-nav-content-wrap, .wy-menu li.current > a { 16 | background-color: #fff; 17 | } 18 | 19 | @media screen and (min-width: 1400px) { 20 | .wy-nav-content-wrap { 21 | background-color: rgba(0, 0, 0, 0.0470588); 22 | } 23 | 24 | .wy-nav-content { 25 | background-color: #fff; 26 | } 27 | } 28 | 29 | /* Fixes for mobile */ 30 | .wy-nav-top { 31 | background-color: #fff; 32 | background-repeat: no-repeat; 33 | background-position: center; 34 | padding: 0; 35 | margin: 0.4045em 0.809em; 36 | color: #333; 37 | } 38 | 39 | .wy-nav-top > a { 40 | display: none; 41 | } 42 | 43 | @media screen and (max-width: 768px) { 44 | .wy-side-nav-search>a img.logo { 45 | height: 60px; 46 | } 47 | } 48 | 49 | /* This is needed to ensure that logo above search scales properly */ 50 | .wy-side-nav-search a { 51 | display: block; 52 | } 53 | 54 | /* This ensures that multiple constructors will remain in separate lines. 
*/ 55 | .rst-content dl:not(.docutils) dt { 56 | display: table; 57 | } 58 | 59 | /* Use our blue for literals */ 60 | .rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal { 61 | color: #4080bf; 62 | } 63 | 64 | .rst-content tt.xref, a .rst-content tt, .rst-content tt.xref, 65 | .rst-content code.xref, a .rst-content tt, a .rst-content code { 66 | color: #404040; 67 | } 68 | 69 | /* Change link colors (except for the menu) */ 70 | 71 | a { 72 | color: #4080bf; 73 | } 74 | 75 | a:hover { 76 | color: #4080bf; 77 | } 78 | 79 | 80 | a:visited { 81 | color: #306293; 82 | } 83 | 84 | .wy-menu a { 85 | color: #b3b3b3; 86 | } 87 | 88 | .wy-menu a:hover { 89 | color: #b3b3b3; 90 | } 91 | 92 | /* Default footer text is quite big */ 93 | footer { 94 | font-size: 80%; 95 | } 96 | 97 | footer .rst-footer-buttons { 98 | font-size: 125%; /* revert footer settings - 1/80% = 125% */ 99 | } 100 | 101 | footer p { 102 | font-size: 100%; 103 | } 104 | 105 | /* For hidden headers that appear in TOC tree */ 106 | /* see http://stackoverflow.com/a/32363545/3343043 107 | */ 108 | .rst-content .hidden-section { 109 | display: none; 110 | } 111 | 112 | nav .hidden-section { 113 | display: inherit; 114 | } 115 | 116 | .wy-side-nav-search>div.version { 117 | color: #000; 118 | } 119 | -------------------------------------------------------------------------------- /docs/source/_static/img/cvpr17.svg: -------------------------------------------------------------------------------- 1 | encodingInputDictionaryResidualsAssignAggregateEncoding-Layer -------------------------------------------------------------------------------- /docs/source/_static/img/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/docs/source/_static/img/favicon.png -------------------------------------------------------------------------------- /docs/source/_static/img/figure1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/docs/source/_static/img/figure1.jpg -------------------------------------------------------------------------------- /docs/source/_static/img/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/docs/source/_static/img/icon.png -------------------------------------------------------------------------------- /docs/source/_static/img/myimage.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/docs/source/_static/img/myimage.gif -------------------------------------------------------------------------------- /docs/source/_static/img/upconv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/docs/source/_static/img/upconv.png -------------------------------------------------------------------------------- /docs/source/_static/js/hidebib.js: -------------------------------------------------------------------------------- 1 | // adapted from: http://www.robots.ox.ac.uk/~vedaldi/assets/hidebib.js 2 | function hideallbibs() 3 | { 4 | var el = document.getElementsByTagName("div") ; 5 | 
for (var i = 0 ; i < el.length ; ++i) { 6 | if (el[i].className == "paper") { 7 | var bib = el[i].getElementsByTagName("pre") ; 8 | if (bib.length > 0) { 9 | bib [0] .style.display = 'none' ; 10 | } 11 | } 12 | } 13 | } 14 | 15 | function togglebib(paperid) 16 | { 17 | var paper = document.getElementById(paperid) ; 18 | var bib = paper.getElementsByTagName('pre') ; 19 | if (bib.length > 0) { 20 | if (bib [0] .style.display == 'none') { 21 | bib [0] .style.display = 'block' ; 22 | } else { 23 | bib [0] .style.display = 'none' ; 24 | } 25 | } 26 | } 27 | 28 | function toggleblock(blockId) 29 | { 30 | var block = document.getElementById(blockId); 31 | if (block.style.display == 'none') { 32 | block.style.display = 'block' ; 33 | } else { 34 | block.style.display = 'none' ; 35 | } 36 | } 37 | 38 | function hideblock(blockId) 39 | { 40 | var block = document.getElementById(blockId); 41 | block.style.display = 'none' ; 42 | } 43 | -------------------------------------------------------------------------------- /docs/source/_static/theme_overrides.css: -------------------------------------------------------------------------------- 1 | /* override table width restrictions */ 2 | @media screen and (min-width: 767px) { 3 | 4 | .wy-table-responsive table td { 5 | /* !important prevents the common CSS stylesheets from overriding 6 | this as on RTD they are loaded after this stylesheet */ 7 | white-space: normal !important; 8 | } 9 | 10 | .wy-table-responsive { 11 | overflow: visible !important; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /docs/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {%- block extrahead %} 4 | 5 | 6 | 7 | {% endblock %} 8 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Encoding documentation master file 2 | 3 | :github_url: https://github.com/zhanghang1989/PyTorch-Encoding 4 | 5 | Encoding Documentation 6 | ====================== 7 | 8 | Created by `Hang Zhang `_ 9 | 10 | An optimized PyTorch package with CUDA backend. 11 | 12 | 13 | .. toctree:: 14 | :glob: 15 | :maxdepth: 1 16 | :caption: Installation 17 | 18 | notes/* 19 | 20 | .. toctree:: 21 | :glob: 22 | :maxdepth: 1 23 | :caption: Model Zoo 24 | 25 | model_zoo/* 26 | 27 | .. toctree:: 28 | :glob: 29 | :maxdepth: 1 30 | :caption: Other Tutorials 31 | 32 | tutorials/* 33 | 34 | .. toctree:: 35 | :maxdepth: 1 36 | :caption: Package Reference 37 | 38 | nn 39 | parallel 40 | utils 41 | 42 | Indices and tables 43 | ================== 44 | 45 | * :ref:`genindex` 46 | * :ref:`modindex` 47 | -------------------------------------------------------------------------------- /docs/source/nn.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | encoding.nn 5 | =========== 6 | 7 | Customized NN modules in Encoding Package. For Synchronized Cross-GPU Batch Normalization, please visit :class:`encoding.nn.BatchNorm2d`. 8 | 9 | .. currentmodule:: encoding.nn 10 | 11 | :hidden:`Encoding` 12 | ~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: Encoding 15 | :members: 16 | 17 | :hidden:`DistSyncBatchNorm` 18 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 | 20 | .. 
autoclass:: DistSyncBatchNorm 21 | :members: 22 | 23 | :hidden:`SyncBatchNorm` 24 | ~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. autoclass:: SyncBatchNorm 27 | :members: 28 | 29 | :hidden:`BatchNorm1d` 30 | ~~~~~~~~~~~~~~~~~~~~~~~~ 31 | 32 | .. autoclass:: BatchNorm1d 33 | :members: 34 | 35 | :hidden:`BatchNorm2d` 36 | ~~~~~~~~~~~~~~~~~~~~~~~~ 37 | 38 | .. autoclass:: BatchNorm2d 39 | :members: 40 | 41 | :hidden:`BatchNorm3d` 42 | ~~~~~~~~~~~~~~~~~~~~~~~~ 43 | 44 | .. autoclass:: BatchNorm3d 45 | :members: 46 | 47 | :hidden:`Inspiration` 48 | ~~~~~~~~~~~~~~~~~~~~~ 49 | 50 | .. autoclass:: Inspiration 51 | :members: 52 | 53 | :hidden:`UpsampleConv2d` 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. autoclass:: UpsampleConv2d 57 | :members: 58 | 59 | :hidden:`GramMatrix` 60 | ~~~~~~~~~~~~~~~~~~~~ 61 | 62 | .. autoclass:: GramMatrix 63 | :members: 64 | -------------------------------------------------------------------------------- /docs/source/notes/compile.rst: -------------------------------------------------------------------------------- 1 | Install and Citations 2 | ===================== 3 | 4 | 5 | Installation 6 | ------------ 7 | 8 | * Install PyTorch 1.4.0 by following the `PyTorch instructions `_. 9 | 10 | * PIP Install:: 11 | 12 | pip install torch-encoding --pre 13 | 14 | * Install from source:: 15 | 16 | git clone https://github.com/zhanghang1989/PyTorch-Encoding && cd PyTorch-Encoding 17 | python setup.py install 18 | 19 | 20 | Detailed Steps 21 | -------------- 22 | 23 | This tutorial is a sucessful setup example for AWS EC2 p3 instance with ubuntu 16.04, CUDA 10. 24 | We cannot guarantee it to work for all the machines, but the steps should be similar. 25 | Assuming CUDA and cudnn are already sucessfully installed, otherwise please refer to other tutorials. 26 | 27 | * Install Anaconda from the `link `_ . 28 | 29 | * Install ninja:: 30 | 31 | wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip 32 | sudo unzip ninja-linux.zip -d /usr/local/bin/ 33 | sudo update-alternatives --install /usr/bin/ninja ninja /usr/local/bin/ninja 1 --force 34 | 35 | * Install PyTorch:: 36 | 37 | conda install pytorch torchvision cudatoolkit=10.0 -c pytorch 38 | 39 | * Install this package:: 40 | 41 | pip install torch-encoding --pre 42 | 43 | Citations 44 | --------- 45 | 46 | .. note:: 47 | * Hang Zhang et al. "ResNeSt: Split-Attention Networks" *arXiv 2020*:: 48 | 49 | @article{zhang2020resnest, 50 | title={ResNeSt: Split-Attention Networks}, 51 | author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander}, 52 | journal={arXiv preprint arXiv:2004.08955}, 53 | year={2020} 54 | } 55 | 56 | * Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*:: 57 | 58 | @InProceedings{Zhang_2018_CVPR, 59 | author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit}, 60 | title = {Context Encoding for Semantic Segmentation}, 61 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 62 | month = {June}, 63 | year = {2018} 64 | } 65 | 66 | 67 | * Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." 
*The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*:: 68 | 69 | @InProceedings{Zhang_2017_CVPR, 70 | author = {Zhang, Hang and Xue, Jia and Dana, Kristin}, 71 | title = {Deep TEN: Texture Encoding Network}, 72 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 73 | month = {July}, 74 | year = {2017} 75 | } 76 | -------------------------------------------------------------------------------- /docs/source/parallel.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | encoding.parallel 5 | ================= 6 | 7 | - The current PyTorch DataParallel does not support multi-GPU loss calculation, which makes GPU memory usage very unbalanced. We address this issue here by applying DataParallel to both the Model & Criterion. 8 | 9 | .. note:: 10 | Deprecated, please use torch.nn.parallel.DistributedDataParallel with :class:`encoding.nn.DistSyncBatchNorm` for the best performance. 11 | 12 | .. automodule:: encoding.parallel 13 | .. currentmodule:: encoding.parallel 14 | 15 | :hidden:`DataParallelModel` 16 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 17 | 18 | .. autoclass:: DataParallelModel 19 | :members: 20 | 21 | :hidden:`DataParallelCriterion` 22 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 23 | 24 | .. autoclass:: DataParallelCriterion 25 | :members: 26 | 27 | 28 | :hidden:`allreduce` 29 | ~~~~~~~~~~~~~~~~~~~ 30 | 31 | .. autofunction:: allreduce 32 | -------------------------------------------------------------------------------- /docs/source/tutorials/syncbn.rst: -------------------------------------------------------------------------------- 1 | Implementing Synchronized Multi-GPU Batch Normalization 2 | ======================================================= 3 | 4 | In this tutorial, we discuss the implementation details of Multi-GPU Batch Normalization (BN) (classic implementation: :class:`encoding.nn.BatchNorm2d`). We will provide a training example in a later version. 5 | 6 | How does BN work? 7 | ----------------- 8 | 9 | The BN layer was introduced in the paper `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift `_, which dramatically speeds up the training process of the network (it enables a larger learning rate) and makes the network less sensitive to the weight initialization. 10 | 11 | .. image:: http://hangzh.com/blog/images/bn1.png 12 | :align: center 13 | 14 | - Forward Pass: 15 | For the input data :math:`X={x_1, ...x_N}`, the data are normalized to be zero-mean and unit-variance, then scaled and shifted: 16 | 17 | .. math:: 18 | y_i = \gamma\cdot\frac{x_i-\mu}{\sigma} + \beta , 19 | 20 | where :math:`\mu=\frac{\sum_i^N x_i}{N} , \sigma = \sqrt{\frac{\sum_i^N (x_i-\mu)^2}{N}+\epsilon}` and :math:`\gamma, \beta` are the learnable parameters. 21 | 22 | - Backward Pass: 23 | To calculate the gradient :math:`\frac{d_\ell}{d_{x_i}}`, we need to consider the partial gradient from :math:`\frac{d_\ell}{d_y}` as well as the gradients from :math:`\frac{d_\ell}{d_\mu}` and :math:`\frac{d_\ell}{d_\sigma}`, since :math:`\mu \text{ and } \sigma` are functions of the input :math:`x_i`. We use partial derivatives in the notation: 24 | 25 | .. math:: 26 | 27 | \frac{d_\ell}{d_{x_i}} = \frac{d_\ell}{d_{y_i}}\cdot\frac{\partial_{y_i}}{\partial_{x_i}} + \frac{d_\ell}{d_\mu}\cdot\frac{d_\mu}{d_{x_i}} + \frac{d_\ell}{d_\sigma}\cdot\frac{d_\sigma}{d_{x_i}} 28 | 29 | where :math:`\frac{\partial_{y_i}}{\partial_{x_i}}=\frac{\gamma}{\sigma}, \frac{d_\ell}{d_\mu}=-\frac{\gamma}{\sigma}\sum_i^N\frac{d_\ell}{d_{y_i}} 30 | \text{ and } \frac{d_\sigma}{d_{x_i}}=-\frac{1}{\sigma}(\frac{x_i-\mu}{N})`. 31 | 32 | Why Synchronize BN? 33 | ------------------- 34 | 35 | - Standard implementations of BN in public frameworks (such as Caffe, MXNet, Torch, TF, PyTorch) are unsynchronized, which means that the data are normalized within each GPU. Therefore the `working batch-size` of the BN layer is `BatchSize/nGPU` (the batch-size on each GPU). 36 | 37 | .. image:: http://hangzh.com/blog/images/bn2.png 38 | :align: center 39 | 40 | - Since the `working batch-size` is typically large enough for standard vision tasks, such as classification and detection, there is no need to synchronize the BN layer during training. The synchronization would only slow down training. 41 | 42 | - However, for the semantic segmentation task, state-of-the-art approaches typically adopt dilated convolution, which is very memory consuming. The `working batch-size` can be too small for BN layers (2 or 4 in each GPU) when using larger/deeper pre-trained networks, such as :class:`encoding.dilated.ResNet` or :class:`encoding.dilated.DenseNet`. 43 | 44 | How to Synchronize? 45 | ------------------- 46 | 47 | Suppose we have :math:`K` GPUs, and let :math:`sum(x)_k` and :math:`sum(x^2)_k` denote the sum of elements and the sum of squared elements on the :math:`k^{th}` GPU. 48 | 49 | - Forward Pass: 50 | We can calculate the sum of elements :math:`sum(x)=\sum x_i \text{ and sum of squares } sum(x^2)=\sum x_i^2` in each GPU, then apply the :class:`encoding.parallel.allreduce` operation to sum across GPUs. Then calculate the global mean :math:`\mu=\frac{sum(x)}{N} \text{ and global variance } \sigma=\sqrt{\frac{sum(x^2)}{N}-\mu^2+\epsilon}`. 51 | 52 | - Backward Pass: 53 | * :math:`\frac{d_\ell}{d_{x_i}}=\frac{d_\ell}{d_{y_i}}\frac{\gamma}{\sigma}` can be calculated locally in each GPU. 54 | * Calculate the gradients of :math:`sum(x)` and :math:`sum(x^2)` individually in each GPU, i.e. :math:`\frac{d_\ell}{d_{sum(x)_k}}` and :math:`\frac{d_\ell}{d_{sum(x^2)_k}}`. 55 | 56 | * Then sync the gradients (automatically handled by :class:`encoding.parallel.allreduce`) and continue the backward pass. 57 | 58 | .. image:: http://hangzh.com/blog/images/bn3.png 59 | :align: center 60 | 61 | Citation 62 | -------- 63 | 64 | .. note:: 65 | This code is provided together with the paper; please cite our work. 66 | 67 | * Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. 
"Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*:: 68 | 69 | @InProceedings{Zhang_2018_CVPR, 70 | author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit}, 71 | title = {Context Encoding for Semantic Segmentation}, 72 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 73 | month = {June}, 74 | year = {2018} 75 | } 76 | -------------------------------------------------------------------------------- /docs/source/tutorials/texture.rst: -------------------------------------------------------------------------------- 1 | Deep TEN: Deep Texture Encoding Network Example 2 | =============================================== 3 | 4 | .. image:: ../_static/img/cvpr17.svg 5 | :width: 100% 6 | :align: left 7 | 8 | In this section, we show an example of training/testing Encoding-Net for texture recognition on MINC-2500 dataset. Comparing to original Torch implementation, we use *different learning rate* for pre-trained base network and encoding layer (10x), disable color jittering after reducing lr and adopt much *smaller training image size* (224 instead of 352). 9 | 10 | 11 | Test Pre-trained Model 12 | ---------------------- 13 | 14 | - Clone the GitHub repo:: 15 | 16 | git clone https://github.com/zhanghang1989/PyTorch-Encoding 17 | 18 | - Install PyTorch Encoding (if not yet). Please follow the installation guide `Installing PyTorch Encoding <../notes/compile.html>`_. 19 | 20 | - Download the `MINC-2500 `_ dataset using the providied script:: 21 | 22 | cd PyTorch-Encoding/ 23 | python scripts/prepare_minc.py 24 | 25 | - Test pre-trained model on MINC-2500. The pre-trained weight will be automatic downloaded (pre-trained on train-1 split using single training size of 224, with an error rate of :math:`18.96\%` using single crop on test-1 set):: 26 | 27 | python verify.py --dataset minc --model deepten_resnet50_minc 28 | # Teriminal Output: 29 | # Top1: 81.043 | Top5: 95.617: 100%|███████████████████████████████████| 45/45 [00:18<00:00, 2.40it/s] 30 | # Top1 Acc: 81.043 | Top5 Acc: 95.617 31 | 32 | 33 | Train Your Own Model 34 | -------------------- 35 | 36 | - Example training command for training above model:: 37 | 38 | CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dist.py --dataset minc --model deepten_resnet50_minc --batch-size 512 --lr 0.004 --epochs 80 --lr-step 60 --lr-scheduler step --weight-decay 5e-4 39 | 40 | - Detail training options:: 41 | 42 | -h, --help show this help message and exit 43 | --dataset DATASET training dataset (default: cifar10) 44 | --model MODEL network model type (default: densenet) 45 | --backbone BACKBONE backbone name (default: resnet50) 46 | --batch-size N batch size for training (default: 128) 47 | --test-batch-size N batch size for testing (default: 1000) 48 | --epochs N number of epochs to train (default: 300) 49 | --start_epoch N the epoch number to start (default: 0) 50 | --lr LR learning rate (default: 0.1) 51 | --momentum M SGD momentum (default: 0.9) 52 | --weight-decay M SGD weight decay (default: 1e-4) 53 | --no-cuda disables CUDA training 54 | --plot matplotlib 55 | --seed S random seed (default: 1) 56 | --resume RESUME put the path to resuming file if needed 57 | --checkname set the checkpoint name 58 | --eval evaluating 59 | 60 | 61 | Citation 62 | -------- 63 | 64 | .. note:: 65 | * Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." 
*The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*:: 66 | 67 | @InProceedings{Zhang_2017_CVPR, 68 | author = {Zhang, Hang and Xue, Jia and Dana, Kristin}, 69 | title = {Deep TEN: Texture Encoding Network}, 70 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 71 | month = {July}, 72 | year = {2017} 73 | } 74 | -------------------------------------------------------------------------------- /docs/source/utils.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | encoding.utils 5 | ============== 6 | 7 | Useful util functions. 8 | 9 | .. automodule:: encoding.utils 10 | .. currentmodule:: encoding.utils 11 | 12 | :hidden:`LR_Scheduler` 13 | ~~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: LR_Scheduler 16 | :members: 17 | 18 | :hidden:`save_checkpoint` 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autofunction:: save_checkpoint 22 | 23 | :hidden:`SegmentationMetric` 24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. autoclass:: SegmentationMetric 27 | :members: 28 | 29 | :hidden:`batch_pix_accuracy` 30 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 31 | 32 | .. autofunction:: batch_pix_accuracy 33 | 34 | :hidden:`batch_intersection_union` 35 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 36 | 37 | .. autofunction:: batch_intersection_union 38 | -------------------------------------------------------------------------------- /encoding/__init__.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """An optimized PyTorch package with CUDA backend.""" 12 | from .version import __version__ 13 | from . 
import nn, functions, parallel, utils, models, datasets, transforms 14 | -------------------------------------------------------------------------------- /encoding/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from torchvision.datasets import * 3 | from .base import * 4 | from .coco import COCOSegmentation 5 | from .ade20k import ADE20KSegmentation 6 | from .pascal_voc import VOCSegmentation 7 | from .pascal_aug import VOCAugSegmentation 8 | from .pcontext import ContextSegmentation 9 | from .cityscapes import CitySegmentation 10 | from .imagenet import ImageNetDataset 11 | from .minc import MINCDataset 12 | 13 | from ..utils import EncodingDeprecationWarning 14 | 15 | datasets = { 16 | 'coco': COCOSegmentation, 17 | 'ade20k': ADE20KSegmentation, 18 | 'pascal_voc': VOCSegmentation, 19 | 'pascal_aug': VOCAugSegmentation, 20 | 'pcontext': ContextSegmentation, 21 | 'citys': CitySegmentation, 22 | 'imagenet': ImageNetDataset, 23 | 'minc': MINCDataset, 24 | 'cifar10': CIFAR10, 25 | } 26 | 27 | acronyms = { 28 | 'coco': 'coco', 29 | 'pascal_voc': 'voc', 30 | 'pascal_aug': 'voc', 31 | 'pcontext': 'pcontext', 32 | 'ade20k': 'ade', 33 | 'citys': 'citys', 34 | 'minc': 'minc', 35 | 'cifar10': 'cifar10', 36 | } 37 | 38 | def get_dataset(name, **kwargs): 39 | return datasets[name.lower()](**kwargs) 40 | 41 | def _make_deprecate(meth, old_name): 42 | new_name = meth.__name__ 43 | 44 | def deprecated_init(*args, **kwargs): 45 | warnings.warn("encoding.dataset.{} is now deprecated in favor of encoding.dataset.{}." 46 | .format(old_name, new_name), EncodingDeprecationWarning) 47 | return meth(*args, **kwargs) 48 | 49 | deprecated_init.__doc__ = r""" 50 | {old_name}(...) 51 | .. warning:: 52 | This method is now deprecated in favor of :func:`torch.nn.init.{new_name}`. 53 | See :func:`~torch.nn.init.{new_name}` for details.""".format( 54 | old_name=old_name, new_name=new_name) 55 | deprecated_init.__name__ = old_name 56 | return deprecated_init 57 | 58 | get_segmentation_dataset = _make_deprecate(get_dataset, 'get_segmentation_dataset') 59 | -------------------------------------------------------------------------------- /encoding/datasets/base.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import random 8 | import numpy as np 9 | from PIL import Image, ImageOps, ImageFilter 10 | import torch 11 | import torch.utils.data as data 12 | 13 | __all__ = ['BaseDataset', 'test_batchify_fn'] 14 | 15 | class BaseDataset(data.Dataset): 16 | def __init__(self, root, split, mode=None, transform=None, 17 | target_transform=None, base_size=520, crop_size=480): 18 | self.root = root 19 | self.transform = transform 20 | self.target_transform = target_transform 21 | self.split = split 22 | self.mode = mode if mode is not None else split 23 | self.base_size = base_size 24 | self.crop_size = crop_size 25 | if self.mode == 'train': 26 | print('BaseDataset: base_size {}, crop_size {}'. 
\ 27 | format(base_size, crop_size)) 28 | 29 | def __getitem__(self, index): 30 | raise NotImplementedError 31 | 32 | @property 33 | def num_class(self): 34 | return self.NUM_CLASS 35 | 36 | @property 37 | def pred_offset(self): 38 | raise NotImplementedError 39 | 40 | def make_pred(self, x): 41 | return x + self.pred_offset 42 | 43 | def _val_sync_transform(self, img, mask): 44 | outsize = self.crop_size 45 | short_size = outsize 46 | w, h = img.size 47 | if w > h: 48 | oh = short_size 49 | ow = int(1.0 * w * oh / h) 50 | else: 51 | ow = short_size 52 | oh = int(1.0 * h * ow / w) 53 | img = img.resize((ow, oh), Image.BILINEAR) 54 | mask = mask.resize((ow, oh), Image.NEAREST) 55 | # center crop 56 | w, h = img.size 57 | x1 = int(round((w - outsize) / 2.)) 58 | y1 = int(round((h - outsize) / 2.)) 59 | img = img.crop((x1, y1, x1+outsize, y1+outsize)) 60 | mask = mask.crop((x1, y1, x1+outsize, y1+outsize)) 61 | # final transform 62 | return img, self._mask_transform(mask) 63 | 64 | def _sync_transform(self, img, mask): 65 | # random mirror 66 | if random.random() < 0.5: 67 | img = img.transpose(Image.FLIP_LEFT_RIGHT) 68 | mask = mask.transpose(Image.FLIP_LEFT_RIGHT) 69 | crop_size = self.crop_size 70 | # random scale (short edge) 71 | w, h = img.size 72 | long_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0)) 73 | if h > w: 74 | oh = long_size 75 | ow = int(1.0 * w * long_size / h + 0.5) 76 | short_size = ow 77 | else: 78 | ow = long_size 79 | oh = int(1.0 * h * long_size / w + 0.5) 80 | short_size = oh 81 | img = img.resize((ow, oh), Image.BILINEAR) 82 | mask = mask.resize((ow, oh), Image.NEAREST) 83 | # pad crop 84 | if short_size < crop_size: 85 | padh = crop_size - oh if oh < crop_size else 0 86 | padw = crop_size - ow if ow < crop_size else 0 87 | img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0) 88 | mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0) 89 | # random crop crop_size 90 | w, h = img.size 91 | x1 = random.randint(0, w - crop_size) 92 | y1 = random.randint(0, h - crop_size) 93 | img = img.crop((x1, y1, x1+crop_size, y1+crop_size)) 94 | mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size)) 95 | # final transform 96 | return img, self._mask_transform(mask) 97 | 98 | def _mask_transform(self, mask): 99 | return torch.from_numpy(np.array(mask)).long() 100 | 101 | 102 | def test_batchify_fn(data): 103 | error_msg = "batch must contain tensors, tuples or lists; found {}" 104 | if isinstance(data[0], (str, torch.Tensor)): 105 | return list(data) 106 | elif isinstance(data[0], (tuple, list)): 107 | data = zip(*data) 108 | return [test_batchify_fn(i) for i in data] 109 | raise TypeError((error_msg.format(type(data[0])))) 110 | -------------------------------------------------------------------------------- /encoding/datasets/folder.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | import os 8 | import sys 9 | import numpy as np 10 | import random 11 | import math 12 | 13 | import torch.utils.data as data 14 | from PIL import Image, ImageOps 15 | 16 | import torch.utils.data as data 17 | import torchvision.transforms as transform 18 | 19 | 20 | class FolderLoader(data.Dataset): 21 | def __init__(self, root, transform=None): 22 | 
self.root = root 23 | self.transform = transform 24 | self.images = get_folder_images(root) 25 | if len(self.images) == 0: 26 | raise(RuntimeError("Found 0 images in subfolders of: \ 27 | " + self.root + "\n")) 28 | 29 | def __getitem__(self, index): 30 | img = Image.open(self.images[index]).convert('RGB') 31 | if self.transform is not None: 32 | img = self.transform(img) 33 | return img, os.path.basename(self.images[index]) 34 | 35 | def __len__(self): 36 | return len(self.images) 37 | 38 | 39 | def get_folder_images(img_folder): 40 | img_paths = [] 41 | for filename in os.listdir(img_folder): 42 | if filename.endswith(".jpg"): 43 | imgpath = os.path.join(img_folder, filename) 44 | img_paths.append(imgpath) 45 | return img_paths 46 | 47 | 48 | 49 | class Dataloder(): 50 | def __init__(self, args): 51 | # the data augmentation is implemented as part of the dataloader 52 | assert(args.test) 53 | input_transform = transform.Compose([ 54 | transform.ToTensor(), 55 | transform.Normalize(args.mean, args.std)]) 56 | args.test_batch_size = 1 57 | 58 | assert(args.test_folder is not None) 59 | print('loading the data from: {}'.format(args.test_folder)) 60 | 61 | testset = FolderLoader(args.test_folder, input_transform) 62 | kwargs = {'num_workers': args.workers, 'pin_memory': True} \ 63 | if args.cuda else {} 64 | self.trainloader = None 65 | self.testloader = data.DataLoader(testset, 66 | batch_size=args.test_batch_size, 67 | shuffle=False, **kwargs) 68 | 69 | def getloader(self): 70 | return self.trainloader, self.testloader 71 | -------------------------------------------------------------------------------- /encoding/datasets/hpw18.py: -------------------------------------------------------------------------------- 1 | # created by: Sean Liu 2 | # Amazon Lab 126 3 | from __future__ import print_function 4 | 5 | import errno 6 | import hashlib 7 | import os 8 | import sys 9 | import tarfile 10 | import numpy as np 11 | import random 12 | import math 13 | 14 | import torch.utils.data as data 15 | import PIL 16 | from PIL import Image, ImageOps 17 | 18 | from six.moves import urllib 19 | 20 | 21 | class Segmentation_HPW18(data.Dataset): 22 | CLASSES = [ 23 | 'background', 'hat', 'hair', 'sunglasses', 'upper-clothes', 24 | 'skirt', 'pants', 'dress', 'belt', 'left-shoe', 'right-shoe', 25 | 'face', 'left-leg', 'right-leg', 'left-arm', 'right-arm', 'bag', 26 | 'scarf' 27 | ] 28 | 29 | URL = "/cvdata1/lliuqian/humanParsingDataset" 30 | FILE = "hpw18.tar.gz" 31 | MD5 = '' 32 | BASE_DIR = '' 33 | 34 | def __init__(self, 35 | root, 36 | train=True, 37 | transform=None, 38 | target_transform=None, 39 | download=False): 40 | self.root = root 41 | _hpw18_root = os.path.join(self.root, self.BASE_DIR) 42 | _mask_dir = os.path.join(_hpw18_root, 'SegmentationClassAug_256x384') 43 | _image_dir = os.path.join(_hpw18_root, 'JPEGImages_256x384') 44 | self.transform = transform 45 | self.target_transform = target_transform 46 | self.train = train 47 | 48 | if download: 49 | self._download() 50 | 51 | # train/val/test splits are pre-cut 52 | _splits_dir = _hpw18_root 53 | _split_f = os.path.join(_splits_dir, 'humanparsingImageMask_256x384_absPath_train.txt') 54 | if not self.train: 55 | _split_f = os.path.join(_splits_dir, 'humanparsingImageMask_256x384_absPath_val.txt') 56 | 57 | print("reading from ", _split_f) 58 | 59 | self.images = [] 60 | self.masks = [] 61 | with open(os.path.join(_split_f), "r") as lines: 62 | for line in lines: 63 | s = line.split() 64 | _image = s[0] # image absolution path 65 | _mask = 
s[1] # mask absolution path 66 | assert os.path.isfile(_image) 67 | assert os.path.isfile(_mask) 68 | self.images.append(_image) 69 | self.masks.append(_mask) 70 | assert (len(self.images) == len(self.masks)) 71 | 72 | def __getitem__(self, index): 73 | _img = Image.open(self.images[index]).convert('RGB') 74 | _timg = Image.open(self.masks[index]) 75 | _target = np.array(_timg, dtype=np.uint8) 76 | _target = Image.fromarray(_target) 77 | 78 | # synchrosized transform 79 | if self.train: 80 | _img, _target = self._sync_transform( _img, _target) 81 | 82 | # general resize, normalize and toTensor 83 | if self.transform is not None: 84 | _img = self.transform(_img) 85 | if self.target_transform is not None: 86 | _target = self.target_transform(_target) 87 | 88 | return _img, _target 89 | 90 | def __len__(self): 91 | return len(self.images) 92 | 93 | def _sync_transform(self, img, mask): 94 | # random rotate -10~10 95 | deg = random.uniform(-10,10) 96 | img = img.rotate(deg) 97 | mask = mask.rotate(deg, PIL.Image.NEAREST) 98 | 99 | return img, mask 100 | 101 | if __name__ == '__main__': 102 | hpw18 = Segmentation_HPW18('/cvdata1/lliuqian/', train=True) 103 | print(hpw18[0]) 104 | print (len(hpw18)) 105 | -------------------------------------------------------------------------------- /encoding/datasets/imagenet.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2018 5 | ## 6 | ## This source code is licensed under the MIT-style license found in the 7 | ## LICENSE file in the root directory of this source tree 8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 | 10 | import os 11 | import torchvision.transforms as transforms 12 | import torchvision.datasets as datasets 13 | 14 | import warnings 15 | warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning) 16 | 17 | class ImageNetDataset(datasets.ImageFolder): 18 | BASE_DIR = "ILSVRC2012" 19 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), transform=None, 20 | target_transform=None, train=True, **kwargs): 21 | split='train' if train == True else 'val' 22 | root = os.path.join(root, self.BASE_DIR, split) 23 | super(ImageNetDataset, self).__init__( 24 | root, transform, target_transform) 25 | -------------------------------------------------------------------------------- /encoding/datasets/minc.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import os 12 | from PIL import Image 13 | 14 | import torch 15 | import torch.utils.data as data 16 | 17 | class MINCDataset(data.Dataset): 18 | NUM_CLASS = 23 19 | def __init__(self, root=os.path.expanduser('~/.encoding/data/'), 20 | train=True, transform=None, download=None): 21 | split='train' if train == True else 'val' 22 | root = os.path.join(root, 'minc-2500') 23 | self.transform = transform 24 | classes, class_to_idx = find_classes(root + '/images') 25 | 
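        # Expected on-disk layout, as inferred from the parsing code below
        # (not from the MINC-2500 release itself):
        #   minc-2500/images/<class>/<image>.jpg
        #   minc-2500/labels/train1.txt and labels/test1.txt, each holding one
        #   relative image path per line; the parent directory name of each
        #   path is taken as its class label.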
if split=='train': 26 | filename = os.path.join(root, 'labels/train1.txt') 27 | else: 28 | filename = os.path.join(root, 'labels/test1.txt') 29 | 30 | self.images, self.labels = make_dataset(filename, root, 31 | class_to_idx) 32 | assert (len(self.images) == len(self.labels)) 33 | 34 | def __getitem__(self, index): 35 | _img = Image.open(self.images[index]).convert('RGB') 36 | _label = self.labels[index] 37 | if self.transform is not None: 38 | _img = self.transform(_img) 39 | 40 | return _img, _label 41 | 42 | def __len__(self): 43 | return len(self.images) 44 | 45 | def find_classes(dir): 46 | classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] 47 | classes.sort() 48 | class_to_idx = {classes[i]: i for i in range(len(classes))} 49 | return classes, class_to_idx 50 | 51 | 52 | def make_dataset(filename, datadir, class_to_idx): 53 | images = [] 54 | labels = [] 55 | with open(os.path.join(filename), "r") as lines: 56 | for line in lines: 57 | _image = os.path.join(datadir, line.rstrip('\n')) 58 | _dirname = os.path.split(os.path.dirname(_image))[1] 59 | assert os.path.isfile(_image) 60 | label = class_to_idx[_dirname] 61 | images.append(_image) 62 | labels.append(label) 63 | 64 | return images, labels 65 | 66 | -------------------------------------------------------------------------------- /encoding/datasets/pascal_aug.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import scipy.io 4 | import numpy as np 5 | from PIL import Image, ImageOps, ImageFilter 6 | 7 | from .base import BaseDataset 8 | 9 | class VOCAugSegmentation(BaseDataset): 10 | voc = [ 11 | 'background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle', 12 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 13 | 'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train', 14 | 'tv' 15 | ] 16 | NUM_CLASS = 21 17 | TRAIN_BASE_DIR = 'VOCaug/dataset/' 18 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 19 | mode=None, transform=None, target_transform=None, **kwargs): 20 | super(VOCAugSegmentation, self).__init__(root, split, mode, transform, 21 | target_transform, **kwargs) 22 | # train/val/test splits are pre-cut 23 | _voc_root = os.path.join(root, self.TRAIN_BASE_DIR) 24 | _mask_dir = os.path.join(_voc_root, 'cls') 25 | _image_dir = os.path.join(_voc_root, 'img') 26 | if self.mode == 'train': 27 | _split_f = os.path.join(_voc_root, 'trainval.txt') 28 | elif self.mode == 'val': 29 | _split_f = os.path.join(_voc_root, 'val.txt') 30 | else: 31 | raise RuntimeError('Unknown dataset split.') 32 | self.images = [] 33 | self.masks = [] 34 | with open(os.path.join(_split_f), "r") as lines: 35 | for line in lines: 36 | _image = os.path.join(_image_dir, line.rstrip('\n')+".jpg") 37 | assert os.path.isfile(_image) 38 | self.images.append(_image) 39 | if self.mode != 'test': 40 | _mask = os.path.join(_mask_dir, line.rstrip('\n')+".mat") 41 | assert os.path.isfile(_mask) 42 | self.masks.append(_mask) 43 | 44 | assert (len(self.images) == len(self.masks)) 45 | 46 | def __getitem__(self, index): 47 | _img = Image.open(self.images[index]).convert('RGB') 48 | if self.mode == 'test': 49 | if self.transform is not None: 50 | _img = self.transform(_img) 51 | return _img, os.path.basename(self.images[index]) 52 | _target = self._load_mat(self.masks[index]) 53 | # synchrosized transform 54 | if self.mode == 'train': 55 | _img, _target = self._sync_transform( _img, _target) 56 | elif self.mode == 
'val': 57 | _img, _target = self._val_sync_transform( _img, _target) 58 | # general resize, normalize and toTensor 59 | if self.transform is not None: 60 | _img = self.transform(_img) 61 | if self.target_transform is not None: 62 | _target = self.target_transform(_target) 63 | return _img, _target 64 | 65 | def _load_mat(self, filename): 66 | mat = scipy.io.loadmat(filename, mat_dtype=True, squeeze_me=True, 67 | struct_as_record=False) 68 | mask = mat['GTcls'].Segmentation 69 | return Image.fromarray(mask) 70 | 71 | def __len__(self): 72 | return len(self.images) 73 | -------------------------------------------------------------------------------- /encoding/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | from PIL import Image, ImageOps, ImageFilter 5 | from tqdm import tqdm 6 | 7 | import torch 8 | from .base import BaseDataset 9 | 10 | class VOCSegmentation(BaseDataset): 11 | CLASSES = [ 12 | 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 13 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 14 | 'motorbike', 'person', 'potted-plant', 'sheep', 'sofa', 'train', 15 | 'tv/monitor', 'ambigious' 16 | ] 17 | NUM_CLASS = 21 18 | BASE_DIR = 'VOCdevkit/VOC2012' 19 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 20 | mode=None, transform=None, target_transform=None, **kwargs): 21 | super(VOCSegmentation, self).__init__(root, split, mode, transform, 22 | target_transform, **kwargs) 23 | _voc_root = os.path.join(self.root, self.BASE_DIR) 24 | _mask_dir = os.path.join(_voc_root, 'SegmentationClass') 25 | _image_dir = os.path.join(_voc_root, 'JPEGImages') 26 | # train/val/test splits are pre-cut 27 | _splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation') 28 | if self.mode == 'train': 29 | _split_f = os.path.join(_splits_dir, 'trainval.txt') 30 | elif self.mode == 'val': 31 | _split_f = os.path.join(_splits_dir, 'val.txt') 32 | elif self.mode == 'test': 33 | _split_f = os.path.join(_splits_dir, 'test.txt') 34 | else: 35 | raise RuntimeError('Unknown dataset split.') 36 | self.images = [] 37 | self.masks = [] 38 | with open(os.path.join(_split_f), "r") as lines: 39 | for line in tqdm(lines): 40 | _image = os.path.join(_image_dir, line.rstrip('\n')+".jpg") 41 | assert os.path.isfile(_image) 42 | self.images.append(_image) 43 | if self.mode != 'test': 44 | _mask = os.path.join(_mask_dir, line.rstrip('\n')+".png") 45 | assert os.path.isfile(_mask) 46 | self.masks.append(_mask) 47 | 48 | if self.mode != 'test': 49 | assert (len(self.images) == len(self.masks)) 50 | 51 | def __getitem__(self, index): 52 | img = Image.open(self.images[index]).convert('RGB') 53 | if self.mode == 'test': 54 | if self.transform is not None: 55 | img = self.transform(img) 56 | return img, os.path.basename(self.images[index]) 57 | target = Image.open(self.masks[index]) 58 | # synchrosized transform 59 | if self.mode == 'train': 60 | img, target = self._sync_transform( img, target) 61 | elif self.mode == 'val': 62 | img, target = self._val_sync_transform( img, target) 63 | else: 64 | assert self.mode == 'testval' 65 | mask = self._mask_transform(mask) 66 | # general resize, normalize and toTensor 67 | if self.transform is not None: 68 | img = self.transform(img) 69 | if self.target_transform is not None: 70 | target = self.target_transform(target) 71 | return img, target 72 | 73 | def _mask_transform(self, mask): 74 | target = 
np.array(mask).astype('int32') 75 | target[target == 255] = -1 76 | return torch.from_numpy(target).long() 77 | 78 | def __len__(self): 79 | return len(self.images) 80 | 81 | @property 82 | def pred_offset(self): 83 | return 0 84 | -------------------------------------------------------------------------------- /encoding/datasets/pcontext.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | 7 | from PIL import Image, ImageOps, ImageFilter 8 | import os 9 | import math 10 | import random 11 | import numpy as np 12 | from tqdm import trange 13 | 14 | import torch 15 | from .base import BaseDataset 16 | 17 | class ContextSegmentation(BaseDataset): 18 | BASE_DIR = 'VOCdevkit/VOC2010' 19 | NUM_CLASS = 59 20 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 21 | mode=None, transform=None, target_transform=None, **kwargs): 22 | super(ContextSegmentation, self).__init__( 23 | root, split, mode, transform, target_transform, **kwargs) 24 | from detail import Detail 25 | #from detail import mask 26 | root = os.path.join(root, self.BASE_DIR) 27 | annFile = os.path.join(root, 'trainval_merged.json') 28 | imgDir = os.path.join(root, 'JPEGImages') 29 | # training mode 30 | self.detail = Detail(annFile, imgDir, split) 31 | self.transform = transform 32 | self.target_transform = target_transform 33 | self.ids = self.detail.getImgs() 34 | # generate masks 35 | self._mapping = np.sort(np.array([ 36 | 0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 37 | 23, 397, 25, 284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 38 | 427, 44, 45, 46, 308, 59, 440, 445, 31, 232, 65, 354, 424, 39 | 68, 326, 72, 458, 34, 207, 80, 355, 85, 347, 220, 349, 360, 40 | 98, 187, 104, 105, 366, 189, 368, 113, 115])) 41 | self._key = np.array(range(len(self._mapping))).astype('uint8') 42 | mask_file = os.path.join(root, self.split+'.pth') 43 | print('mask_file:', mask_file) 44 | if os.path.exists(mask_file): 45 | self.masks = torch.load(mask_file) 46 | else: 47 | self.masks = self._preprocess(mask_file) 48 | 49 | def _class_to_index(self, mask): 50 | # assert the values 51 | values = np.unique(mask) 52 | for i in range(len(values)): 53 | assert(values[i] in self._mapping) 54 | index = np.digitize(mask.ravel(), self._mapping, right=True) 55 | return self._key[index].reshape(mask.shape) 56 | 57 | def _preprocess(self, mask_file): 58 | masks = {} 59 | tbar = trange(len(self.ids)) 60 | print("Preprocessing mask, this will take a while." 
+ \ 61 | "But don't worry, it only run once for each split.") 62 | for i in tbar: 63 | img_id = self.ids[i] 64 | mask = Image.fromarray(self._class_to_index( 65 | self.detail.getMask(img_id))) 66 | masks[img_id['image_id']] = mask 67 | tbar.set_description("Preprocessing masks {}".format(img_id['image_id'])) 68 | torch.save(masks, mask_file) 69 | return masks 70 | 71 | def __getitem__(self, index): 72 | img_id = self.ids[index] 73 | path = img_id['file_name'] 74 | iid = img_id['image_id'] 75 | img = Image.open(os.path.join(self.detail.img_folder, path)).convert('RGB') 76 | if self.mode == 'test': 77 | if self.transform is not None: 78 | img = self.transform(img) 79 | return img, os.path.basename(path) 80 | # convert mask to 60 categories 81 | mask = self.masks[iid] 82 | # synchrosized transform 83 | if self.mode == 'train': 84 | img, mask = self._sync_transform(img, mask) 85 | elif self.mode == 'val': 86 | img, mask = self._val_sync_transform(img, mask) 87 | else: 88 | assert self.mode == 'testval' 89 | mask = self._mask_transform(mask) 90 | # general resize, normalize and toTensor 91 | if self.transform is not None: 92 | img = self.transform(img) 93 | if self.target_transform is not None: 94 | mask = self.target_transform(mask) 95 | return img, mask 96 | 97 | def _mask_transform(self, mask): 98 | target = np.array(mask).astype('int32') - 1 99 | return torch.from_numpy(target).long() 100 | 101 | def __len__(self): 102 | return len(self.ids) 103 | 104 | @property 105 | def pred_offset(self): 106 | return 1 107 | -------------------------------------------------------------------------------- /encoding/functions/__init__.py: -------------------------------------------------------------------------------- 1 | """Encoding Autograd Fuctions""" 2 | from .encoding import * 3 | from .syncbn import * 4 | from .dist_syncbn import dist_syncbatchnorm 5 | from .customize import * 6 | from .rectify import * 7 | -------------------------------------------------------------------------------- /encoding/functions/customize.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2018 5 | ## 6 | ## This source code is licensed under the MIT-style license found in the 7 | ## LICENSE file in the root directory of this source tree 8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 | 10 | """Customized functions""" 11 | 12 | import torch 13 | from torch.autograd import Variable, Function 14 | from .. import lib 15 | 16 | __all__ = ['NonMaxSuppression'] 17 | 18 | def NonMaxSuppression(boxes, scores, threshold): 19 | r"""Non-Maximum Suppression 20 | The algorithm begins by storing the highest-scoring bounding 21 | box, and eliminating any box whose intersection-over-union (IoU) 22 | with it is too great. The procedure repeats on the surviving 23 | boxes, and so on until there are no boxes left. 24 | The stored boxes are returned. 25 | 26 | NB: The function returns a tuple (mask, indices), where 27 | indices index into the input boxes and are sorted 28 | according to score, from higest to lowest. 29 | indices[i][mask[i]] gives the indices of the surviving 30 | boxes from the ith batch, sorted by score. 
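    Boxes are interpreted as ``(x, y, width, height)``, which is the format
    the IoU computation in the backend kernels assumes (see nms_cpu.cpp).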
31 | 32 | Args: 33 | - boxes :math:`(N, n_boxes, 4)` 34 | - scroes :math:`(N, n_boxes)` 35 | - threshold (float): IoU above which to eliminate boxes 36 | 37 | Outputs: 38 | - mask: :math:`(N, n_boxes)` 39 | - indicies: :math:`(N, n_boxes)` 40 | 41 | Examples:: 42 | 43 | >>> boxes = torch.Tensor([[[10., 20., 20., 15.], 44 | >>> [24., 22., 50., 54.], 45 | >>> [10., 21., 20. 14.5]]]) 46 | >>> scores = torch.abs(torch.randn([1, 3])) 47 | >>> mask, indices = NonMaxSuppression(boxes, scores, 0.7) 48 | >>> #indices are SORTED according to score. 49 | >>> surviving_box_indices = indices[mask] 50 | """ 51 | if boxes.is_cuda: 52 | return lib.gpu.non_max_suppression(boxes, scores, threshold) 53 | else: 54 | return lib.cpu.non_max_suppression(boxes, scores, threshold) 55 | -------------------------------------------------------------------------------- /encoding/functions/dist_syncbn.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## LICENSE file in the root directory of this source tree 7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 | 9 | import torch 10 | from torch.autograd.function import Function 11 | from .. import lib 12 | 13 | __all__ = ['dist_syncbatchnorm'] 14 | 15 | class dist_syncbatchnorm_(Function): 16 | @staticmethod 17 | def forward(ctx, x, gamma, beta, running_mean, running_var, eps, momentum, training, process_group): 18 | x = x.contiguous() 19 | ctx.training = training 20 | ctx.momentum = momentum 21 | ctx.eps = eps 22 | ctx.process_group = process_group 23 | 24 | if not ctx.training: 25 | _ex, _var = running_mean.contiguous(), running_var.contiguous() 26 | _exs = _var + _ex ** 2 27 | if x.is_cuda: 28 | y = lib.gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps) 29 | else: 30 | y = lib.cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps) 31 | ctx.save_for_backward(x, _ex, _exs, gamma, beta) 32 | return y 33 | 34 | size = x.numel() // x.size(1) 35 | if size == 1: 36 | raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size)) 37 | 38 | if x.is_cuda: 39 | _ex, _exs = lib.gpu.expectation_forward(x) 40 | else: 41 | raise NotImplemented 42 | 43 | count = torch.Tensor([1]).to(x.device) 44 | count_all_reduce = torch.distributed.all_reduce(count, group=process_group, async_op=True) 45 | _ex_all_reduce = torch.distributed.all_reduce(_ex, group=process_group, async_op=True) 46 | _exs_all_reduce = torch.distributed.all_reduce(_exs, group=process_group, async_op=True) 47 | 48 | count_all_reduce.wait() 49 | _ex_all_reduce.wait() 50 | _exs_all_reduce.wait() 51 | 52 | _ex = _ex / count 53 | _exs = _exs / count 54 | 55 | # Update running stats 56 | _var = _exs - _ex ** 2 57 | running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * _ex) 58 | running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * _var) 59 | 60 | # Mark in-place modified tensors 61 | ctx.mark_dirty(running_mean, running_var) 62 | 63 | # BN forward + activation 64 | if x.is_cuda: 65 | y = lib.gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps) 66 | else: 67 | y = lib.cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps) 68 | 69 | ctx.save_for_backward(x, _ex, _exs, gamma, beta) 70 | return y 71 | 72 | @staticmethod 73 | def backward(ctx, dz): 74 | x, _ex, _exs, gamma, beta = ctx.saved_tensors 75 | dz = 
dz.contiguous() 76 | 77 | # BN backward 78 | if dz.is_cuda: 79 | dx, _dex, _dexs, dgamma, dbeta = \ 80 | lib.gpu.batchnorm_backward(dz, x, _ex, _exs, gamma, beta, ctx.eps) 81 | else: 82 | raise NotImplemented 83 | 84 | if ctx.training: 85 | process_group = ctx.process_group 86 | count = torch.Tensor([1]).to(x.device) 87 | count_all_reduce = torch.distributed.all_reduce(count, group=process_group, async_op=True) 88 | _dex_all_reduce = torch.distributed.all_reduce(_dex, group=process_group, async_op=True) 89 | _dexs_all_reduce = torch.distributed.all_reduce(_dexs, group=process_group, async_op=True) 90 | 91 | count_all_reduce.wait() 92 | _dex_all_reduce.wait() 93 | _dexs_all_reduce.wait() 94 | 95 | _dex = _dex / count 96 | _dexs = _dexs / count 97 | 98 | if x.is_cuda: 99 | dx_ = lib.gpu.expectation_backward(x, _dex, _dexs) 100 | else: 101 | raise NotImplemented 102 | dx = dx + dx_ 103 | 104 | return dx, dgamma, dbeta, None, None, None, None, None, None 105 | 106 | dist_syncbatchnorm = dist_syncbatchnorm_.apply 107 | -------------------------------------------------------------------------------- /encoding/functions/encoding.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2018 5 | ## 6 | ## This source code is licensed under the MIT-style license found in the 7 | ## LICENSE file in the root directory of this source tree 8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 | 10 | """Functions for Encoding Layer""" 11 | import torch 12 | from torch.autograd import Function, Variable 13 | import torch.nn.functional as F 14 | from .. import lib 15 | 16 | __all__ = ['aggregate', 'scaled_l2', 'pairwise_cosine'] 17 | 18 | class _aggregate(Function): 19 | @staticmethod 20 | def forward(ctx, A, X, C): 21 | # A \in(BxNxK) R \in(BxNxKxD) => E \in(BxNxD) 22 | ctx.save_for_backward(A, X, C) 23 | if A.is_cuda: 24 | E = lib.gpu.aggregate_forward(A, X, C) 25 | else: 26 | E = lib.cpu.aggregate_forward(A, X, C) 27 | return E 28 | 29 | @staticmethod 30 | def backward(ctx, gradE): 31 | A, X, C = ctx.saved_variables 32 | if A.is_cuda: 33 | gradA, gradX, gradC = lib.gpu.aggregate_backward(gradE, A, X, C) 34 | else: 35 | gradA, gradX, gradC = lib.cpu.aggregate_backward(gradE, A, X, C) 36 | return gradA, gradX, gradC 37 | 38 | def aggregate(A, X, C): 39 | r""" Aggregate operation, aggregate the residuals of inputs (:math:`X`) with repect 40 | to the codewords (:math:`C`) with assignment weights (:math:`A`). 41 | 42 | .. math:: 43 | 44 | e_{k} = \sum_{i=1}^{N} a_{ik} (x_i - d_k) 45 | 46 | Shape: 47 | - Input: :math:`A\in\mathcal{R}^{B\times N\times K}` 48 | :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}` 49 | (where :math:`B` is batch, :math:`N` is total number of features, 50 | :math:`K` is number is codewords, :math:`D` is feature dimensions.) 
51 | - Output: :math:`E\in\mathcal{R}^{B\times K\times D}` 52 | 53 | Examples: 54 | >>> B,N,K,D = 2,3,4,5 55 | >>> A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), requires_grad=True) 56 | >>> X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), requires_grad=True) 57 | >>> C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), requires_grad=True) 58 | >>> func = encoding.aggregate() 59 | >>> E = func(A, X, C) 60 | """ 61 | return _aggregate.apply(A, X, C) 62 | 63 | class _scaled_l2(Function): 64 | @staticmethod 65 | def forward(ctx, X, C, S): 66 | if X.is_cuda: 67 | SL = lib.gpu.scaled_l2_forward(X, C, S) 68 | else: 69 | SL = lib.cpu.scaled_l2_forward(X, C, S) 70 | ctx.save_for_backward(X, C, S, SL) 71 | return SL 72 | 73 | @staticmethod 74 | def backward(ctx, gradSL): 75 | X, C, S, SL = ctx.saved_variables 76 | if X.is_cuda: 77 | gradX, gradC, gradS = lib.gpu.scaled_l2_backward(gradSL, X, C, S, SL) 78 | else: 79 | gradX, gradC, gradS = lib.cpu.scaled_l2_backward(gradSL, X, C, S, SL) 80 | return gradX, gradC, gradS 81 | 82 | def scaled_l2(X, C, S): 83 | r""" scaled_l2 distance 84 | 85 | .. math:: 86 | sl_{ik} = s_k \|x_i-c_k\|^2 87 | 88 | Shape: 89 | - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` 90 | :math:`C\in\mathcal{R}^{K\times D}` :math:`S\in \mathcal{R}^K` 91 | (where :math:`B` is batch, :math:`N` is total number of features, 92 | :math:`K` is number is codewords, :math:`D` is feature dimensions.) 93 | - Output: :math:`E\in\mathcal{R}^{B\times N\times K}` 94 | """ 95 | return _scaled_l2.apply(X, C, S) 96 | 97 | # Experimental 98 | def pairwise_cosine(X, C, normalize=False): 99 | r"""Pairwise Cosine Similarity or Dot-product Similarity 100 | Shape: 101 | - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` 102 | :math:`C\in\mathcal{R}^{K\times D}` :math:`S\in \mathcal{R}^K` 103 | (where :math:`B` is batch, :math:`N` is total number of features, 104 | :math:`K` is number is codewords, :math:`D` is feature dimensions.) 105 | - Output: :math:`E\in\mathcal{R}^{B\times N\times K}` 106 | """ 107 | if normalize: 108 | X = F.normalize(X, dim=2, eps=1e-8) 109 | C = F.normalize(C, dim=1, eps=1e-8) 110 | return torch.matmul(X, C.t()) 111 | -------------------------------------------------------------------------------- /encoding/functions/rectify.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## LICENSE file in the root directory of this source tree 7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 | 9 | """Rectify function""" 10 | import torch 11 | from torch.autograd import Function 12 | 13 | from .. 
import lib 14 | 15 | __all__ = ['rectify'] 16 | 17 | class _rectify(Function): 18 | @staticmethod 19 | def forward(ctx, y, x, kernel_size, stride, padding, dilation, average): 20 | ctx.save_for_backward(x) 21 | # assuming kernel_size is 3 22 | kernel_size = [k + 2 * (d - 1) for k,d in zip(kernel_size, dilation)] 23 | ctx.kernel_size = kernel_size 24 | ctx.stride = stride 25 | ctx.padding = padding 26 | ctx.dilation = dilation 27 | ctx.average = average 28 | if x.is_cuda: 29 | lib.gpu.conv_rectify(y, x, kernel_size, stride, padding, dilation, average) 30 | else: 31 | lib.cpu.conv_rectify(y, x, kernel_size, stride, padding, dilation, average) 32 | ctx.mark_dirty(y) 33 | return y 34 | 35 | @staticmethod 36 | def backward(ctx, grad_y): 37 | x, = ctx.saved_variables 38 | if x.is_cuda: 39 | lib.gpu.conv_rectify(grad_y, x, ctx.kernel_size, ctx.stride, 40 | ctx.padding, ctx.dilation, ctx.average) 41 | else: 42 | lib.cpu.conv_rectify(grad_y, x, ctx.kernel_size, ctx.stride, 43 | ctx.padding, ctx.dilation, ctx.average) 44 | ctx.mark_dirty(grad_y) 45 | return grad_y, None, None, None, None, None, None 46 | 47 | rectify = _rectify.apply 48 | -------------------------------------------------------------------------------- /encoding/lib/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.cpp_extension import load 4 | 5 | cwd = os.path.dirname(os.path.realpath(__file__)) 6 | cpu_path = os.path.join(cwd, 'cpu') 7 | gpu_path = os.path.join(cwd, 'gpu') 8 | 9 | cpu = load('enclib_cpu', [ 10 | os.path.join(cpu_path, 'operator.cpp'), 11 | os.path.join(cpu_path, 'encoding_cpu.cpp'), 12 | os.path.join(cpu_path, 'syncbn_cpu.cpp'), 13 | os.path.join(cpu_path, 'roi_align_cpu.cpp'), 14 | os.path.join(cpu_path, 'nms_cpu.cpp'), 15 | os.path.join(cpu_path, 'rectify_cpu.cpp'), 16 | ], build_directory=cpu_path, verbose=False) 17 | 18 | if torch.cuda.is_available(): 19 | gpu = load('enclib_gpu', [ 20 | os.path.join(gpu_path, 'operator.cpp'), 21 | os.path.join(gpu_path, 'activation_kernel.cu'), 22 | os.path.join(gpu_path, 'encoding_kernel.cu'), 23 | os.path.join(gpu_path, 'syncbn_kernel.cu'), 24 | os.path.join(gpu_path, 'roi_align_kernel.cu'), 25 | os.path.join(gpu_path, 'nms_kernel.cu'), 26 | os.path.join(gpu_path, 'rectify_cuda.cu'), 27 | os.path.join(gpu_path, 'lib_ssd.cu'), 28 | ], extra_cuda_cflags=["--expt-extended-lambda"], 29 | build_directory=gpu_path, verbose=False) 30 | -------------------------------------------------------------------------------- /encoding/lib/cpu/encoding_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | at::Tensor Aggregate_Forward_CPU( 5 | const at::Tensor A, 6 | const at::Tensor X, 7 | const at::Tensor C) { 8 | auto E = (A.unsqueeze(3) * (X.unsqueeze(2).expand({X.size(0), X.size(1), 9 | C.size(0), C.size(1)}) - C.unsqueeze(0).unsqueeze(0))).sum(1); 10 | return E; 11 | } 12 | 13 | std::vector Aggregate_Backward_CPU( 14 | const at::Tensor GE, 15 | const at::Tensor A, 16 | const at::Tensor X, 17 | const at::Tensor C) { 18 | auto gradA = (GE.unsqueeze(1) * (X.unsqueeze(2).expand({X.size(0), X.size(1), 19 | C.size(0), C.size(1)}) - C.unsqueeze(0).unsqueeze(0))).sum(3); 20 | auto gradX = at::bmm(A, GE); 21 | auto gradC = (-GE * A.sum(1).unsqueeze(2)).sum(0); 22 | return {gradA, gradX, gradC}; 23 | } 24 | 25 | at::Tensor ScaledL2_Forward_CPU( 26 | const at::Tensor X, 27 | const at::Tensor C, 28 | const at::Tensor S) { 29 | auto SL 
= S.view({1, 1, C.size(0)}) * (X.unsqueeze(2).expand({X.size(0), X.size(1), 30 | C.size(0), C.size(1)}) - C.unsqueeze(0).unsqueeze(0)).pow(2).sum(3); 31 | return SL; 32 | } 33 | 34 | std::vector ScaledL2_Backward_CPU( 35 | const at::Tensor GSL, 36 | const at::Tensor X, 37 | const at::Tensor C, 38 | const at::Tensor S, 39 | const at::Tensor SL) { 40 | auto tmp = (2 * GSL * S.view({1, 1, C.size(0)})).unsqueeze(3) * 41 | (X.unsqueeze(2).expand({X.size(0), X.size(1), C.size(0), C.size(1)}) - 42 | C.unsqueeze(0).unsqueeze(0)); 43 | auto GX = tmp.sum(2); 44 | auto GC = tmp.sum(0).sum(0); 45 | auto GS = (GSL * (SL / S.view({1, 1, C.size(0)}))).sum(0).sum(0); 46 | return {GX, GC, GS}; 47 | } 48 | -------------------------------------------------------------------------------- /encoding/lib/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #ifdef _OPENMP 6 | #include 7 | #endif 8 | 9 | template 10 | inline scalar IoU(scalar* rawInput, int idx_x, int idx_y) { 11 | scalar lr = std::fmin(rawInput[idx_x*4] + rawInput[idx_x*4+2], 12 | rawInput[idx_y*4] + rawInput[idx_y*4+2]); 13 | scalar rl = std::fmax(rawInput[idx_x*4], rawInput[idx_y*4]); 14 | scalar tb = std::fmin(rawInput[idx_x*4+1] + rawInput[idx_x*4+3], 15 | rawInput[idx_y*4+1] + rawInput[idx_y*4+3]); 16 | scalar bt = std::fmax(rawInput[idx_x*4+1], rawInput[idx_y*4+1]); 17 | scalar inter = std::fmax(0, lr-rl)*std::fmax(0, tb-bt); 18 | scalar uni = (rawInput[idx_x*4+2]*rawInput[idx_x*4+3] 19 | + rawInput[idx_y*4+2]*rawInput[idx_y*4+3] - inter); 20 | return inter/uni; 21 | } 22 | 23 | 24 | std::vector Non_Max_Suppression_CPU( 25 | const at::Tensor& input, 26 | const at::Tensor& scores, 27 | double thresh) { 28 | AT_ASSERT(input.ndimension() == 3); 29 | AT_ASSERT(scores.ndimension() == 2); 30 | AT_ASSERT(input.size(0) == scores.size(0)); 31 | AT_ASSERT(input.size(1) == scores.size(1)); 32 | AT_ASSERT(input.size(2) == 4); 33 | AT_ASSERT(input.is_contiguous()); 34 | AT_ASSERT(scores.is_contiguous()); 35 | AT_ASSERT(input.type().scalarType() == at::kFloat || input.type().scalarType() == at::kDouble); 36 | AT_ASSERT(scores.type().scalarType() == at::kFloat || scores.type().scalarType() == at::kDouble); 37 | AT_ASSERT(input.is_contiguous()); 38 | AT_ASSERT(scores.is_contiguous()); 39 | 40 | 41 | at::Tensor sorted_inds = std::get<1>(scores.sort(-1, true)); 42 | //at::Tensor rawIdx = std::get<1>(scores.sort(-1, true)); 43 | 44 | auto num_boxes = input.size(1); 45 | auto batch_size = input.size(0); 46 | auto mask = torch::zeros({batch_size, num_boxes}, input.type().toScalarType(at::kByte)); 47 | //auto mask = input.type().toScalarType(at::kByte).tensor({batch_size, num_boxes}); 48 | mask.fill_(1); 49 | auto *rawMask = mask.data(); 50 | auto *rawIdx = sorted_inds.data(); 51 | 52 | if (input.type().scalarType() == at::kFloat) 53 | { 54 | auto *rawInput = input.data(); 55 | 56 | for(int batch=0; batch thresh) 67 | rawMask[i] = 0; 68 | } 69 | ++pos; 70 | while(pos < (1+batch)*num_boxes-1 and (rawMask[pos] == 0)) 71 | ++pos; 72 | } 73 | } 74 | } 75 | else 76 | { 77 | auto *rawInput = input.data(); 78 | for(int batch=0; batch thresh) 89 | rawMask[i] = 0; 90 | } 91 | ++pos; 92 | while(pos < (1+batch)*num_boxes-1 and (rawMask[pos] == 0)) 93 | ++pos; 94 | } 95 | } 96 | } 97 | //see ./cuda/NonMaxSuppression.cu for comment about return value. 
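  // Both outputs follow the score-sorted order: sorted_inds holds the original
  // box indices sorted by descending score, and mask marks, per sorted slot,
  // whether that box survives suppression, so sorted_inds[mask] gives the kept
  // boxes (see the Python wrapper in encoding/functions/customize.py).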
98 | return {mask, sorted_inds}; 99 | } 100 | -------------------------------------------------------------------------------- /encoding/lib/cpu/operator.cpp: -------------------------------------------------------------------------------- 1 | #include "operator.h" 2 | 3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 4 | m.def("roi_align_forward", &ROIAlign_Forward_CPU, "ROI Align forward (CPU)"); 5 | m.def("roi_align_backward", &ROIAlign_Backward_CPU, "ROI Align backward (CPU)"); 6 | m.def("aggregate_forward", &Aggregate_Forward_CPU, "Aggregate forward (CPU)"); 7 | m.def("aggregate_backward", &Aggregate_Backward_CPU, "Aggregate backward (CPU)"); 8 | m.def("scaled_l2_forward", &ScaledL2_Forward_CPU, "ScaledL2 forward (CPU)"); 9 | m.def("scaled_l2_backward", &ScaledL2_Backward_CPU, "ScaledL2 backward (CPU)"); 10 | m.def("batchnorm_forward", &BatchNorm_Forward_CPU, "BatchNorm forward (CPU)"); 11 | m.def("batchnorm_backward", &BatchNorm_Backward_CPU, "BatchNorm backward (CPU)"); 12 | m.def("sumsquare_forward", &Sum_Square_Forward_CPU, "SumSqu forward (CPU)"); 13 | m.def("sumsquare_backward", &Sum_Square_Backward_CPU, "SumSqu backward (CPU)"); 14 | m.def("non_max_suppression", &Non_Max_Suppression_CPU, "NMS (CPU)"); 15 | m.def("conv_rectify", &CONV_RECTIFY_CPU, "Convolution Rectifier (CPU)"); 16 | // Apply fused color jitter 17 | m.def("apply_transform", &apply_transform, "apply_transform"); 18 | } 19 | -------------------------------------------------------------------------------- /encoding/lib/cpu/operator.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | at::Tensor ROIAlign_Forward_CPU( 9 | const at::Tensor& input, 10 | const at::Tensor& bottom_rois, 11 | int64_t pooled_height, 12 | int64_t pooled_width, 13 | double spatial_scale, 14 | int64_t sampling_ratio); 15 | 16 | at::Tensor ROIAlign_Backward_CPU( 17 | const at::Tensor& bottom_rois, 18 | const at::Tensor& grad_output, 19 | int64_t b_size, 20 | int64_t channels, 21 | int64_t height, 22 | int64_t width, 23 | int64_t pooled_height, 24 | int64_t pooled_width, 25 | double spatial_scale, 26 | int64_t sampling_ratio); 27 | 28 | at::Tensor Aggregate_Forward_CPU( 29 | const at::Tensor A, 30 | const at::Tensor X, 31 | const at::Tensor C); 32 | 33 | std::vector Aggregate_Backward_CPU( 34 | const at::Tensor GE, 35 | const at::Tensor A, 36 | const at::Tensor X, 37 | const at::Tensor C); 38 | 39 | at::Tensor ScaledL2_Forward_CPU( 40 | const at::Tensor X_, 41 | const at::Tensor C_, 42 | const at::Tensor S_); 43 | 44 | std::vector ScaledL2_Backward_CPU( 45 | const at::Tensor GSL_, 46 | const at::Tensor X_, 47 | const at::Tensor C_, 48 | const at::Tensor S_, 49 | const at::Tensor SL_); 50 | 51 | at::Tensor BatchNorm_Forward_CPU( 52 | const at::Tensor input_, 53 | const at::Tensor mean_, 54 | const at::Tensor std_, 55 | const at::Tensor gamma_, 56 | const at::Tensor beta_); 57 | 58 | std::vector BatchNorm_Backward_CPU( 59 | const at::Tensor gradoutput_, 60 | const at::Tensor input_, 61 | const at::Tensor mean_, 62 | const at::Tensor std_, 63 | const at::Tensor gamma_, 64 | const at::Tensor beta_, 65 | bool train); 66 | 67 | std::vector Sum_Square_Forward_CPU( 68 | const at::Tensor input_); 69 | 70 | at::Tensor Sum_Square_Backward_CPU( 71 | const at::Tensor input_, 72 | const at::Tensor gradSum_, 73 | const at::Tensor gradSquare_); 74 | 75 | std::vector Non_Max_Suppression_CPU( 76 | const at::Tensor& input, 77 | const at::Tensor& scores, 78 | 
double thresh); 79 | 80 | void CONV_RECTIFY_CPU( 81 | at::Tensor& output, 82 | const at::Tensor& input, 83 | at::IntArrayRef kernel_size, 84 | at::IntArrayRef stride, 85 | at::IntArrayRef padding, 86 | at::IntArrayRef dilation, 87 | bool avg_mode); 88 | 89 | // Fused color jitter application 90 | // ctm [4,4], img [H, W, C] 91 | py::array_t apply_transform(int H, int W, int C, py::array_t img, py::array_t ctm) { 92 | auto img_buf = img.request(); 93 | auto ctm_buf = ctm.request(); 94 | 95 | // printf("H: %d, W: %d, C: %d\n", H, W, C); 96 | py::array_t result{(unsigned long)img_buf.size}; 97 | auto res_buf = result.request(); 98 | 99 | float *img_ptr = (float *)img_buf.ptr; 100 | float *ctm_ptr = (float *)ctm_buf.ptr; 101 | float *res_ptr = (float *)res_buf.ptr; 102 | 103 | for (int h = 0; h < H; ++h) { 104 | for (int w = 0; w < W; ++w) { 105 | float *ptr = &img_ptr[h * W * C + w * C]; 106 | float *out_ptr = &res_ptr[h * W * C + w * C]; 107 | // manually unroll over C 108 | out_ptr[0] = ctm_ptr[0] * ptr[0] + ctm_ptr[1] * ptr[1] + ctm_ptr[2] * ptr[2] + ctm_ptr[3]; 109 | out_ptr[1] = ctm_ptr[4] * ptr[0] + ctm_ptr[5] * ptr[1] + ctm_ptr[6] * ptr[2] + ctm_ptr[7]; 110 | out_ptr[2] = ctm_ptr[8] * ptr[0] + ctm_ptr[9] * ptr[1] + ctm_ptr[10] * ptr[2] + ctm_ptr[11]; 111 | } 112 | } 113 | 114 | result.resize({H, W, C}); 115 | 116 | return result; 117 | } 118 | -------------------------------------------------------------------------------- /encoding/lib/cpu/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CppExtension 3 | 4 | setup( 5 | name='enclib_cpu', 6 | ext_modules=[ 7 | CppExtension('enclib_cpu', [ 8 | 'operator.cpp', 9 | 'roi_align_cpu.cpp', 10 | 'encoding_cpu.cpp', 11 | 'syncbn_cpu.cpp', 12 | 'nms_cpu.cpp', 13 | ]), 14 | ], 15 | cmdclass={ 16 | 'build_ext': BuildExtension 17 | }) 18 | -------------------------------------------------------------------------------- /encoding/lib/cpu/syncbn_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { 6 | if (x.ndimension() == 2) { 7 | return v; 8 | } else { 9 | std::vector broadcast_size = {1, -1}; 10 | for (int64_t i = 2; i < x.ndimension(); ++i) 11 | broadcast_size.push_back(1); 12 | 13 | return v.view(broadcast_size); 14 | } 15 | } 16 | 17 | at::Tensor BatchNorm_Forward_CPU( 18 | const at::Tensor input, 19 | const at::Tensor mean, 20 | const at::Tensor std, 21 | const at::Tensor gamma, 22 | const at::Tensor beta) { 23 | auto output = (input - broadcast_to(mean, input)) / broadcast_to(std, input); 24 | output = output * broadcast_to(gamma, input) + broadcast_to(beta, input); 25 | return output; 26 | } 27 | 28 | // Not implementing CPU backward for now 29 | std::vector BatchNorm_Backward_CPU( 30 | const at::Tensor gradoutput, 31 | const at::Tensor input, 32 | const at::Tensor mean, 33 | const at::Tensor std, 34 | const at::Tensor gamma, 35 | const at::Tensor beta, 36 | bool train) { 37 | /* outputs*/ 38 | at::Tensor gradinput = at::zeros_like(input); 39 | at::Tensor gradgamma = at::zeros_like(gamma); 40 | at::Tensor gradbeta = at::zeros_like(beta); 41 | at::Tensor gradMean = at::zeros_like(mean); 42 | at::Tensor gradStd = at::zeros_like(std); 43 | return {gradinput, gradMean, gradStd, gradgamma, gradbeta}; 44 | } 45 | 46 | std::vector Sum_Square_Forward_CPU( 47 | const at::Tensor input) 
{ 48 | /* outputs */ 49 | at::Tensor sum = torch::zeros({input.size(1)}, input.options()); 50 | at::Tensor square = torch::zeros({input.size(1)}, input.options()); 51 | return {sum, square}; 52 | } 53 | 54 | at::Tensor Sum_Square_Backward_CPU( 55 | const at::Tensor input, 56 | const at::Tensor gradSum, 57 | const at::Tensor gradSquare) { 58 | /* outputs */ 59 | at::Tensor gradInput = at::zeros_like(input); 60 | return gradInput; 61 | } 62 | -------------------------------------------------------------------------------- /encoding/lib/gpu/activation_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include "common.h" 9 | 10 | 11 | namespace { 12 | 13 | template 14 | inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) { 15 | // Create thrust pointers 16 | thrust::device_ptr th_z = thrust::device_pointer_cast(z); 17 | thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); 18 | 19 | thrust::transform_if(th_dz, th_dz + count, th_z, th_dz, 20 | [slope] __device__ (const T& dz) { return dz * slope; }, 21 | [] __device__ (const T& z) { return z < 0; }); 22 | thrust::transform_if(th_z, th_z + count, th_z, 23 | [slope] __device__ (const T& z) { return z / slope; }, 24 | [] __device__ (const T& z) { return z < 0; }); 25 | } 26 | 27 | } 28 | 29 | void LeakyRelu_Forward_CUDA(at::Tensor z, float slope) { 30 | at::leaky_relu_(z, slope); 31 | } 32 | 33 | void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope) { 34 | int64_t count = z.numel(); 35 | 36 | AT_DISPATCH_FLOATING_TYPES(z.type(), "LeakyRelu_Backward_CUDA", ([&] { 37 | leaky_relu_backward_impl(z.data(), dz.data(), slope, count); 38 | })); 39 | /* 40 | // unstable after scaling 41 | at::leaky_relu_(z, 1.0 / slope); 42 | at::leaky_relu_backward(dz, z, slope); 43 | */ 44 | } 45 | -------------------------------------------------------------------------------- /encoding/lib/gpu/device_tensor.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | template 4 | struct DeviceTensor { 5 | public: 6 | inline __device__ __host__ DeviceTensor(DType *p, const int *size) 7 | : dptr_(p) { 8 | for (int i = 0; i < Dim; ++i) { 9 | size_[i] = size ? 
size[i] : 0; 10 | } 11 | } 12 | 13 | inline __device__ __host__ unsigned getSize(const int i) const { 14 | assert(i < Dim); 15 | return size_[i]; 16 | } 17 | 18 | inline __device__ __host__ int numElements() const { 19 | int n = 1; 20 | for (int i = 0; i < Dim; ++i) { 21 | n *= size_[i]; 22 | } 23 | return n; 24 | } 25 | 26 | inline __device__ __host__ DeviceTensor select(const size_t x) const { 27 | assert(Dim > 1); 28 | int offset = x; 29 | for (int i = 1; i < Dim; ++i) { 30 | offset *= size_[i]; 31 | } 32 | DeviceTensor tensor(dptr_ + offset, nullptr); 33 | for (int i = 0; i < Dim - 1; ++i) { 34 | tensor.size_[i] = this->size_[i+1]; 35 | } 36 | return tensor; 37 | } 38 | 39 | inline __device__ __host__ DeviceTensor operator[](const size_t x) const { 40 | assert(Dim > 1); 41 | int offset = x; 42 | for (int i = 1; i < Dim; ++i) { 43 | offset *= size_[i]; 44 | } 45 | DeviceTensor tensor(dptr_ + offset, nullptr); 46 | for (int i = 0; i < Dim - 1; ++i) { 47 | tensor.size_[i] = this->size_[i+1]; 48 | } 49 | return tensor; 50 | } 51 | 52 | inline __device__ __host__ size_t InnerSize() const { 53 | assert(Dim >= 3); 54 | size_t sz = 1; 55 | for (size_t i = 2; i < Dim; ++i) { 56 | sz *= size_[i]; 57 | } 58 | return sz; 59 | } 60 | 61 | inline __device__ __host__ size_t ChannelCount() const { 62 | assert(Dim >= 3); 63 | return size_[1]; 64 | } 65 | 66 | inline __device__ __host__ DType* data_ptr() const { 67 | return dptr_; 68 | } 69 | 70 | DType *dptr_; 71 | int size_[Dim]; 72 | }; 73 | 74 | template 75 | struct DeviceTensor { 76 | inline __device__ __host__ DeviceTensor(DType *p, const int *size) 77 | : dptr_(p) { 78 | size_[0] = size ? size[0] : 0; 79 | } 80 | 81 | inline __device__ __host__ unsigned getSize(const int i) const { 82 | assert(i == 0); 83 | return size_[0]; 84 | } 85 | 86 | inline __device__ __host__ int numElements() const { 87 | return size_[0]; 88 | } 89 | 90 | inline __device__ __host__ DType &operator[](const size_t x) const { 91 | return *(dptr_ + x); 92 | } 93 | 94 | inline __device__ __host__ DType* data_ptr() const { 95 | return dptr_; 96 | } 97 | 98 | DType *dptr_; 99 | int size_[1]; 100 | }; 101 | 102 | template 103 | static DeviceTensor devicetensor(const at::Tensor &blob) { 104 | DType *data = blob.data(); 105 | DeviceTensor tensor(data, nullptr); 106 | for (int i = 0; i < Dim; ++i) { 107 | tensor.size_[i] = blob.size(i); 108 | } 109 | return tensor; 110 | } 111 | -------------------------------------------------------------------------------- /encoding/lib/gpu/operator.cpp: -------------------------------------------------------------------------------- 1 | #include "operator.h" 2 | 3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 4 | m.def("roi_align_forward", &ROIAlign_Forward_CUDA, "ROI Align forward (CUDA)"); 5 | m.def("roi_align_backward", &ROIAlign_Backward_CUDA, "ROI Align backward (CUDA)"); 6 | m.def("non_max_suppression", &Non_Max_Suppression_CUDA, "NMS (CUDA)"); 7 | m.def("aggregate_forward", &Aggregate_Forward_CUDA, "Aggregate forward (CUDA)"); 8 | m.def("aggregate_backward", &Aggregate_Backward_CUDA, "Aggregate backward (CUDA)"); 9 | m.def("scaled_l2_forward", &ScaledL2_Forward_CUDA, "ScaledL2 forward (CUDA)"); 10 | m.def("scaled_l2_backward", &ScaledL2_Backward_CUDA, "ScaledL2 backward (CUDA)"); 11 | m.def("batchnorm_forward", &BatchNorm_Forward_CUDA, "BatchNorm forward (CUDA)"); 12 | m.def("batchnorm_inp_forward", &BatchNorm_Forward_Inp_CUDA, "BatchNorm forward (CUDA)"); 13 | m.def("batchnorm_backward", &BatchNorm_Backward_CUDA, "BatchNorm backward 
(CUDA)"); 14 | m.def("batchnorm_inp_backward", &BatchNorm_Inp_Backward_CUDA, "BatchNorm backward (CUDA)"); 15 | m.def("expectation_forward", &Expectation_Forward_CUDA, "Expectation forward (CUDA)"); 16 | m.def("expectation_backward", &Expectation_Backward_CUDA, "Expectation backward (CUDA)"); 17 | m.def("expectation_inp_backward", &Expectation_Inp_Backward_CUDA, 18 | "Inplace Expectation backward (CUDA)"); 19 | m.def("leaky_relu_forward", &LeakyRelu_Forward_CUDA, "Learky ReLU forward (CUDA)"); 20 | m.def("leaky_relu_backward", &LeakyRelu_Backward_CUDA, "Learky ReLU backward (CUDA)"); 21 | m.def("conv_rectify", &CONV_RECTIFY_CUDA, "Convolution Rectifier (CUDA)"); 22 | // batched box encoder 23 | m.def("box_encoder", &box_encoder, "box_encoder"); 24 | m.def("random_horiz_flip", &random_horiz_flip, "random_horiz_flip"); 25 | } 26 | -------------------------------------------------------------------------------- /encoding/lib/gpu/operator.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | std::vector box_encoder( 6 | const int N_img, 7 | const at::Tensor& bbox_input, 8 | const at::Tensor& bbox_offsets, 9 | const at::Tensor& labels_input, 10 | const at::Tensor& dbox, 11 | const float criteria = 0.5); 12 | 13 | std::vector random_horiz_flip( 14 | at::Tensor& img, 15 | at::Tensor& bboxes, 16 | const at::Tensor& bbox_offsets, 17 | const float p, 18 | const bool nhwc); 19 | 20 | at::Tensor ROIAlign_Forward_CUDA( 21 | const at::Tensor input, 22 | const at::Tensor rois, 23 | int64_t pooled_height, 24 | int64_t pooled_width, 25 | double spatial_scale, 26 | int64_t sample_ratio); 27 | 28 | at::Tensor ROIAlign_Backward_CUDA( 29 | const at::Tensor rois, 30 | const at::Tensor grad_output, 31 | int64_t b_size, 32 | int64_t channels, 33 | int64_t height, 34 | int64_t width, 35 | int64_t pooled_height, 36 | int64_t pooled_width, 37 | double spatial_scale, 38 | int64_t sampling_ratio); 39 | 40 | std::vector Non_Max_Suppression_CUDA( 41 | const at::Tensor& input, 42 | const at::Tensor& scores, 43 | double thresh); 44 | 45 | at::Tensor Aggregate_Forward_CUDA( 46 | const at::Tensor A_, 47 | const at::Tensor X_, 48 | const at::Tensor C_); 49 | 50 | std::vector Aggregate_Backward_CUDA( 51 | const at::Tensor GE_, 52 | const at::Tensor A_, 53 | const at::Tensor X_, 54 | const at::Tensor C_); 55 | 56 | at::Tensor ScaledL2_Forward_CUDA( 57 | const at::Tensor X_, 58 | const at::Tensor C_, 59 | const at::Tensor S_); 60 | 61 | std::vector ScaledL2_Backward_CUDA( 62 | const at::Tensor GSL_, 63 | const at::Tensor X_, 64 | const at::Tensor C_, 65 | const at::Tensor S_, 66 | const at::Tensor SL_); 67 | 68 | at::Tensor BatchNorm_Forward_CUDA( 69 | const at::Tensor input_, 70 | const at::Tensor mean_, 71 | const at::Tensor std_, 72 | const at::Tensor gamma_, 73 | const at::Tensor beta_, 74 | float eps); 75 | 76 | at::Tensor BatchNorm_Forward_Inp_CUDA( 77 | const at::Tensor input_, 78 | const at::Tensor ex_, 79 | const at::Tensor exs_, 80 | const at::Tensor gamma_, 81 | const at::Tensor beta_, 82 | float eps); 83 | 84 | std::vector BatchNorm_Backward_CUDA( 85 | const at::Tensor gradoutput_, 86 | const at::Tensor input_, 87 | const at::Tensor ex_, 88 | const at::Tensor exs_, 89 | const at::Tensor gamma_, 90 | const at::Tensor beta_, 91 | float eps); 92 | 93 | std::vector BatchNorm_Inp_Backward_CUDA( 94 | const at::Tensor gradoutput_, 95 | const at::Tensor output_, 96 | const at::Tensor ex_, 97 | const at::Tensor exs_, 98 | const at::Tensor gamma_, 99 
| const at::Tensor beta_, 100 | float eps); 101 | 102 | std::vector Expectation_Forward_CUDA( 103 | const at::Tensor input_); 104 | 105 | at::Tensor Expectation_Backward_CUDA( 106 | const at::Tensor input_, 107 | const at::Tensor gradEx_, 108 | const at::Tensor gradExs_); 109 | 110 | at::Tensor Expectation_Inp_Backward_CUDA( 111 | const at::Tensor gradInput_, 112 | const at::Tensor output_, 113 | const at::Tensor gradEx_, 114 | const at::Tensor gradExs_, 115 | const at::Tensor ex_, 116 | const at::Tensor exs_, 117 | const at::Tensor gamma_, 118 | const at::Tensor beta_, 119 | float eps); 120 | 121 | void LeakyRelu_Forward_CUDA(at::Tensor z, float slope); 122 | 123 | void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope); 124 | 125 | void CONV_RECTIFY_CUDA( 126 | at::Tensor& output, 127 | const at::Tensor& input, 128 | at::IntArrayRef kernel_size, 129 | at::IntArrayRef stride, 130 | at::IntArrayRef padding, 131 | at::IntArrayRef dilation, 132 | bool avg_mode); 133 | -------------------------------------------------------------------------------- /encoding/lib/gpu/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='enclib_gpu', 6 | ext_modules=[ 7 | CUDAExtension('enclib_gpu', [ 8 | 'operator.cpp', 9 | 'activation_kernel.cu', 10 | 'encoding_kernel.cu', 11 | 'syncbn_kernel.cu', 12 | 'roi_align_kernel.cu', 13 | 'nms_kernel.cu', 14 | 'rectify.cu', 15 | ]), 16 | ], 17 | cmdclass={ 18 | 'build_ext': BuildExtension 19 | }) 20 | -------------------------------------------------------------------------------- /encoding/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .model_zoo import get_model 2 | from .model_zoo import model_list 3 | from .model_store import get_model_file, pretrained_model_list 4 | 5 | from .sseg import get_segmentation_model, MultiEvalModule 6 | -------------------------------------------------------------------------------- /encoding/models/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .resnest import * 3 | from .resnext import * 4 | from .resnet_variants import * 5 | from .wideresnet import * 6 | from .xception import * 7 | -------------------------------------------------------------------------------- /encoding/models/backbone/resnest.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## LICENSE file in the root directory of this source tree 7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 | """ResNeSt models""" 9 | 10 | import torch 11 | from .resnet import ResNet, Bottleneck 12 | from ..model_store import get_model_file 13 | 14 | __all__ = ['resnest50', 'resnest101', 'resnest200', 'resnest269'] 15 | 16 | _url_format = 'https://hangzh.s3.amazonaws.com/encoding/models/{}-{}.pth' 17 | 18 | 19 | def resnest50(pretrained=False, root='~/.encoding/models', **kwargs): 20 | model = ResNet(Bottleneck, [3, 4, 6, 3], 21 | radix=2, groups=1, bottleneck_width=64, 22 | deep_stem=True, stem_width=32, avg_down=True, 23 | avd=True, avd_first=False, **kwargs) 24 | if pretrained: 25 | 
model.load_state_dict(torch.load( 26 | get_model_file('resnest50', root=root)), strict=True) 27 | return model 28 | 29 | def resnest101(pretrained=False, root='~/.encoding/models', **kwargs): 30 | model = ResNet(Bottleneck, [3, 4, 23, 3], 31 | radix=2, groups=1, bottleneck_width=64, 32 | deep_stem=True, stem_width=64, avg_down=True, 33 | avd=True, avd_first=False, **kwargs) 34 | if pretrained: 35 | model.load_state_dict(torch.load( 36 | get_model_file('resnest101', root=root)), strict=True) 37 | return model 38 | 39 | def resnest200(pretrained=False, root='~/.encoding/models', **kwargs): 40 | model = ResNet(Bottleneck, [3, 24, 36, 3], 41 | radix=2, groups=1, bottleneck_width=64, 42 | deep_stem=True, stem_width=64, avg_down=True, 43 | avd=True, avd_first=False, **kwargs) 44 | if pretrained: 45 | model.load_state_dict(torch.load( 46 | get_model_file('resnest200', root=root)), strict=False) 47 | return model 48 | 49 | def resnest269(pretrained=False, root='~/.encoding/models', **kwargs): 50 | model = ResNet(Bottleneck, [3, 30, 48, 8], 51 | radix=2, groups=1, bottleneck_width=64, 52 | deep_stem=True, stem_width=64, avg_down=True, 53 | avd=True, avd_first=False, **kwargs) 54 | if pretrained: 55 | model.load_state_dict(torch.load( 56 | get_model_file('resnest269', root=root)), strict=True) 57 | return model 58 | 59 | def resnest50_fast(pretrained=False, root='~/.encoding/models', **kwargs): 60 | model = ResNet(Bottleneck, [3, 4, 6, 3], 61 | radix=2, groups=1, bottleneck_width=64, 62 | deep_stem=True, stem_width=32, avg_down=True, 63 | avd=True, avd_first=True, **kwargs) 64 | if pretrained: 65 | model.load_state_dict(torch.load( 66 | get_model_file('resnest50fast', root=root)), strict=True) 67 | return model 68 | 69 | def resnest101_fast(pretrained=False, root='~/.encoding/models', **kwargs): 70 | model = ResNet(Bottleneck, [3, 4, 23, 3], 71 | radix=2, groups=1, bottleneck_width=64, 72 | deep_stem=True, stem_width=64, avg_down=True, 73 | avd=True, avd_first=True, **kwargs) 74 | if pretrained: 75 | model.load_state_dict(torch.load( 76 | get_model_file('resnest101fast', root=root)), strict=True) 77 | return model 78 | -------------------------------------------------------------------------------- /encoding/models/backbone/resnet_variants.py: -------------------------------------------------------------------------------- 1 | """ResNet variants""" 2 | 3 | import torch 4 | from .resnet import ResNet, Bottleneck 5 | from ..model_store import get_model_file 6 | 7 | __all__ = ['resnet50s', 'resnet101s', 'resnet152s', 8 | 'resnet50d'] 9 | 10 | # pspnet version of ResNet 11 | def resnet50s(pretrained=False, root='~/.encoding/models', **kwargs): 12 | """Constructs a ResNetS-50 model as in PSPNet. 13 | 14 | Args: 15 | pretrained (bool): If True, returns a model pre-trained on ImageNet 16 | """ 17 | kwargs['deep_stem'] = True 18 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 19 | if pretrained: 20 | model.load_state_dict(torch.load( 21 | get_model_file('resnet50s', root=root)), strict=False) 22 | return model 23 | 24 | def resnet101s(pretrained=False, root='~/.encoding/models', **kwargs): 25 | """Constructs a ResNetS-101 model as in PSPNet. 
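    The "S" (PSPNet-style) variant uses a deep stem (``deep_stem=True``), i.e.
    stacked 3x3 convolutions in place of the single 7x7 stem convolution.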
26 | 27 | Args: 28 | pretrained (bool): If True, returns a model pre-trained on ImageNet 29 | """ 30 | kwargs['deep_stem'] = True 31 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 32 | if pretrained: 33 | model.load_state_dict(torch.load( 34 | get_model_file('resnet101s', root=root)), strict=False) 35 | return model 36 | 37 | def resnet152s(pretrained=False, root='~/.encoding/models', **kwargs): 38 | """Constructs a ResNetS-152 model as in PSPNet. 39 | 40 | Args: 41 | pretrained (bool): If True, returns a model pre-trained on ImageNet 42 | """ 43 | kwargs['deep_stem'] = True 44 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 45 | if pretrained: 46 | model.load_state_dict(torch.load( 47 | get_model_file('resnet152s', root=root)), strict=False) 48 | return model 49 | 50 | # ResNet-D 51 | def resnet50d(pretrained=False, root='~/.encoding/models', **kwargs): 52 | model = ResNet(Bottleneck, [3, 4, 6, 3], 53 | deep_stem=True, stem_width=32, 54 | avg_down=True, **kwargs) 55 | if pretrained: 56 | model.load_state_dict(torch.load( 57 | get_model_file('resnet50d', root=root)), strict=False) 58 | return model 59 | -------------------------------------------------------------------------------- /encoding/models/backbone/resnext.py: -------------------------------------------------------------------------------- 1 | """ResNeXt models""" 2 | 3 | from .resnet import ResNet, Bottleneck 4 | from ..model_store import get_model_file 5 | 6 | __all__ = ['resnext50_32x4d', 'resnext101_32x8d'] 7 | 8 | def resnext50_32x4d(pretrained=False, root='~/.encoding/models', **kwargs): 9 | r"""ResNeXt-50 32x4d model from 10 | `"Aggregated Residual Transformation for Deep Neural Networks" `_ 11 | 12 | Args: 13 | pretrained (bool): If True, returns a model pre-trained on ImageNet 14 | progress (bool): If True, displays a progress bar of the download to stderr 15 | """ 16 | kwargs['groups'] = 32 17 | kwargs['bottleneck_width'] = 4 18 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 19 | if pretrained: 20 | model.load_state_dict(torch.load( 21 | get_model_file('resnext50_32x4d', root=root)), strict=False) 22 | return model 23 | 24 | def resnext101_32x8d(pretrained=False, root='~/.encoding/models', **kwargs): 25 | r"""ResNeXt-101 32x8d model from 26 | `"Aggregated Residual Transformation for Deep Neural Networks" `_ 27 | 28 | Args: 29 | pretrained (bool): If True, returns a model pre-trained on ImageNet 30 | progress (bool): If True, displays a progress bar of the download to stderr 31 | """ 32 | kwargs['groups'] = 32 33 | kwargs['bottleneck_width'] = 8 34 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 35 | if pretrained: 36 | model.load_state_dict(torch.load( 37 | get_model_file('resnext101_32x8d', root=root)), strict=False) 38 | return model 39 | 40 | -------------------------------------------------------------------------------- /encoding/models/deepten.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import torch 12 | import torch.nn as nn 13 | 14 | from ..nn import Encoding, View, Normalize 15 | from .backbone import 
resnet50s, resnet101s, resnet152s 16 | 17 | __all__ = ['DeepTen', 'get_deepten', 'get_deepten_resnet50_minc'] 18 | 19 | class DeepTen(nn.Module): 20 | def __init__(self, nclass, backbone): 21 | super(DeepTen, self).__init__() 22 | self.backbone = backbone 23 | # copying modules from pretrained models 24 | if self.backbone == 'resnet50': 25 | self.pretrained = resnet50s(pretrained=True, dilated=False) 26 | elif self.backbone == 'resnet101': 27 | self.pretrained = resnet101s(pretrained=True, dilated=False) 28 | elif self.backbone == 'resnet152': 29 | self.pretrained = resnet152s(pretrained=True, dilated=False) 30 | else: 31 | raise RuntimeError('unknown backbone: {}'.format(self.backbone)) 32 | n_codes = 32 33 | self.head = nn.Sequential( 34 | nn.Conv2d(2048, 128, 1), 35 | nn.BatchNorm2d(128), 36 | nn.ReLU(inplace=True), 37 | Encoding(D=128,K=n_codes), 38 | View(-1, 128*n_codes), 39 | Normalize(), 40 | nn.Linear(128*n_codes, nclass), 41 | ) 42 | 43 | def forward(self, x): 44 | _, _, h, w = x.size() 45 | x = self.pretrained.conv1(x) 46 | x = self.pretrained.bn1(x) 47 | x = self.pretrained.relu(x) 48 | x = self.pretrained.maxpool(x) 49 | x = self.pretrained.layer1(x) 50 | x = self.pretrained.layer2(x) 51 | x = self.pretrained.layer3(x) 52 | x = self.pretrained.layer4(x) 53 | return self.head(x) 54 | 55 | def get_deepten(dataset='pascal_voc', backbone='resnet50', pretrained=False, 56 | root='~/.encoding/models', **kwargs): 57 | r"""DeepTen model from the paper `"Deep TEN: Texture Encoding Network" 58 | `_ 59 | Parameters 60 | ---------- 61 | dataset : str, default pascal_voc 62 | The dataset that model pretrained on. (pascal_voc, ade20k) 63 | pretrained : bool, default False 64 | Whether to load the pretrained weights for model. 65 | root : str, default '~/.encoding/models' 66 | Location for keeping the model parameters. 67 | Examples 68 | -------- 69 | >>> model = get_deepten(dataset='minc', backbone='resnet50', pretrained=False) 70 | >>> print(model) 71 | """ 72 | from ..datasets import datasets, acronyms 73 | model = DeepTen(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, **kwargs) 74 | if pretrained: 75 | from .model_store import get_model_file 76 | model.load_state_dict(torch.load( 77 | get_model_file('deepten_%s_%s'%(backbone, acronyms[dataset]), root=root))) 78 | return model 79 | 80 | def get_deepten_resnet50_minc(pretrained=False, root='~/.encoding/models', **kwargs): 81 | r"""DeepTen model from the paper `"Deep TEN: Texture Encoding Network" 82 | `_ 83 | Parameters 84 | ---------- 85 | pretrained : bool, default False 86 | Whether to load the pretrained weights for model. 87 | root : str, default '~/.encoding/models' 88 | Location for keeping the model parameters. 
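The DeepTen head above reduces layer4 features to 128 channels, encodes them against K=32 learnable codewords, and classifies the flattened descriptor. A hedged shape sketch of that pipeline, assuming the Encoding layer's B x D x H x W -> B x K x D residual-encoding output and a built CPU extension:

import torch
from encoding.nn import Encoding, View, Normalize

feats = torch.randn(2, 128, 10, 10)            # stand-in for the 1x1-conv output on layer4 features
codes = Encoding(D=128, K=32)(feats)           # -> (2, 32, 128): one 128-d residual per codeword
desc = Normalize()(View(-1, 128 * 32)(codes))  # -> (2, 4096), L2-normalized orderless descriptor
print(desc.shape)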
89 | 90 | 91 | Examples 92 | -------- 93 | >>> model = get_deepten_resnet50_minc(pretrained=True) 94 | >>> print(model) 95 | """ 96 | return get_deepten(dataset='minc', backbone='resnet50', pretrained=pretrained, 97 | root=root, **kwargs) 98 | -------------------------------------------------------------------------------- /encoding/models/model_zoo.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import, unused-wildcard-import 2 | 3 | from .backbone import * 4 | from .sseg import * 5 | from .deepten import * 6 | 7 | __all__ = ['model_list', 'get_model'] 8 | 9 | models = { 10 | # resnet 11 | 'resnet50': resnet50, 12 | 'resnet101': resnet101, 13 | 'resnet152': resnet152, 14 | # resnest 15 | 'resnest50': resnest50, 16 | 'resnest101': resnest101, 17 | 'resnest200': resnest200, 18 | 'resnest269': resnest269, 19 | # resnet other variants 20 | 'resnet50s': resnet50s, 21 | 'resnet101s': resnet101s, 22 | 'resnet152s': resnet152s, 23 | 'resnet50d': resnet50d, 24 | 'resnext50_32x4d': resnext50_32x4d, 25 | 'resnext101_32x8d': resnext101_32x8d, 26 | # other segmentation backbones 27 | 'xception65': xception65, 28 | 'wideresnet38': wideresnet38, 29 | 'wideresnet50': wideresnet50, 30 | # deepten paper 31 | 'deepten_resnet50_minc': get_deepten_resnet50_minc, 32 | # segmentation resnet models 33 | 'encnet_resnet101s_coco': get_encnet_resnet101_coco, 34 | 'fcn_resnet50s_pcontext': get_fcn_resnet50_pcontext, 35 | 'encnet_resnet50s_pcontext': get_encnet_resnet50_pcontext, 36 | 'encnet_resnet101s_pcontext': get_encnet_resnet101_pcontext, 37 | 'encnet_resnet50s_ade': get_encnet_resnet50_ade, 38 | 'encnet_resnet101s_ade': get_encnet_resnet101_ade, 39 | 'fcn_resnet50s_ade': get_fcn_resnet50_ade, 40 | 'psp_resnet50s_ade': get_psp_resnet50_ade, 41 | # segmentation resnest models 42 | 'fcn_resnest50_ade': get_fcn_resnest50_ade, 43 | 'deeplab_resnest50_ade': get_deeplab_resnest50_ade, 44 | 'deeplab_resnest101_ade': get_deeplab_resnest101_ade, 45 | 'deeplab_resnest200_ade': get_deeplab_resnest200_ade, 46 | 'deeplab_resnest269_ade': get_deeplab_resnest269_ade, 47 | 'fcn_resnest50_pcontext': get_fcn_resnest50_pcontext, 48 | 'deeplab_resnest50_pcontext': get_deeplab_resnest50_pcontext, 49 | 'deeplab_resnest101_pcontext': get_deeplab_resnest101_pcontext, 50 | 'deeplab_resnest200_pcontext': get_deeplab_resnest200_pcontext, 51 | 'deeplab_resnest269_pcontext': get_deeplab_resnest269_pcontext, 52 | } 53 | 54 | model_list = list(models.keys()) 55 | 56 | def get_model(name, **kwargs): 57 | """Returns a pre-defined model by name 58 | 59 | Parameters 60 | ---------- 61 | name : str 62 | Name of the model. 63 | pretrained : bool 64 | Whether to load the pretrained weights for model. 65 | root : str, default '~/.encoding/models' 66 | Location for keeping the model parameters. 67 | 68 | Returns 69 | ------- 70 | Module: 71 | The model. 
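A usage sketch for the registry in model_zoo.py (this mirrors how experiments/segmentation/demo.py calls it; pretrained=True fetches weights into root):

import encoding

print(len(encoding.models.model_list))                    # all registered model names
net = encoding.models.get_model('resnest50', pretrained=False)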
72 | """ 73 | name = name.lower() 74 | if name not in models: 75 | raise ValueError('%s\n\t%s' % (str(name), '\n\t'.join(sorted(models.keys())))) 76 | net = models[name](**kwargs) 77 | return net 78 | -------------------------------------------------------------------------------- /encoding/models/sseg/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import * 2 | from .fcn import * 3 | from .psp import * 4 | from .fcfpn import * 5 | from .atten import * 6 | from .encnet import * 7 | from .deeplab import * 8 | from .upernet import * 9 | from .dran import * 10 | from .danet import * 11 | 12 | def get_segmentation_model(name, **kwargs): 13 | models = { 14 | 'fcn': get_fcn, 15 | 'psp': get_psp, 16 | 'fcfpn': get_fcfpn, 17 | 'atten': get_atten, 18 | 'encnet': get_encnet, 19 | 'upernet': get_upernet, 20 | 'deeplab': get_deeplab, 21 | 'dran':get_dran, 22 | 'danet': get_danet 23 | } 24 | return models[name.lower()](**kwargs) 25 | -------------------------------------------------------------------------------- /encoding/models/sseg/psp.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | from __future__ import division 7 | import os 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | from torch.nn.functional import interpolate 12 | 13 | from .base import BaseNet 14 | from .fcn import FCNHead 15 | from ...nn import PyramidPooling 16 | 17 | class PSP(BaseNet): 18 | def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs): 19 | super(PSP, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs) 20 | self.head = PSPHead(2048, nclass, norm_layer, self._up_kwargs) 21 | if aux: 22 | self.auxlayer = FCNHead(1024, nclass, norm_layer) 23 | 24 | def forward(self, x): 25 | _, _, h, w = x.size() 26 | _, _, c3, c4 = self.base_forward(x) 27 | 28 | outputs = [] 29 | x = self.head(c4) 30 | x = interpolate(x, (h,w), **self._up_kwargs) 31 | outputs.append(x) 32 | if self.aux: 33 | auxout = self.auxlayer(c3) 34 | auxout = interpolate(auxout, (h,w), **self._up_kwargs) 35 | outputs.append(auxout) 36 | return tuple(outputs) 37 | 38 | 39 | class PSPHead(nn.Module): 40 | def __init__(self, in_channels, out_channels, norm_layer, up_kwargs): 41 | super(PSPHead, self).__init__() 42 | inter_channels = in_channels // 4 43 | self.conv5 = nn.Sequential(PyramidPooling(in_channels, norm_layer, up_kwargs), 44 | nn.Conv2d(in_channels * 2, inter_channels, 3, padding=1, bias=False), 45 | norm_layer(inter_channels), 46 | nn.ReLU(True), 47 | nn.Dropout(0.1, False), 48 | nn.Conv2d(inter_channels, out_channels, 1)) 49 | 50 | def forward(self, x): 51 | return self.conv5(x) 52 | 53 | def get_psp(dataset='pascal_voc', backbone='resnet50s', pretrained=False, 54 | root='~/.encoding/models', **kwargs): 55 | # infer number of classes 56 | from ...datasets import datasets, acronyms 57 | model = PSP(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs) 58 | if pretrained: 59 | from ..model_store import get_model_file 60 | model.load_state_dict(torch.load( 61 | get_model_file('psp_%s_%s'%(backbone, acronyms[dataset]), root=root))) 62 | return model 63 | 64 | def get_psp_resnet50_ade(pretrained=False, 
root='~/.encoding/models', **kwargs): 65 | r"""PSP model from the paper `"Context Encoding for Semantic Segmentation" 66 | `_ 67 | 68 | Parameters 69 | ---------- 70 | pretrained : bool, default False 71 | Whether to load the pretrained weights for model. 72 | root : str, default '~/.encoding/models' 73 | Location for keeping the model parameters. 74 | 75 | 76 | Examples 77 | -------- 78 | >>> model = get_psp_resnet50_ade(pretrained=True) 79 | >>> print(model) 80 | """ 81 | return get_psp('ade20k', 'resnet50s', pretrained, root=root, **kwargs) 82 | -------------------------------------------------------------------------------- /encoding/models/sseg/upernet.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: Hang Zhang 3 | # Email: zhang.hang@rutgers.edu 4 | # Copyright (c) 2017 5 | ########################################################################### 6 | from __future__ import division 7 | import os 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | from torch.nn.functional import upsample 12 | 13 | from .base import BaseNet 14 | from .fcfpn import FCFPNHead 15 | from ...nn import PyramidPooling 16 | 17 | torch_ver = torch.__version__[:3] 18 | 19 | __all__ = ['UperNet', 'get_upernet', 'get_upernet_50_ade'] 20 | 21 | class UperNet(BaseNet): 22 | r"""Fully Convolutional Networks for Semantic Segmentation 23 | 24 | Parameters 25 | ---------- 26 | nclass : int 27 | Number of categories for the training dataset. 28 | backbone : string 29 | Pre-trained dilated backbone network type (default:'resnet50s'; 'resnet50s', 30 | 'resnet101s' or 'resnet152s'). 31 | norm_layer : object 32 | Normalization layer used in backbone network (default: :class:`mxnet.gluon.nn.BatchNorm`; 33 | 34 | 35 | Reference: 36 | 37 | Long, Jonathan, Evan Shelhamer, and Trevor Darrell. "Fully convolutional networks 38 | for semantic segmentation." *CVPR*, 2015 39 | 40 | Examples 41 | -------- 42 | >>> model = UperNet(nclass=21, backbone='resnet50s') 43 | >>> print(model) 44 | """ 45 | def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs): 46 | super(UperNet, self).__init__(nclass, backbone, aux, se_loss, dilated=False, norm_layer=norm_layer) 47 | self.head = UperNetHead(nclass, norm_layer, up_kwargs=self._up_kwargs) 48 | assert not aux, "UperNet does not support aux loss" 49 | 50 | def forward(self, x): 51 | imsize = x.size()[2:] 52 | features = self.base_forward(x) 53 | 54 | x = list(self.head(*features)) 55 | x[0] = upsample(x[0], imsize, **self._up_kwargs) 56 | return tuple(x) 57 | 58 | 59 | class UperNetHead(FCFPNHead): 60 | def __init__(self, out_channels, norm_layer=None, fpn_inchannels=[256, 512, 1024, 2048], 61 | fpn_dim=256, up_kwargs=None): 62 | fpn_inchannels[-1] = fpn_inchannels[-1] * 2 63 | super(UperNetHead, self).__init__(out_channels, norm_layer, fpn_inchannels, 64 | fpn_dim, up_kwargs) 65 | self.extramodule = PyramidPooling(fpn_inchannels[-1] // 2, norm_layer, up_kwargs) 66 | 67 | 68 | def get_upernet(dataset='pascal_voc', backbone='resnet50s', pretrained=False, 69 | root='~/.encoding/models', **kwargs): 70 | r"""UperNet model from the paper `"Fully Convolutional Network for semantic segmentation" 71 | `_ 72 | Parameters 73 | ---------- 74 | dataset : str, default pascal_voc 75 | The dataset that model pretrained on. 
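PSPHead above expects PyramidPooling to concatenate the pooled context onto the input, doubling the channel count before the 3x3 reduction. A standalone shape sketch; the up_kwargs dict is an assumption matching the bilinear/align_corners convention used elsewhere in this repo:

import torch
from torch import nn
from encoding.models.sseg.psp import PSPHead

head = PSPHead(2048, 150, nn.BatchNorm2d,
               up_kwargs={'mode': 'bilinear', 'align_corners': True}).eval()
with torch.no_grad():
    out = head(torch.randn(1, 2048, 30, 30))
print(out.shape)                       # torch.Size([1, 150, 30, 30])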
(pascal_voc, ade20k) 76 | pretrained : bool, default False 77 | Whether to load the pretrained weights for model. 78 | root : str, default '~/.encoding/models' 79 | Location for keeping the model parameters. 80 | Examples 81 | -------- 82 | >>> model = get_upernet(dataset='pascal_voc', backbone='resnet50s', pretrained=False) 83 | >>> print(model) 84 | """ 85 | acronyms = { 86 | 'pascal_voc': 'voc', 87 | 'pascal_aug': 'voc', 88 | 'ade20k': 'ade', 89 | } 90 | # infer number of classes 91 | from ...datasets import datasets, VOCSegmentation, VOCAugSegmentation, ADE20KSegmentation 92 | model = UperNet(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, **kwargs) 93 | if pretrained: 94 | from ..model_store import get_model_file 95 | model.load_state_dict(torch.load( 96 | get_model_file('upernet_%s_%s'%(backbone, acronyms[dataset]), root=root))) 97 | return model 98 | 99 | 100 | def get_upernet_50_ade(pretrained=False, root='~/.encoding/models', **kwargs): 101 | r"""EncNet-PSP model from the paper `"Context Encoding for Semantic Segmentation" 102 | `_ 103 | 104 | Parameters 105 | ---------- 106 | pretrained : bool, default False 107 | Whether to load the pretrained weights for model. 108 | root : str, default '~/.encoding/models' 109 | Location for keeping the model parameters. 110 | 111 | 112 | Examples 113 | -------- 114 | >>> model = get_upernet_50_ade(pretrained=True) 115 | >>> print(model) 116 | """ 117 | return get_upernet('ade20k', 'resnet50s', pretrained) 118 | -------------------------------------------------------------------------------- /encoding/nn/__init__.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding NN Modules""" 12 | from .encoding import * 13 | from .syncbn import * 14 | from .customize import * 15 | from .attention import * 16 | from .loss import * 17 | from .rectify import * 18 | from .splat import SplAtConv2d 19 | from .dropblock import * 20 | from .dran_att import * 21 | from .da_att import * 22 | -------------------------------------------------------------------------------- /encoding/nn/da_att.py: -------------------------------------------------------------------------------- 1 | ########################################################################### 2 | # Created by: CASIA IVA 3 | # Email: jliu@nlpr.ia.ac.cn 4 | # Copyright (c) 2018 5 | ########################################################################### 6 | 7 | import numpy as np 8 | import torch 9 | import math 10 | from torch.nn import Module, Sequential, Conv2d, ReLU,AdaptiveMaxPool2d, AdaptiveAvgPool2d, \ 11 | NLLLoss, BCELoss, CrossEntropyLoss, AvgPool2d, MaxPool2d, Parameter, Linear, Sigmoid, Softmax, Dropout, Embedding 12 | from torch.nn import functional as F 13 | from torch.autograd import Variable 14 | torch_ver = torch.__version__[:3] 15 | 16 | __all__ = ['PAM_Module', 'CAM_Module'] 17 | 18 | 19 | class PAM_Module(Module): 20 | """ Position attention module""" 21 | #Ref from SAGAN 22 | def __init__(self, in_dim): 23 | super(PAM_Module, self).__init__() 24 | self.chanel_in = in_dim 25 | 26 | 
self.query_conv = Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1) 27 | self.key_conv = Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1) 28 | self.value_conv = Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1) 29 | self.gamma = Parameter(torch.zeros(1)) 30 | 31 | self.softmax = Softmax(dim=-1) 32 | def forward(self, x): 33 | """ 34 | inputs : 35 | x : input feature maps( B X C X H X W) 36 | returns : 37 | out : attention value + input feature 38 | attention: B X (HxW) X (HxW) 39 | """ 40 | m_batchsize, C, height, width = x.size() 41 | proj_query = self.query_conv(x).view(m_batchsize, -1, width*height).permute(0, 2, 1) 42 | proj_key = self.key_conv(x).view(m_batchsize, -1, width*height) 43 | energy = torch.bmm(proj_query, proj_key) 44 | attention = self.softmax(energy) 45 | proj_value = self.value_conv(x).view(m_batchsize, -1, width*height) 46 | 47 | out = torch.bmm(proj_value, attention.permute(0, 2, 1)) 48 | out = out.view(m_batchsize, C, height, width) 49 | 50 | out = self.gamma*out + x 51 | return out 52 | 53 | 54 | class CAM_Module(Module): 55 | """ Channel attention module""" 56 | def __init__(self, in_dim): 57 | super(CAM_Module, self).__init__() 58 | self.chanel_in = in_dim 59 | 60 | 61 | self.gamma = Parameter(torch.zeros(1)) 62 | self.softmax = Softmax(dim=-1) 63 | def forward(self,x): 64 | """ 65 | inputs : 66 | x : input feature maps( B X C X H X W) 67 | returns : 68 | out : attention value + input feature 69 | attention: B X C X C 70 | """ 71 | m_batchsize, C, height, width = x.size() 72 | proj_query = x.view(m_batchsize, C, -1) 73 | proj_key = x.view(m_batchsize, C, -1).permute(0, 2, 1) 74 | energy = torch.bmm(proj_query, proj_key) 75 | energy_new = torch.max(energy, -1, keepdim=True)[0].expand_as(energy)-energy 76 | attention = self.softmax(energy_new) 77 | proj_value = x.view(m_batchsize, C, -1) 78 | 79 | out = torch.bmm(attention, proj_value) 80 | out = out.view(m_batchsize, C, height, width) 81 | 82 | out = self.gamma*out + x 83 | return out 84 | 85 | -------------------------------------------------------------------------------- /encoding/nn/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.nn as nn 4 | from torch.autograd import Variable 5 | 6 | __all__ = ['LabelSmoothing', 'NLLMultiLabelSmooth', 'SegmentationLosses'] 7 | 8 | class LabelSmoothing(nn.Module): 9 | """ 10 | NLL loss with label smoothing. 11 | """ 12 | def __init__(self, smoothing=0.1): 13 | """ 14 | Constructor for the LabelSmoothing module. 
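Both attention modules in encoding/nn/da_att.py are drop-in residual blocks that preserve the input shape; a small shape check (the modules themselves are pure PyTorch, though importing encoding still assumes the package's extensions are built):

import torch
from encoding.nn import PAM_Module, CAM_Module

x = torch.randn(2, 64, 16, 16)
print(PAM_Module(64)(x).shape)   # (2, 64, 16, 16); spatial attention is (2, 256, 256) internally
print(CAM_Module(64)(x).shape)   # (2, 64, 16, 16); channel attention is (2, 64, 64) internally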
15 | :param smoothing: label smoothing factor 16 | """ 17 | super(LabelSmoothing, self).__init__() 18 | self.confidence = 1.0 - smoothing 19 | self.smoothing = smoothing 20 | 21 | def forward(self, x, target): 22 | logprobs = torch.nn.functional.log_softmax(x, dim=-1) 23 | 24 | nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1)) 25 | nll_loss = nll_loss.squeeze(1) 26 | smooth_loss = -logprobs.mean(dim=-1) 27 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss 28 | return loss.mean() 29 | 30 | class NLLMultiLabelSmooth(nn.Module): 31 | def __init__(self, smoothing = 0.1): 32 | super(NLLMultiLabelSmooth, self).__init__() 33 | self.confidence = 1.0 - smoothing 34 | self.smoothing = smoothing 35 | 36 | def forward(self, x, target): 37 | if self.training: 38 | x = x.float() 39 | target = target.float() 40 | logprobs = torch.nn.functional.log_softmax(x, dim = -1) 41 | 42 | nll_loss = -logprobs * target 43 | nll_loss = nll_loss.sum(-1) 44 | 45 | smooth_loss = -logprobs.mean(dim=-1) 46 | 47 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss 48 | 49 | return loss.mean() 50 | else: 51 | return torch.nn.functional.cross_entropy(x, target) 52 | 53 | class SegmentationLosses(nn.CrossEntropyLoss): 54 | """2D Cross Entropy Loss with Auxilary Loss""" 55 | def __init__(self, se_loss=False, se_weight=0.2, nclass=-1, 56 | aux=False, aux_weight=0.4, weight=None, 57 | ignore_index=-1): 58 | super(SegmentationLosses, self).__init__(weight, None, ignore_index) 59 | self.se_loss = se_loss 60 | self.aux = aux 61 | self.nclass = nclass 62 | self.se_weight = se_weight 63 | self.aux_weight = aux_weight 64 | self.bceloss = nn.BCELoss(weight) 65 | 66 | def forward(self, *inputs): 67 | if not self.se_loss and not self.aux: 68 | return super(SegmentationLosses, self).forward(*inputs) 69 | elif not self.se_loss: 70 | pred1, pred2, target = tuple(inputs) 71 | loss1 = super(SegmentationLosses, self).forward(pred1, target) 72 | loss2 = super(SegmentationLosses, self).forward(pred2, target) 73 | return loss1 + self.aux_weight * loss2 74 | elif not self.aux: 75 | pred, se_pred, target = tuple(inputs) 76 | se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred) 77 | loss1 = super(SegmentationLosses, self).forward(pred, target) 78 | loss2 = self.bceloss(torch.sigmoid(se_pred), se_target) 79 | return loss1 + self.se_weight * loss2 80 | else: 81 | pred1, se_pred, pred2, target = tuple(inputs) 82 | se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred1) 83 | loss1 = super(SegmentationLosses, self).forward(pred1, target) 84 | loss2 = super(SegmentationLosses, self).forward(pred2, target) 85 | loss3 = self.bceloss(torch.sigmoid(se_pred), se_target) 86 | return loss1 + self.aux_weight * loss2 + self.se_weight * loss3 87 | 88 | @staticmethod 89 | def _get_batch_label_vector(target, nclass): 90 | # target is a 3D Variable BxHxW, output is 2D BxnClass 91 | batch = target.size(0) 92 | tvect = Variable(torch.zeros(batch, nclass)) 93 | for i in range(batch): 94 | hist = torch.histc(target[i].cpu().data.float(), 95 | bins=nclass, min=0, 96 | max=nclass-1) 97 | vect = hist>0 98 | tvect[i] = vect 99 | return tvect 100 | -------------------------------------------------------------------------------- /encoding/nn/rectify.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 
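SegmentationLosses in encoding/nn/loss.py changes its expected argument tuple depending on the aux and se_loss flags; a sketch of the fully enabled case, with both the auxiliary and SE-loss branches active:

import torch
from encoding.nn import SegmentationLosses

criterion = SegmentationLosses(se_loss=True, aux=True, nclass=21,
                               aux_weight=0.4, se_weight=0.2)
pred1 = torch.randn(2, 21, 64, 64)            # main head logits
se_pred = torch.randn(2, 21)                  # image-level (SE-loss) logits
pred2 = torch.randn(2, 21, 64, 64)            # auxiliary head logits
target = torch.randint(0, 21, (2, 64, 64))    # per-pixel labels
loss = criterion(pred1, se_pred, pred2, target)
print(loss.item())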
4 | ## Copyright (c) 2020 5 | ## 6 | ## LICENSE file in the root directory of this source tree 7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 | 9 | """Rectify Module""" 10 | import warnings 11 | 12 | import torch 13 | from torch.nn import Conv2d 14 | import torch.nn.functional as F 15 | from torch.nn.modules.utils import _pair 16 | 17 | from ..functions import rectify 18 | 19 | __all__ = ['RFConv2d'] 20 | 21 | 22 | class RFConv2d(Conv2d): 23 | """Rectified Convolution 24 | """ 25 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 26 | padding=0, dilation=1, groups=1, 27 | bias=True, padding_mode='zeros', 28 | average_mode=False): 29 | kernel_size = _pair(kernel_size) 30 | stride = _pair(stride) 31 | padding = _pair(padding) 32 | dilation = _pair(dilation) 33 | self.rectify = average_mode or (padding[0] > 0 or padding[1] > 0) 34 | self.average = average_mode 35 | 36 | super(RFConv2d, self).__init__( 37 | in_channels, out_channels, kernel_size, stride=stride, 38 | padding=padding, dilation=dilation, groups=groups, 39 | bias=bias, padding_mode=padding_mode) 40 | 41 | def _conv_forward(self, input, weight): 42 | if self.padding_mode != 'zeros': 43 | return F.conv2d(F.pad(input, self._padding_repeated_twice, mode=self.padding_mode), 44 | weight, self.bias, self.stride, 45 | _pair(0), self.dilation, self.groups) 46 | return F.conv2d(input, weight, self.bias, self.stride, 47 | self.padding, self.dilation, self.groups) 48 | 49 | def forward(self, input): 50 | output = self._conv_forward(input, self.weight) 51 | if self.rectify: 52 | output = rectify(output, input, self.kernel_size, self.stride, 53 | self.padding, self.dilation, self.average) 54 | return output 55 | 56 | def extra_repr(self): 57 | return super().extra_repr() + ', rectify={}, average_mode={}'. 
\ 58 | format(self.rectify, self.average) 59 | -------------------------------------------------------------------------------- /encoding/nn/splat.py: -------------------------------------------------------------------------------- 1 | """Split-Attention""" 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | from torch.nn import Conv2d, Module, Linear, BatchNorm2d, ReLU 7 | from torch.nn.modules.utils import _pair 8 | 9 | from .rectify import RFConv2d 10 | from .dropblock import DropBlock2D 11 | 12 | __all__ = ['SplAtConv2d'] 13 | 14 | class SplAtConv2d(Module): 15 | """Split-Attention Conv2d 16 | """ 17 | def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0), 18 | dilation=(1, 1), groups=1, bias=True, 19 | radix=2, reduction_factor=4, 20 | rectify=False, rectify_avg=False, norm_layer=None, 21 | dropblock_prob=0.0, **kwargs): 22 | super(SplAtConv2d, self).__init__() 23 | padding = _pair(padding) 24 | self.rectify = rectify and (padding[0] > 0 or padding[1] > 0) 25 | self.rectify_avg = rectify_avg 26 | inter_channels = max(in_channels*radix//reduction_factor, 32) 27 | self.radix = radix 28 | self.cardinality = groups 29 | self.channels = channels 30 | self.dropblock_prob = dropblock_prob 31 | if self.rectify: 32 | self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, 33 | groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs) 34 | else: 35 | self.conv = Conv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation, 36 | groups=groups*radix, bias=bias, **kwargs) 37 | self.use_bn = norm_layer is not None 38 | self.bn0 = norm_layer(channels*radix) 39 | self.relu = ReLU(inplace=True) 40 | self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality) 41 | self.bn1 = norm_layer(inter_channels) 42 | self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality) 43 | if dropblock_prob > 0.0: 44 | self.dropblock = DropBlock2D(dropblock_prob, 3) 45 | self.rsoftmax = rSoftMax(radix, groups) 46 | 47 | def forward(self, x): 48 | x = self.conv(x) 49 | if self.use_bn: 50 | x = self.bn0(x) 51 | if self.dropblock_prob > 0.0: 52 | x = self.dropblock(x) 53 | x = self.relu(x) 54 | 55 | batch, channel = x.shape[:2] 56 | if self.radix > 1: 57 | splited = torch.split(x, channel//self.radix, dim=1) 58 | gap = sum(splited) 59 | else: 60 | gap = x 61 | gap = F.adaptive_avg_pool2d(gap, 1) 62 | gap = self.fc1(gap) 63 | 64 | if self.use_bn: 65 | gap = self.bn1(gap) 66 | gap = self.relu(gap) 67 | 68 | atten = self.fc2(gap) 69 | atten = self.rsoftmax(atten).view(batch, -1, 1, 1) 70 | 71 | if self.radix > 1: 72 | atten = torch.split(atten, channel//self.radix, dim=1) 73 | out = sum([att*split for (att, split) in zip(atten, splited)]) 74 | else: 75 | out = atten * x 76 | return out.contiguous() 77 | 78 | class rSoftMax(nn.Module): 79 | def __init__(self, radix, cardinality): 80 | super().__init__() 81 | self.radix = radix 82 | self.cardinality = cardinality 83 | 84 | def forward(self, x): 85 | batch = x.size(0) 86 | if self.radix > 1: 87 | x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2) 88 | x = F.softmax(x, dim=1) 89 | x = x.reshape(batch, -1) 90 | else: 91 | x = torch.sigmoid(x) 92 | return x 93 | -------------------------------------------------------------------------------- /encoding/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import * 2 | from .get_transform import 
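SplAtConv2d above is the radix-grouped block used inside ResNeSt bottlenecks (with rectify=True it swaps the inner convolution for the RFConv2d defined earlier). A minimal sketch with plain BatchNorm; norm_layer must be supplied, since bn0/bn1 are built from it:

import torch
from torch import nn
from encoding.nn import SplAtConv2d

block = SplAtConv2d(64, 64, kernel_size=3, padding=1, radix=2, groups=1,
                    bias=False, norm_layer=nn.BatchNorm2d)
y = block(torch.randn(2, 64, 28, 28))
print(y.shape)                         # torch.Size([2, 64, 28, 28])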
get_transform 3 | -------------------------------------------------------------------------------- /encoding/transforms/get_transform.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## LICENSE file in the root directory of this source tree 7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 | import torch 9 | from torchvision.transforms import * 10 | from .transforms import * 11 | 12 | def get_transform(dataset, base_size=None, crop_size=224, rand_aug=False, etrans=True, **kwargs): 13 | normalize = Normalize(mean=[0.485, 0.456, 0.406], 14 | std=[0.229, 0.224, 0.225]) 15 | base_size = base_size if base_size is not None else int(1.0 * crop_size / 0.875) 16 | if dataset == 'imagenet': 17 | train_transforms = [] 18 | val_transforms = [] 19 | if rand_aug: 20 | from .autoaug import RandAugment 21 | train_transforms.append(RandAugment(2, 12)) 22 | if etrans: 23 | train_transforms.extend([ 24 | ERandomCrop(crop_size), 25 | ]) 26 | val_transforms.extend([ 27 | ECenterCrop(crop_size), 28 | ]) 29 | 30 | else: 31 | train_transforms.extend([ 32 | RandomResizedCrop(crop_size), 33 | ]) 34 | val_transforms.extend([ 35 | Resize(base_size), 36 | CenterCrop(crop_size), 37 | ]) 38 | train_transforms.extend([ 39 | RandomHorizontalFlip(), 40 | ColorJitter(0.4, 0.4, 0.4), 41 | ToTensor(), 42 | Lighting(0.1, _imagenet_pca['eigval'], _imagenet_pca['eigvec']), 43 | normalize, 44 | ]) 45 | val_transforms.extend([ 46 | ToTensor(), 47 | normalize, 48 | ]) 49 | transform_train = Compose(train_transforms) 50 | transform_val = Compose(val_transforms) 51 | elif dataset == 'minc': 52 | transform_train = Compose([ 53 | Resize(base_size), 54 | RandomResizedCrop(crop_size), 55 | RandomHorizontalFlip(), 56 | ColorJitter(0.4, 0.4, 0.4), 57 | ToTensor(), 58 | Lighting(0.1, _imagenet_pca['eigval'], _imagenet_pca['eigvec']), 59 | normalize, 60 | ]) 61 | transform_val = Compose([ 62 | Resize(base_size), 63 | CenterCrop(crop_size), 64 | ToTensor(), 65 | normalize, 66 | ]) 67 | elif dataset == 'cifar10': 68 | transform_train = Compose([ 69 | RandomCrop(32, padding=4), 70 | RandomHorizontalFlip(), 71 | ToTensor(), 72 | Normalize((0.4914, 0.4822, 0.4465), 73 | (0.2023, 0.1994, 0.2010)), 74 | ]) 75 | transform_val = Compose([ 76 | ToTensor(), 77 | Normalize((0.4914, 0.4822, 0.4465), 78 | (0.2023, 0.1994, 0.2010)), 79 | ]) 80 | return transform_train, transform_val 81 | 82 | _imagenet_pca = { 83 | 'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]), 84 | 'eigvec': torch.Tensor([ 85 | [-0.5675, 0.7192, 0.4009], 86 | [-0.5808, -0.0045, -0.8140], 87 | [-0.5836, -0.6948, 0.4203], 88 | ]) 89 | } 90 | -------------------------------------------------------------------------------- /encoding/utils/__init__.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Util Tools""" 12 | from .lr_scheduler import * 13 | from 
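A sketch of pulling the transform pairs defined in get_transform.py above (ImageNet branch with the default ERandomCrop/ECenterCrop path), assuming the package imports cleanly:

from encoding.transforms import get_transform

transform_train, transform_val = get_transform('imagenet', crop_size=224)
# Both are torchvision-style Compose objects, ready to pass to a dataset constructor.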
.metrics import * 14 | from .pallete import get_mask_pallete 15 | from .train_helper import * 16 | from .presets import load_image 17 | from .files import * 18 | from .misc import * 19 | from .dist_helper import * 20 | -------------------------------------------------------------------------------- /encoding/utils/dist_helper.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import torch 12 | 13 | __all__ = ['torch_dist_sum'] 14 | 15 | def torch_dist_sum(gpu, *args): 16 | process_group = torch.distributed.group.WORLD 17 | tensor_args = [] 18 | pending_res = [] 19 | for arg in args: 20 | if isinstance(arg, torch.Tensor): 21 | tensor_arg = arg.clone().reshape(-1).detach().cuda(gpu) 22 | else: 23 | tensor_arg = torch.tensor(arg).reshape(-1).cuda(gpu) 24 | tensor_args.append(tensor_arg) 25 | pending_res.append(torch.distributed.all_reduce(tensor_arg, group=process_group, async_op=True)) 26 | for res in pending_res: 27 | res.wait() 28 | return tensor_args 29 | -------------------------------------------------------------------------------- /encoding/utils/files.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import errno 4 | import shutil 5 | import hashlib 6 | from tqdm import tqdm 7 | import torch 8 | 9 | __all__ = ['save_checkpoint', 'download', 'mkdir', 'check_sha1'] 10 | 11 | def save_checkpoint(state, args, is_best, filename='checkpoint.pth.tar'): 12 | """Saves checkpoint to disk""" 13 | if hasattr(args, 'backbone'): 14 | directory = "runs/%s/%s/%s/%s/"%(args.dataset, args.model, args.backbone, args.checkname) 15 | else: 16 | directory = "runs/%s/%s/%s/"%(args.dataset, args.model, args.checkname) 17 | if not os.path.exists(directory): 18 | os.makedirs(directory) 19 | filename = directory + filename 20 | torch.save(state, filename) 21 | if is_best: 22 | shutil.copyfile(filename, directory + 'model_best.pth.tar') 23 | 24 | 25 | def download(url, path=None, overwrite=False, sha1_hash=None): 26 | """Download an given URL 27 | Parameters 28 | ---------- 29 | url : str 30 | URL to download 31 | path : str, optional 32 | Destination path to store downloaded file. By default stores to the 33 | current directory with same name as in url. 34 | overwrite : bool, optional 35 | Whether to overwrite destination file if already exists. 36 | sha1_hash : str, optional 37 | Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified 38 | but doesn't match. 39 | Returns 40 | ------- 41 | str 42 | The file path of the downloaded file. 
43 | """ 44 | if path is None: 45 | fname = url.split('/')[-1] 46 | else: 47 | path = os.path.expanduser(path) 48 | if os.path.isdir(path): 49 | fname = os.path.join(path, url.split('/')[-1]) 50 | else: 51 | fname = path 52 | 53 | if overwrite or not os.path.exists(fname) or (sha1_hash and not check_sha1(fname, sha1_hash)): 54 | dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname))) 55 | if not os.path.exists(dirname): 56 | os.makedirs(dirname) 57 | 58 | print('Downloading %s from %s...'%(fname, url)) 59 | r = requests.get(url, stream=True) 60 | if r.status_code != 200: 61 | raise RuntimeError("Failed downloading url %s"%url) 62 | total_length = r.headers.get('content-length') 63 | with open(fname, 'wb') as f: 64 | if total_length is None: # no content length header 65 | for chunk in r.iter_content(chunk_size=1024): 66 | if chunk: # filter out keep-alive new chunks 67 | f.write(chunk) 68 | else: 69 | total_length = int(total_length) 70 | for chunk in tqdm(r.iter_content(chunk_size=1024), 71 | total=int(total_length / 1024. + 0.5), 72 | unit='KB', unit_scale=False, dynamic_ncols=True): 73 | f.write(chunk) 74 | 75 | if sha1_hash and not check_sha1(fname, sha1_hash): 76 | raise UserWarning('File {} is downloaded but the content hash does not match. ' \ 77 | 'The repo may be outdated or download may be incomplete. ' \ 78 | 'If the "repo_url" is overridden, consider switching to ' \ 79 | 'the default repo.'.format(fname)) 80 | 81 | return fname 82 | 83 | 84 | def check_sha1(filename, sha1_hash): 85 | """Check whether the sha1 hash of the file content matches the expected hash. 86 | Parameters 87 | ---------- 88 | filename : str 89 | Path to the file. 90 | sha1_hash : str 91 | Expected sha1 hash in hexadecimal digits. 92 | Returns 93 | ------- 94 | bool 95 | Whether the file content matches the expected hash. 
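A runnable sketch of the check_sha1/mkdir helpers above on a locally written file; the download() call is only referenced in a comment because any URL here would be a placeholder:

import hashlib
from encoding.utils import check_sha1, mkdir

mkdir('./downloads')
with open('./downloads/sample.txt', 'wb') as f:
    f.write(b'hello')
digest = hashlib.sha1(b'hello').hexdigest()
print(check_sha1('./downloads/sample.txt', digest))   # True
# download(url, path='./downloads/', sha1_hash=digest) re-fetches whenever that check fails.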
96 | """ 97 | sha1 = hashlib.sha1() 98 | with open(filename, 'rb') as f: 99 | while True: 100 | data = f.read(1048576) 101 | if not data: 102 | break 103 | sha1.update(data) 104 | 105 | return sha1.hexdigest() == sha1_hash 106 | 107 | 108 | def mkdir(path): 109 | """make dir exists okay""" 110 | try: 111 | os.makedirs(path) 112 | except OSError as exc: # Python >2.5 113 | if exc.errno == errno.EEXIST and os.path.isdir(path): 114 | pass 115 | else: 116 | raise 117 | -------------------------------------------------------------------------------- /encoding/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import math 12 | 13 | __all__ = ['LR_Scheduler', 'LR_Scheduler_Head'] 14 | 15 | class LR_Scheduler(object): 16 | """Learning Rate Scheduler 17 | 18 | Step mode: ``lr = baselr * 0.1 ^ {floor(epoch-1 / lr_step)}`` 19 | 20 | Cosine mode: ``lr = baselr * 0.5 * (1 + cos(iter/maxiter))`` 21 | 22 | Poly mode: ``lr = baselr * (1 - iter/maxiter) ^ 0.9`` 23 | 24 | Args: 25 | args: :attr:`args.lr_scheduler` lr scheduler mode (`cos`, `poly`), 26 | :attr:`args.lr` base learning rate, :attr:`args.epochs` number of epochs, 27 | :attr:`args.lr_step` 28 | 29 | iters_per_epoch: number of iterations per epoch 30 | """ 31 | def __init__(self, mode, base_lr, num_epochs, iters_per_epoch=0, 32 | lr_step=0, warmup_epochs=0, quiet=False): 33 | self.mode = mode 34 | self.quiet = quiet 35 | if not quiet: 36 | print('Using {} LR scheduler with warm-up epochs of {}!'.format(self.mode, warmup_epochs)) 37 | if mode == 'step': 38 | assert lr_step 39 | self.base_lr = base_lr 40 | self.lr_step = lr_step 41 | self.iters_per_epoch = iters_per_epoch 42 | self.epoch = -1 43 | self.warmup_iters = warmup_epochs * iters_per_epoch 44 | self.total_iters = (num_epochs - warmup_epochs) * iters_per_epoch 45 | 46 | def __call__(self, optimizer, i, epoch, best_pred): 47 | T = epoch * self.iters_per_epoch + i 48 | # warm up lr schedule 49 | if self.warmup_iters > 0 and T < self.warmup_iters: 50 | lr = self.base_lr * 1.0 * T / self.warmup_iters 51 | elif self.mode == 'cos': 52 | T = T - self.warmup_iters 53 | lr = 0.5 * self.base_lr * (1 + math.cos(1.0 * T / self.total_iters * math.pi)) 54 | elif self.mode == 'poly': 55 | T = T - self.warmup_iters 56 | lr = self.base_lr * pow((1 - 1.0 * T / self.total_iters), 0.9) 57 | elif self.mode == 'step': 58 | lr = self.base_lr * (0.1 ** (epoch // self.lr_step)) 59 | else: 60 | raise NotImplemented 61 | if epoch > self.epoch and (epoch == 0 or best_pred > 0.0): 62 | if not self.quiet: 63 | print('\n=>Epoch %i, learning rate = %.4f, \ 64 | previous best = %.4f' % (epoch, lr, best_pred)) 65 | self.epoch = epoch 66 | assert lr >= 0 67 | self._adjust_learning_rate(optimizer, lr) 68 | 69 | def _adjust_learning_rate(self, optimizer, lr): 70 | for i in range(len(optimizer.param_groups)): 71 | optimizer.param_groups[i]['lr'] = lr 72 | 73 | class LR_Scheduler_Head(LR_Scheduler): 74 | """Incease the additional head LR to be 10 times""" 75 | def _adjust_learning_rate(self, optimizer, lr): 76 | if 
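The LR_Scheduler above is stepped once per iteration rather than once per epoch; a minimal cosine-with-warm-up sketch on a toy optimizer:

import torch
from encoding.utils import LR_Scheduler

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = LR_Scheduler('cos', base_lr=0.1, num_epochs=10,
                         iters_per_epoch=100, warmup_epochs=1)
for epoch in range(10):
    for i in range(100):
        scheduler(optimizer, i, epoch, best_pred=0.0)   # linear warm-up, then cosine decay
        # forward / backward / optimizer.step() would go here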
len(optimizer.param_groups) == 1: 77 | optimizer.param_groups[0]['lr'] = lr 78 | else: 79 | # enlarge the lr at the head 80 | optimizer.param_groups[0]['lr'] = lr 81 | for i in range(1, len(optimizer.param_groups)): 82 | optimizer.param_groups[i]['lr'] = lr * 10 83 | -------------------------------------------------------------------------------- /encoding/utils/misc.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## LICENSE file in the root directory of this source tree 7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 | import warnings 9 | 10 | __all__ = ['AverageMeter', 'EncodingDeprecationWarning'] 11 | 12 | class AverageMeter(object): 13 | """Computes and stores the average and current value""" 14 | def __init__(self): 15 | self.reset() 16 | 17 | def reset(self): 18 | #self.val = 0 19 | self.sum = 0 20 | self.count = 0 21 | 22 | def update(self, val, n=1): 23 | #self.val = val 24 | self.sum += val * n 25 | self.count += n 26 | 27 | @property 28 | def avg(self): 29 | avg = 0 if self.count == 0 else self.sum / self.count 30 | return avg 31 | 32 | class EncodingDeprecationWarning(DeprecationWarning): 33 | pass 34 | 35 | warnings.simplefilter('once', EncodingDeprecationWarning) 36 | -------------------------------------------------------------------------------- /encoding/utils/precise_bn.py: -------------------------------------------------------------------------------- 1 | # Adapted from https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/precise_bn.py 2 | import itertools 3 | from typing import Any, Iterable, List, Tuple, Type 4 | 5 | import torch 6 | from torch import nn 7 | from ..nn import DistSyncBatchNorm, SyncBatchNorm 8 | 9 | BN_MODULE_TYPES: Tuple[Type[nn.Module]] = ( 10 | torch.nn.BatchNorm1d, 11 | torch.nn.BatchNorm2d, 12 | torch.nn.BatchNorm3d, 13 | torch.nn.SyncBatchNorm, 14 | DistSyncBatchNorm, 15 | SyncBatchNorm, 16 | ) 17 | 18 | 19 | @torch.no_grad() 20 | def update_bn_stats( 21 | model: nn.Module, data_loader: Iterable[Any], num_iters: int = 200 # pyre-ignore 22 | ) -> None: 23 | """ 24 | Recompute and update the batch norm stats to make them more precise. During 25 | training both BN stats and the weight are changing after every iteration, so 26 | the running average can not precisely reflect the actual stats of the 27 | current model. 28 | In this function, the BN stats are recomputed with fixed weights, to make 29 | the running average more precise. Specifically, it computes the true average 30 | of per-batch mean/variance instead of the running average. 31 | Args: 32 | model (nn.Module): the model whose bn stats will be recomputed. 33 | Note that: 34 | 1. This function will not alter the training mode of the given model. 35 | Users are responsible for setting the layers that needs 36 | precise-BN to training mode, prior to calling this function. 37 | 2. Be careful if your models contain other stateful layers in 38 | addition to BN, i.e. layers whose state can change in forward 39 | iterations. This function will alter their state. If you wish 40 | them unchanged, you need to either pass in a submodule without 41 | those layers, or backup the states. 42 | data_loader (iterator): an iterator. Produce data as inputs to the model. 43 | num_iters (int): number of iterations to compute the stats. 
44 | """ 45 | bn_layers = get_bn_modules(model) 46 | 47 | if len(bn_layers) == 0: 48 | return 49 | 50 | # In order to make the running stats only reflect the current batch, the 51 | # momentum is disabled. 52 | # bn.running_mean = (1 - momentum) * bn.running_mean + momentum * batch_mean 53 | # Setting the momentum to 1.0 to compute the stats without momentum. 54 | momentum_actual = [bn.momentum for bn in bn_layers] # pyre-ignore 55 | for bn in bn_layers: 56 | bn.momentum = 1.0 57 | 58 | # Note that running_var actually means "running average of variance" 59 | running_mean = [ 60 | torch.zeros_like(bn.running_mean) for bn in bn_layers # pyre-ignore 61 | ] 62 | running_var = [torch.zeros_like(bn.running_var) for bn in bn_layers] # pyre-ignore 63 | 64 | ind = -1 65 | for ind, inputs in enumerate(itertools.islice(data_loader, num_iters)): 66 | inputs=inputs.cuda() 67 | with torch.no_grad(): # No need to backward 68 | model(inputs) 69 | 70 | for i, bn in enumerate(bn_layers): 71 | # Accumulates the bn stats. 72 | running_mean[i] += (bn.running_mean - running_mean[i]) / (ind + 1) 73 | running_var[i] += (bn.running_var - running_var[i]) / (ind + 1) 74 | # We compute the "average of variance" across iterations. 75 | assert ind == num_iters - 1, ( 76 | "update_bn_stats is meant to run for {} iterations, " 77 | "but the dataloader stops at {} iterations.".format(num_iters, ind) 78 | ) 79 | 80 | for i, bn in enumerate(bn_layers): 81 | # Sets the precise bn stats. 82 | bn.running_mean = running_mean[i] 83 | bn.running_var = running_var[i] 84 | bn.momentum = momentum_actual[i] 85 | 86 | 87 | def get_bn_modules(model: nn.Module) -> List[nn.Module]: 88 | """ 89 | Find all BatchNorm (BN) modules that are in training mode. See 90 | fvcore.precise_bn.BN_MODULE_TYPES for a list of all modules that are 91 | included in this search. 92 | Args: 93 | model (nn.Module): a model possibly containing BN modules. 94 | Returns: 95 | list[nn.Module]: all BN modules in the model. 96 | """ 97 | # Finds all the bn layers. 
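A hedged sketch of running the precise-BN pass above on a toy model. It assumes a CUDA device (update_bn_stats moves each batch with inputs.cuda()) and that the package's native extensions are built so encoding.nn imports:

import torch
from torch import nn
from encoding.utils.precise_bn import update_bn_stats

model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.BatchNorm2d(8)).cuda()
loader = [torch.randn(4, 3, 32, 32) for _ in range(10)]   # any iterable yielding input batches
model.train()                                             # BN layers must be in training mode
update_bn_stats(model, loader, num_iters=10)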
98 | bn_layers = [ 99 | m for m in model.modules() if m.training and isinstance(m, BN_MODULE_TYPES) 100 | ] 101 | return bn_layers 102 | -------------------------------------------------------------------------------- /encoding/utils/presets.py: -------------------------------------------------------------------------------- 1 | """Preset Transforms for Demos""" 2 | from PIL import Image 3 | import numpy as np 4 | import torch 5 | import torchvision.transforms as transform 6 | 7 | __all__ = ['load_image'] 8 | 9 | input_transform = transform.Compose([ 10 | transform.ToTensor(), 11 | transform.Normalize([.485, .456, .406], [.229, .224, .225])]) 12 | 13 | def load_image(filename, size=None, scale=None, keep_asp=True, transform=input_transform): 14 | """Load the image for demos""" 15 | img = Image.open(filename).convert('RGB') 16 | if size is not None: 17 | if keep_asp: 18 | size2 = int(size * 1.0 / img.size[0] * img.size[1]) 19 | img = img.resize((size, size2), Image.ANTIALIAS) 20 | else: 21 | img = img.resize((size, size), Image.ANTIALIAS) 22 | elif scale is not None: 23 | img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS) 24 | 25 | if transform: 26 | img = transform(img) 27 | return img 28 | -------------------------------------------------------------------------------- /encoding/utils/train_helper.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import numpy as np 12 | import torch 13 | import torch.nn as nn 14 | 15 | #from ..nn import SyncBatchNorm 16 | from torch.nn.modules.batchnorm import _BatchNorm 17 | 18 | __all__ = ['MixUpWrapper', 'get_selabel_vector'] 19 | 20 | class MixUpWrapper(object): 21 | def __init__(self, alpha, num_classes, dataloader, device): 22 | self.alpha = alpha 23 | self.dataloader = dataloader 24 | self.num_classes = num_classes 25 | self.device = device 26 | 27 | def mixup_loader(self, loader): 28 | def mixup(alpha, num_classes, data, target): 29 | with torch.no_grad(): 30 | bs = data.size(0) 31 | c = np.random.beta(alpha, alpha) 32 | perm = torch.randperm(bs).cuda() 33 | 34 | md = c * data + (1-c) * data[perm, :] 35 | mt = c * target + (1-c) * target[perm, :] 36 | return md, mt 37 | 38 | for input, target in loader: 39 | input, target = input.cuda(self.device), target.cuda(self.device) 40 | target = torch.nn.functional.one_hot(target, self.num_classes) 41 | i, t = mixup(self.alpha, self.num_classes, input, target) 42 | yield i, t 43 | 44 | def __len__(self): 45 | return len(self.dataloader) 46 | 47 | def __iter__(self): 48 | return self.mixup_loader(self.dataloader) 49 | 50 | 51 | def get_selabel_vector(target, nclass): 52 | r"""Get SE-Loss Label in a batch 53 | Args: 54 | predict: input 4D tensor 55 | target: label 3D tensor (BxHxW) 56 | nclass: number of categories (int) 57 | Output: 58 | 2D tensor (BxnClass) 59 | """ 60 | batch = target.size(0) 61 | tvect = torch.zeros(batch, nclass) 62 | for i in range(batch): 63 | hist = torch.histc(target[i].data.float(), 64 | bins=nclass, min=0, 65 | max=nclass-1) 66 | vect = hist>0 67 | tvect[i] = vect 68 
| return tvect 69 | -------------------------------------------------------------------------------- /experiments/recognition/README.md: -------------------------------------------------------------------------------- 1 | - [Link to the EncNet CIFAR experiments and pre-trained models](http://hangzh.com/PyTorch-Encoding/experiments/cifar.html) 2 | 3 | - [Link to the Deep TEN experiments and pre-trained models](http://hangzh.com/PyTorch-Encoding/experiments/texture.html) 4 | -------------------------------------------------------------------------------- /experiments/recognition/resnet50_baseline.sh: -------------------------------------------------------------------------------- 1 | # baseline 2 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_check --lr 0.025 --batch-size 64 3 | 4 | # rectify 5 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt --lr 0.1 --batch-size 256 --rectify 6 | 7 | # warmup 8 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt_warm --lr 0.1 --batch-size 256 --warmup-epochs 5 --rectify 9 | 10 | # no-bn-wd 11 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt_nobnwd_warm --lr 0.1 --batch-size 256 --no-bn-wd --warmup-epochs 5 --rectify 12 | 13 | # LS 14 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt_ls --lr 0.1 --batch-size 256 --label-smoothing 0.1 --rectify 15 | 16 | # Mixup + LS 17 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 200 --checkname resnet50_rt_ls_mixup --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --rectify 18 | 19 | # last-gamma 20 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt_gamma --lr 0.1 --batch-size 256 --last-gamma --rectify 21 | 22 | # BoTs 23 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 200 --checkname resnet50_rt_bots --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --last-gamma --no-bn-wd --warmup-epochs 5 --rectify 24 | 25 | # resnet50d 26 | python train_dist.py --dataset imagenet --model resnet50d --lr-scheduler cos --epochs 200 --checkname resnet50d_rt_bots --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --last-gamma --no-bn-wd --warmup-epochs 5 --rectify 27 | 28 | # dropblock 29 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 200 --checkname --label-smoothing 0.1 --mixup 0.2 --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --dropblock-prob 0.1 --rectify 30 | 31 | # resnest50 32 | python train_dist.py --dataset imagenet --model resnest50 --lr-scheduler cos --epochs 270 --checkname resnest50_rt_bots --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --last-gamma --no-bn-wd --warmup-epochs 5 --dropblock-prob 0.1 --rectify 33 | -------------------------------------------------------------------------------- /experiments/recognition/test_flops.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## This source code is licensed under the MIT-style license found in the 7 | ## 
LICENSE file in the root directory of this source tree 8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 | 10 | import argparse 11 | import torch 12 | 13 | from thop import profile, clever_format 14 | 15 | import encoding 16 | 17 | def get_args(): 18 | # data settings 19 | parser = argparse.ArgumentParser(description='Deep Encoding') 20 | parser.add_argument('--crop-size', type=int, default=224, 21 | help='crop image size') 22 | # model params 23 | parser.add_argument('--model', type=str, default='densenet', 24 | help='network model type (default: densenet)') 25 | parser.add_argument('--rectify', action='store_true', 26 | default=False, help='rectify convolution') 27 | parser.add_argument('--rectify-avg', action='store_true', 28 | default=False, help='rectify convolution') 29 | # checking point 30 | parser = parser 31 | 32 | args = parser.parse_args() 33 | return args 34 | 35 | def main(): 36 | args = get_args() 37 | 38 | model_kwargs = {} 39 | if args.rectify: 40 | model_kwargs['rectified_conv'] = True 41 | model_kwargs['rectify_avg'] = args.rectify_avg 42 | 43 | model = encoding.models.get_model(args.model, **model_kwargs) 44 | print(model) 45 | 46 | dummy_images = torch.rand(1, 3, args.crop_size, args.crop_size) 47 | 48 | #count_ops(model, dummy_images, verbose=False) 49 | macs, params = profile(model, inputs=(dummy_images, )) 50 | macs, params = clever_format([macs, params], "%.3f") 51 | 52 | print(f"macs: {macs}, params: {params}") 53 | 54 | if __name__ == '__main__': 55 | main() 56 | -------------------------------------------------------------------------------- /experiments/segmentation/demo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import encoding 3 | 4 | # Get the model 5 | model = encoding.models.get_model('fcn_resnet50s_ade', pretrained=True).cuda() 6 | model.eval() 7 | 8 | # Prepare the image 9 | url = 'https://github.com/zhanghang1989/image-data/blob/master/' + \ 10 | 'encoding/segmentation/ade20k/ADE_val_00001142.jpg?raw=true' 11 | filename = 'example.jpg' 12 | img = encoding.utils.load_image( 13 | encoding.utils.download(url, filename)).cuda().unsqueeze(0) 14 | 15 | # Make prediction 16 | output = model.evaluate(img) 17 | predict = torch.max(output, 1)[1].cpu().numpy() + 1 18 | 19 | # Get color pallete for visualization 20 | mask = encoding.utils.get_mask_pallete(predict, 'ade20k') 21 | mask.save('output.png') 22 | -------------------------------------------------------------------------------- /experiments/segmentation/model_mapping.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | 5 | 6 | # CLGD_key_map = {'head.gamma': 'head.clgd.gamma', 'head.skipconv.0.weight': 'head.clgd.skipconv.0.weight', 7 | # 'head.skipconv.1.weight': 'head.clgd.skipconv.1.weight', 'head.skipconv.1.bias': 'head.clgd.skipconv.1.bias', 8 | # 'head.skipconv.1.running_mean': 'head.clgd.skipconv.1.running_mean', 'head.skipconv.1.running_var': 'head.clgd.skipconv.1.running_var', 9 | # 'head.skipconv.1.num_batches_tracked': 'head.clgd.skipconv.1.num_batches_tracked', 'head.fusion.0.weight': 'head.clgd.fusion.0.weight', 10 | # 'head.fusion.1.weight': 'head.clgd.fusion.1.weight', 'head.fusion.1.bias': 'head.clgd.fusion.1.bias', 11 | # 'head.fusion.1.running_mean': 'head.clgd.fusion.1.running_mean', 'head.fusion.1.running_var': 'head.clgd.fusion.1.running_var', 12 | # 'head.fusion.1.num_batches_tracked': 
'head.clgd.fusion.1.num_batches_tracked', 'head.fusion2.0.weight': 'head.clgd.fusion2.0.weight', 13 | # 'head.fusion2.1.weight': 'head.clgd.fusion2.1.weight', 'head.fusion2.1.bias': 'head.clgd.fusion2.1.bias', 14 | # 'head.fusion2.1.running_mean': 'head.clgd.fusion2.1.running_mean', 'head.fusion2.1.running_var': 'head.clgd.fusion2.1.running_var', 15 | # 'head.fusion2.1.num_batches_tracked': 'head.clgd.fusion2.1.num_batches_tracked', 'head.att.0.weight': 'head.clgd.att.0.weight', 16 | # 'head.att.0.bias': 'head.clgd.att.0.bias'} 17 | 18 | del_keys = ["auxlayer.conv5.0.weight", "auxlayer.conv5.1.bias", "auxlayer.conv5.1.num_batches_tracked", \ 19 | "auxlayer.conv5.1.running_mean", "auxlayer.conv5.1.running_var", "auxlayer.conv5.1.weight", \ 20 | "auxlayer.conv5.4.bias", "auxlayer.conv5.4.weight"] 21 | 22 | def _rename_glgd_weights(layer_keys): 23 | 24 | layer_keys = [k.replace("head.skipconv", "head.clgd.conv_low") for k in layer_keys] 25 | layer_keys = [k.replace("head.fusion2", "head.clgd.conv_out") for k in layer_keys] 26 | layer_keys = [k.replace("head.fusion", "head.clgd.conv_cat") for k in layer_keys] 27 | layer_keys = [k.replace("head.att", "head.clgd.conv_att") for k in layer_keys] 28 | layer_keys = [k.replace("head.gamma", "head.clgd.gamma") for k in layer_keys] 29 | 30 | return layer_keys 31 | 32 | def _rename_dran_weights(layer_keys): 33 | 34 | layer_keys = [k.replace("head.conv5_s", "head.conv_cpam_b") for k in layer_keys] 35 | layer_keys = [k.replace("head.conv5_c", "head.conv_ccam_b") for k in layer_keys] 36 | layer_keys = [k.replace("head.conv51_c", "head.ccam_enc") for k in layer_keys] 37 | layer_keys = [k.replace("head.conv52", "head.conv_cpam_e") for k in layer_keys] 38 | layer_keys = [k.replace("head.conv51", "head.conv_ccam_e") for k in layer_keys] 39 | layer_keys = [k.replace("head.conv_f", "head.conv_cat") for k in layer_keys] 40 | layer_keys = [k.replace("head.conv6", "cls_seg") for k in layer_keys] 41 | layer_keys = [k.replace("head.conv7", "cls_aux") for k in layer_keys] 42 | 43 | layer_keys = [k.replace("head.en_s", "head.cpam_enc") for k in layer_keys] 44 | layer_keys = [k.replace("head.de_s", "head.cpam_dec") for k in layer_keys] 45 | layer_keys = [k.replace("head.de_c", "head.ccam_dec") for k in layer_keys] 46 | 47 | return layer_keys 48 | 49 | def _rename_cpam_weights(layer_keys): 50 | 51 | layer_keys = [k.replace("head.cpam_dec.query_conv2", "head.cpam_dec.conv_query") for k in layer_keys] 52 | layer_keys = [k.replace("head.cpam_dec.key_conv2", "head.cpam_dec.conv_key") for k in layer_keys] 53 | layer_keys = [k.replace("head.cpam_dec.value2", "head.cpam_dec.conv_value") for k in layer_keys] 54 | 55 | return layer_keys 56 | 57 | def rename_weight_for_head(weights): 58 | 59 | original_keys = sorted(weights.keys()) 60 | layer_keys = sorted(weights.keys()) 61 | 62 | new_weights = OrderedDict() 63 | for k in original_keys: 64 | v = weights[k] 65 | w=v 66 | 67 | layer_keys = _rename_glgd_weights(layer_keys) 68 | layer_keys = _rename_dran_weights(layer_keys) 69 | layer_keys = _rename_cpam_weights(layer_keys) 70 | key_map = {k: v for k, v in zip(original_keys, layer_keys)} 71 | new_weights[key_map[k] if key_map.get(k) else k] = w 72 | 73 | for keys in del_keys: 74 | del new_weights[keys] 75 | return new_weights -------------------------------------------------------------------------------- /experiments/segmentation/test.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=4,5,6,7 python test.py --dataset citys 
--model dran --backbone resnet101 --resume models/dran101.pth.tar --eval --base-size 2048 --crop-size 768 --workers 1 --multi-grid --multi-dilation 4 8 16 --os 8 --aux 2 | -------------------------------------------------------------------------------- /experiments/segmentation/test_danet.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=4,5,6,7 python test.py --dataset citys --model danet --backbone resnet101 --resume models/DANet101.pth.tar --eval --base-size 2048 --crop-size 768 --workers 1 --multi-grid --multi-dilation 4 8 16 --os 8 --aux --no-deepstem 2 | -------------------------------------------------------------------------------- /experiments/segmentation/test_models.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import torch 3 | import encoding 4 | from option import Options 5 | from torch.autograd import Variable 6 | 7 | if __name__ == "__main__": 8 | args = Options().parse() 9 | model = encoding.models.get_segmentation_model(args.model, dataset=args.dataset, aux=args.aux, 10 | backbone=args.backbone, 11 | se_loss=args.se_loss, norm_layer=torch.nn.BatchNorm2d) 12 | print('Creating the model:') 13 | 14 | print(model) 15 | model.cuda() 16 | model.eval() 17 | x = Variable(torch.Tensor(4, 3, 480, 480)).cuda() 18 | with torch.no_grad(): 19 | out = model(x) 20 | for y in out: 21 | print(y.size()) 22 | -------------------------------------------------------------------------------- /img/overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/img/overview.jpg -------------------------------------------------------------------------------- /img/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/img/overview.png -------------------------------------------------------------------------------- /img/tab3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/img/tab3.jpg -------------------------------------------------------------------------------- /img/tab3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junfu1115/DANet/b0a0fa008148f102321cdaa7ca38fb0ddd0239d1/img/tab3.png -------------------------------------------------------------------------------- /scripts/prepare_ade20k.py: -------------------------------------------------------------------------------- 1 | """Prepare ADE20K dataset""" 2 | import os 3 | import shutil 4 | import argparse 5 | import zipfile 6 | from encoding.utils import download, mkdir 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Initialize ADE20K dataset.', 13 | epilog='Example: python prepare_ade20k.py', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk') 16 | args = parser.parse_args() 17 | return args 18 | 19 | def download_ade(path, overwrite=False): 20 | _AUG_DOWNLOAD_URLS = [ 21 | ('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip', 
'219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'), 22 | ('http://data.csail.mit.edu/places/ADEchallenge/release_test.zip', 'e05747892219d10e9243933371a497e905a4860c'),] 23 | download_dir = os.path.join(path, 'downloads') 24 | mkdir(download_dir) 25 | for url, checksum in _AUG_DOWNLOAD_URLS: 26 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 27 | # extract 28 | with zipfile.ZipFile(filename,"r") as zip_ref: 29 | zip_ref.extractall(path=path) 30 | 31 | 32 | if __name__ == '__main__': 33 | args = parse_args() 34 | mkdir(os.path.expanduser('~/.encoding/data')) 35 | if args.download_dir is not None: 36 | if os.path.isdir(_TARGET_DIR): 37 | os.remove(_TARGET_DIR) 38 | # make symlink 39 | os.symlink(args.download_dir, _TARGET_DIR) 40 | else: 41 | download_ade(_TARGET_DIR, overwrite=False) 42 | -------------------------------------------------------------------------------- /scripts/prepare_citys.py: -------------------------------------------------------------------------------- 1 | """Prepare Cityscapes dataset""" 2 | import os 3 | import shutil 4 | import argparse 5 | import zipfile 6 | from encoding.utils import check_sha1, download, mkdir 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Initialize Cityscapes dataset.', 13 | epilog='Example: python prepare_citys.py', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk') 16 | args = parser.parse_args() 17 | return args 18 | 19 | def download_city(path, overwrite=False): 20 | _CITY_DOWNLOAD_URLS = [ 21 | ('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'), 22 | ('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404')] 23 | download_dir = os.path.join(path, 'downloads') 24 | mkdir(download_dir) 25 | for filename, checksum in _CITY_DOWNLOAD_URLS: 26 | if not check_sha1(filename, checksum): 27 | raise UserWarning('File {} is downloaded but the content hash does not match. ' \ 28 | 'The repo may be outdated or download may be incomplete. 
' \ 29 | 'If the "repo_url" is overridden, consider switching to ' \ 30 | 'the default repo.'.format(filename)) 31 | # extract 32 | with zipfile.ZipFile(filename,"r") as zip_ref: 33 | zip_ref.extractall(path=path) 34 | print("Extracted", filename) 35 | 36 | if __name__ == '__main__': 37 | args = parse_args() 38 | mkdir(os.path.expanduser('~/.encoding/data')) 39 | if args.download_dir is not None: 40 | if os.path.isdir(_TARGET_DIR): 41 | os.remove(_TARGET_DIR) 42 | # make symlink 43 | os.symlink(args.download_dir, _TARGET_DIR) 44 | else: 45 | download_city(_TARGET_DIR, overwrite=False) 46 | -------------------------------------------------------------------------------- /scripts/prepare_coco.py: -------------------------------------------------------------------------------- 1 | """Prepare MS COCO datasets""" 2 | import os 3 | import shutil 4 | import argparse 5 | import zipfile 6 | from encoding.utils import download, mkdir 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Initialize MS COCO dataset.', 13 | epilog='Example: python mscoco.py --download-dir ~/mscoco', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk') 16 | args = parser.parse_args() 17 | return args 18 | 19 | def download_coco(path, overwrite=False): 20 | _DOWNLOAD_URLS = [ 21 | ('http://images.cocodataset.org/zips/train2017.zip', 22 | '10ad623668ab00c62c096f0ed636d6aff41faca5'), 23 | ('http://images.cocodataset.org/zips/val2017.zip', 24 | '4950dc9d00dbe1c933ee0170f5797584351d2a41'), 25 | ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', 26 | '8551ee4bb5860311e79dace7e79cb91e432e78b3'), 27 | #('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip', 28 | # '46cdcf715b6b4f67e980b529534e79c2edffe084'), 29 | #('http://images.cocodataset.org/zips/test2017.zip', 30 | # '99813c02442f3c112d491ea6f30cecf421d0e6b3'), 31 | ('https://hangzh.s3.amazonaws.com/encoding/data/coco/train_ids.pth', 32 | '12cd266f97c8d9ea86e15a11f11bcb5faba700b6'), 33 | ('https://hangzh.s3.amazonaws.com/encoding/data/coco/val_ids.pth', 34 | '4ce037ac33cbf3712fd93280a1c5e92dae3136bb'), 35 | ] 36 | mkdir(path) 37 | for url, checksum in _DOWNLOAD_URLS: 38 | filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) 39 | # extract 40 | if os.path.splitext(filename)[1] == '.zip': 41 | with zipfile.ZipFile(filename) as zf: 42 | zf.extractall(path=path) 43 | else: 44 | shutil.move(filename, os.path.join(path, 'annotations/'+os.path.basename(filename))) 45 | 46 | 47 | def install_coco_api(): 48 | repo_url = "https://github.com/cocodataset/cocoapi" 49 | os.system("git clone " + repo_url) 50 | os.system("cd cocoapi/PythonAPI/ && python setup.py install") 51 | shutil.rmtree('cocoapi') 52 | try: 53 | import pycocotools 54 | except Exception: 55 | print("Installing COCO API failed, please install it manually %s"%(repo_url)) 56 | 57 | 58 | if __name__ == '__main__': 59 | args = parse_args() 60 | mkdir(os.path.expanduser('~/.encoding/data')) 61 | if args.download_dir is not None: 62 | if os.path.isdir(_TARGET_DIR): 63 | os.remove(_TARGET_DIR) 64 | # make symlink 65 | os.symlink(args.download_dir, _TARGET_DIR) 66 | else: 67 | download_coco(_TARGET_DIR, overwrite=False) 68 | install_coco_api() 69 | -------------------------------------------------------------------------------- /scripts/prepare_imagenet.py: 
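Note: the `prepare_imagenet.py` script below does not download the ImageNet archives itself; it expects `ILSVRC2012_img_train.tar` and `ILSVRC2012_img_val.tar` to already exist in `--download-dir`, optionally verifies them when `--checksum` is passed, and then extracts them. A minimal pre-check sketch using the same `check_sha1` helper the script imports (the download directory path here is a hypothetical example; the file names and SHA-1 hashes are copied from the script below):

```python
# Hypothetical pre-check before running scripts/prepare_imagenet.py:
# verify the manually downloaded tar files against the script's hard-coded SHA-1 hashes.
import os
from encoding.utils import check_sha1

download_dir = os.path.expanduser('~/Downloads/imagenet')  # assumed location of the tar files
tars = {
    'ILSVRC2012_img_train.tar': '43eda4fe35c1705d6606a6a7a633bc965d194284',
    'ILSVRC2012_img_val.tar': '5f3f73da3395154b60528b2b2a2caf2374f5f178',
}
for fname, sha1 in tars.items():
    path = os.path.join(download_dir, fname)
    ok = os.path.exists(path) and check_sha1(path, sha1)
    print(f"{fname}: {'OK' if ok else 'missing or corrupted'}")
```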
-------------------------------------------------------------------------------- 1 | """Prepare the ImageNet dataset""" 2 | import os 3 | import argparse 4 | import tarfile 5 | import pickle 6 | import gzip 7 | import subprocess 8 | from tqdm import tqdm 9 | import subprocess 10 | from encoding.utils import check_sha1, download, mkdir 11 | 12 | _TARGET_DIR = os.path.expanduser('~/.encoding/data/ILSVRC2012') 13 | _TRAIN_TAR = 'ILSVRC2012_img_train.tar' 14 | _TRAIN_TAR_SHA1 = '43eda4fe35c1705d6606a6a7a633bc965d194284' 15 | _VAL_TAR = 'ILSVRC2012_img_val.tar' 16 | _VAL_TAR_SHA1 = '5f3f73da3395154b60528b2b2a2caf2374f5f178' 17 | 18 | def parse_args(): 19 | parser = argparse.ArgumentParser( 20 | description='Setup the ImageNet dataset.', 21 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 22 | parser.add_argument('--download-dir', required=True, 23 | help="The directory that contains downloaded tar files") 24 | parser.add_argument('--target-dir', default=_TARGET_DIR, 25 | help="The directory to store extracted images") 26 | parser.add_argument('--checksum', action='store_true', 27 | help="If check integrity before extracting.") 28 | parser.add_argument('--with-rec', action='store_true', 29 | help="If build image record files.") 30 | parser.add_argument('--num-thread', type=int, default=1, 31 | help="Number of threads to use when building image record file.") 32 | args = parser.parse_args() 33 | return args 34 | 35 | def check_file(filename, checksum, sha1): 36 | if not os.path.exists(filename): 37 | raise ValueError('File not found: '+filename) 38 | if checksum and not check_sha1(filename, sha1): 39 | raise ValueError('Corrupted file: '+filename) 40 | 41 | def extract_train(tar_fname, target_dir, with_rec=False, num_thread=1): 42 | mkdir(target_dir) 43 | with tarfile.open(tar_fname) as tar: 44 | print("Extracting "+tar_fname+"...") 45 | # extract each class one-by-one 46 | pbar = tqdm(total=len(tar.getnames())) 47 | for class_tar in tar: 48 | pbar.set_description('Extract '+class_tar.name) 49 | tar.extract(class_tar, target_dir) 50 | class_fname = os.path.join(target_dir, class_tar.name) 51 | class_dir = os.path.splitext(class_fname)[0] 52 | os.mkdir(class_dir) 53 | with tarfile.open(class_fname) as f: 54 | f.extractall(class_dir) 55 | os.remove(class_fname) 56 | pbar.update(1) 57 | pbar.close() 58 | 59 | def extract_val(tar_fname, target_dir, with_rec=False, num_thread=1): 60 | mkdir(target_dir) 61 | print('Extracting ' + tar_fname) 62 | with tarfile.open(tar_fname) as tar: 63 | tar.extractall(target_dir) 64 | # build rec file before images are moved into subfolders 65 | # move images to proper subfolders 66 | subprocess.call(["wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash"], 67 | cwd=target_dir, shell=True) 68 | 69 | 70 | def main(): 71 | args = parse_args() 72 | 73 | target_dir = os.path.expanduser(args.target_dir) 74 | #if os.path.exists(target_dir): 75 | # raise ValueError('Target dir ['+target_dir+'] exists. 
Remove it first') 76 | 77 | download_dir = os.path.expanduser(args.download_dir) 78 | train_tar_fname = os.path.join(download_dir, _TRAIN_TAR) 79 | check_file(train_tar_fname, args.checksum, _TRAIN_TAR_SHA1) 80 | val_tar_fname = os.path.join(download_dir, _VAL_TAR) 81 | check_file(val_tar_fname, args.checksum, _VAL_TAR_SHA1) 82 | 83 | build_rec = args.with_rec 84 | if build_rec: 85 | os.makedirs(os.path.join(target_dir, 'rec')) 86 | extract_train(train_tar_fname, os.path.join(target_dir, 'train'), build_rec, args.num_thread) 87 | extract_val(val_tar_fname, os.path.join(target_dir, 'val'), build_rec, args.num_thread) 88 | 89 | if __name__ == '__main__': 90 | main() 91 | -------------------------------------------------------------------------------- /scripts/prepare_minc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import argparse 4 | import tarfile 5 | from encoding.utils import download, mkdir 6 | 7 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='Initialize MINC dataset.', 12 | epilog='Example: python prepare_minc.py', 13 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 14 | parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk') 15 | parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') 16 | parser.add_argument('--overwrite', action='store_true', 17 | help='overwrite downloaded files if set, in case they are corrupted') 18 | args = parser.parse_args() 19 | return args 20 | 21 | def download_minc(path, overwrite=False): 22 | _AUG_DOWNLOAD_URLS = [ 23 | ('http://opensurfaces.cs.cornell.edu/static/minc/minc-2500.tar.gz', 'bcccbb3b1ab396ef540f024a5ba23eff54f7fe31')] 24 | download_dir = os.path.join(path, 'downloads') 25 | mkdir(download_dir) 26 | for url, checksum in _AUG_DOWNLOAD_URLS: 27 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 28 | # extract 29 | with tarfile.open(filename) as tar: 30 | tar.extractall(path=path) 31 | 32 | if __name__ == '__main__': 33 | args = parse_args() 34 | mkdir(os.path.expanduser('~/.encoding/datasets')) 35 | if args.download_dir is not None: 36 | if os.path.isdir(_TARGET_DIR): 37 | os.remove(_TARGET_DIR) 38 | os.symlink(args.download_dir, _TARGET_DIR) 39 | else: 40 | download_minc(_TARGET_DIR, overwrite=False) 41 | -------------------------------------------------------------------------------- /scripts/prepare_pascal.py: -------------------------------------------------------------------------------- 1 | """Prepare PASCAL VOC datasets""" 2 | import os 3 | import shutil 4 | import argparse 5 | import tarfile 6 | from encoding.utils import download, mkdir 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Initialize PASCAL VOC dataset.', 14 | epilog='Example: python prepare_pascal.py', 15 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk') 17 | parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') 18 | parser.add_argument('--overwrite', action='store_true', help='overwrite downloaded files if set, in case they are corrupted') 19 | args = parser.parse_args() 20 | return args 21 | 22 | 23 | def download_voc(path, 
overwrite=False): 24 | _DOWNLOAD_URLS = [ 25 | ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', 26 | '4e443f8a2eca6b1dac8a6c57641b67dd40621a49')] 27 | download_dir = os.path.join(path, 'downloads') 28 | mkdir(download_dir) 29 | for url, checksum in _DOWNLOAD_URLS: 30 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 31 | # extract 32 | with tarfile.open(filename) as tar: 33 | tar.extractall(path=path) 34 | 35 | 36 | def download_aug(path, overwrite=False): 37 | _AUG_DOWNLOAD_URLS = [ 38 | ('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz', '7129e0a480c2d6afb02b517bb18ac54283bfaa35')] 39 | download_dir = os.path.join(path, 'downloads') 40 | mkdir(download_dir) 41 | for url, checksum in _AUG_DOWNLOAD_URLS: 42 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 43 | # extract 44 | with tarfile.open(filename) as tar: 45 | tar.extractall(path=path) 46 | shutil.move(os.path.join(path, 'benchmark_RELEASE'), 47 | os.path.join(path, 'VOCaug')) 48 | filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt'] 49 | # generate trainval.txt 50 | with open(os.path.join(path, 'VOCaug/dataset/trainval.txt'), 'w') as outfile: 51 | for fname in filenames: 52 | fname = os.path.join(path, fname) 53 | with open(fname) as infile: 54 | for line in infile: 55 | outfile.write(line) 56 | 57 | 58 | if __name__ == '__main__': 59 | args = parse_args() 60 | mkdir(os.path.expanduser('~/.encoding/datasets')) 61 | if args.download_dir is not None: 62 | if os.path.isdir(_TARGET_DIR): 63 | os.remove(_TARGET_DIR) 64 | os.symlink(args.download_dir, _TARGET_DIR) 65 | else: 66 | download_voc(_TARGET_DIR, overwrite=False) 67 | download_aug(_TARGET_DIR, overwrite=False) 68 | -------------------------------------------------------------------------------- /scripts/prepare_pcontext.py: -------------------------------------------------------------------------------- 1 | """Prepare PASCAL Context dataset""" 2 | import os 3 | import shutil 4 | import argparse 5 | import tarfile 6 | from encoding.utils import download, mkdir 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | PASD_URL="https://codalabuser.blob.core.windows.net/public/%s" 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Initialize PASCAL Context dataset.', 14 | epilog='Example: python prepare_pcontext.py', 15 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk') 17 | args = parser.parse_args() 18 | return args 19 | 20 | def download_ade(path, overwrite=False): 21 | _AUG_DOWNLOAD_URLS = [ 22 | ('http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar', 23 | 'bf9985e9f2b064752bf6bd654d89f017c76c395a'), 24 | ('https://codalabuser.blob.core.windows.net/public/trainval_merged.json', 25 | '169325d9f7e9047537fedca7b04de4dddf10b881'), 26 | ('https://hangzh.s3.amazonaws.com/encoding/data/pcontext/train.pth', 27 | '4bfb49e8c1cefe352df876c9b5434e655c9c1d07'), 28 | ('https://hangzh.s3.amazonaws.com/encoding/data/pcontext/val.pth', 29 | 'ebedc94247ec616c57b9a2df15091784826a7b0c'), 30 | ] 31 | download_dir = os.path.join(path, 'downloads') 32 | mkdir(download_dir) 33 | for url, checksum in _AUG_DOWNLOAD_URLS: 34 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 35 | # extract 36 | if os.path.splitext(filename)[1] == '.tar': 37 | 
with tarfile.open(filename) as tar: 38 | tar.extractall(path=path) 39 | else: 40 | shutil.move(filename, os.path.join(path, 'VOCdevkit/VOC2010/'+os.path.basename(filename))) 41 | 42 | def install_pcontext_api(): 43 | repo_url = "https://github.com/zhanghang1989/detail-api" 44 | os.system("git clone " + repo_url) 45 | os.system("cd detail-api/PythonAPI/ && python setup.py install") 46 | shutil.rmtree('detail-api') 47 | try: 48 | import detail 49 | except Exception: 50 | print("Installing PASCAL Context API failed, please install it manually %s"%(repo_url)) 51 | 52 | 53 | if __name__ == '__main__': 54 | args = parse_args() 55 | mkdir(os.path.expanduser('~/.encoding/data')) 56 | if args.download_dir is not None: 57 | if os.path.isdir(_TARGET_DIR): 58 | os.remove(_TARGET_DIR) 59 | # make symlink 60 | os.symlink(args.download_dir, _TARGET_DIR) 61 | else: 62 | download_ade(_TARGET_DIR, overwrite=False) 63 | install_pcontext_api() 64 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import io 12 | import os 13 | import subprocess 14 | 15 | from setuptools import setup, find_packages 16 | 17 | cwd = os.path.dirname(os.path.abspath(__file__)) 18 | 19 | version = '1.2.2' 20 | try: 21 | if not os.getenv('RELEASE'): 22 | from datetime import date 23 | today = date.today() 24 | day = today.strftime("b%Y%m%d") 25 | version += day 26 | except Exception: 27 | pass 28 | 29 | def create_version_file(): 30 | global version, cwd 31 | print('-- Building version ' + version) 32 | version_path = os.path.join(cwd, 'encoding', 'version.py') 33 | with open(version_path, 'w') as f: 34 | f.write('"""This is encoding version file."""\n') 35 | f.write("__version__ = '{}'\n".format(version)) 36 | 37 | requirements = [ 38 | 'numpy', 39 | 'tqdm', 40 | 'nose', 41 | 'portalocker', 42 | 'torch>=1.4.0', 43 | 'torchvision>=0.5.0', 44 | 'Pillow', 45 | 'scipy', 46 | 'requests', 47 | ] 48 | 49 | if __name__ == '__main__': 50 | create_version_file() 51 | setup( 52 | name="torch-encoding", 53 | version=version, 54 | author="Hang Zhang", 55 | author_email="zhanghang0704@gmail.com", 56 | url="https://github.com/zhanghang1989/PyTorch-Encoding", 57 | description="PyTorch Encoding Package", 58 | long_description=open('README.md').read(), 59 | long_description_content_type='text/markdown', 60 | license='MIT', 61 | install_requires=requirements, 62 | packages=find_packages(exclude=["tests", "experiments"]), 63 | package_data={ 'encoding': [ 64 | 'LICENSE', 65 | 'lib/cpu/*.h', 66 | 'lib/cpu/*.cpp', 67 | 'lib/gpu/*.h', 68 | 'lib/gpu/*.cpp', 69 | 'lib/gpu/*.cu', 70 | ]}, 71 | ) 72 | -------------------------------------------------------------------------------- /tests/unit_test/test_dataset.py: 
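Note: the unit tests below each end with `nose.runmodule()`, so they can be executed directly as scripts; the dataset test assumes ADE20K has already been prepared by `scripts/prepare_ade20k.py`, and the function/model tests need a CUDA device (and network access for pretrained weights). A sketch of running one test module through nose programmatically, equivalent to calling `nosetests` on the file (the path is taken from the repository layout):

```python
# Hypothetical: run a single unit-test module with nose, the same runner
# that each test file invokes via nose.runmodule() when executed directly.
import nose

# Equivalent to: nosetests -v tests/unit_test/test_dataset.py
nose.run(argv=['nosetests', '-v', 'tests/unit_test/test_dataset.py'])
```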
-------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | from tqdm import tqdm 12 | from torch.utils import data 13 | import torchvision.transforms as transform 14 | from encoding.datasets import get_segmentation_dataset 15 | 16 | def test_ade_dataset(): 17 | 18 | def test_dataset(dataset_name): 19 | input_transform = transform.Compose([ 20 | transform.ToTensor(), 21 | transform.Normalize([.485, .456, .406], [.229, .224, .225])]) 22 | trainset = get_segmentation_dataset(dataset_name, split='val', mode='train', 23 | transform=input_transform) 24 | trainloader = data.DataLoader(trainset, batch_size=16, 25 | drop_last=True, shuffle=True) 26 | tbar = tqdm(trainloader) 27 | max_label = -10 28 | for i, (image, target) in enumerate(tbar): 29 | tmax = target.max().item() 30 | tmin = target.min().item() 31 | assert(tmin >= -1) 32 | if tmax > max_label: 33 | max_label = tmax 34 | assert(max_label < trainset.NUM_CLASS) 35 | tbar.set_description("Batch %d, max label %d"%(i, max_label)) 36 | test_dataset('ade20k') 37 | 38 | if __name__ == "__main__": 39 | import nose 40 | nose.runmodule() 41 | -------------------------------------------------------------------------------- /tests/unit_test/test_function.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import numpy as np 12 | import torch 13 | from torch.autograd import Variable, gradcheck 14 | import encoding 15 | 16 | EPS = 1e-3 17 | ATOL = 1e-3 18 | 19 | def _assert_tensor_close(a, b, atol=ATOL, rtol=EPS): 20 | npa, npb = a.cpu().numpy(), b.cpu().numpy() 21 | assert np.allclose(npa, npb, rtol=rtol, atol=atol), \ 22 | 'Tensor close check failed\n{}\n{}\nadiff={}, rdiff={}'.format( 23 | a, b, np.abs(npa - npb).max(), np.abs((npa - npb) / np.fmax(npa, 1e-5)).max()) 24 | 25 | def test_aggregate(): 26 | B,N,K,D = 2,3,4,5 27 | A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), 28 | requires_grad=True) 29 | X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), 30 | requires_grad=True) 31 | C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), 32 | requires_grad=True) 33 | input = (A, X, C) 34 | test = gradcheck(encoding.functions.aggregate, input, eps=EPS, atol=ATOL) 35 | print('Testing aggregate(): {}'.format(test)) 36 | 37 | def test_scaled_l2(): 38 | B,N,K,D = 2,3,4,5 39 | X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), 40 | requires_grad=True) 41 | C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), 42 | requires_grad=True) 43 | S = Variable(torch.cuda.DoubleTensor(K).uniform_(-0.5,0.5), 44 | requires_grad=True) 45 | input = (X, C, S) 46 | test = 
gradcheck(encoding.functions.scaled_l2, input, eps=EPS, atol=ATOL) 47 | print('Testing scaled_l2(): {}'.format(test)) 48 | 49 | 50 | def test_moments(): 51 | B,C,H = 2,3,4 52 | X = Variable(torch.cuda.DoubleTensor(B,C,H).uniform_(-0.5,0.5), 53 | requires_grad=True) 54 | input = (X,) 55 | test = gradcheck(encoding.functions.moments, input, eps=EPS, atol=ATOL) 56 | print('Testing moments(): {}'.format(test)) 57 | 58 | def test_non_max_suppression(): 59 | def _test_nms(cuda): 60 | # check a small test case 61 | boxes = torch.Tensor([ 62 | [[10.2, 23., 50., 20.], 63 | [11.3, 23., 52., 20.1], 64 | [23.2, 102.3, 23.3, 50.3], 65 | [101.2, 32.4, 70.6, 70.], 66 | [100.2, 30.9, 70.7, 69.]], 67 | [[200.3, 234., 530., 320.], 68 | [110.3, 223., 152., 420.1], 69 | [243.2, 240.3, 50.3, 30.3], 70 | [243.2, 236.4, 48.6, 30.], 71 | [100.2, 310.9, 170.7, 691.]]]) 72 | 73 | scores = torch.Tensor([ 74 | [0.9, 0.7, 0.11, 0.23, 0.8], 75 | [0.13, 0.89, 0.45, 0.23, 0.3]]) 76 | 77 | if cuda: 78 | boxes = boxes.cuda() 79 | scores = scores.cuda() 80 | 81 | expected_output = ( 82 | torch.ByteTensor( 83 | [[1, 1, 0, 0, 1], [1, 1, 1, 0, 1]]), 84 | torch.LongTensor( 85 | [[0, 4, 1, 3, 2], [1, 2, 4, 3, 0]]) 86 | ) 87 | 88 | mask, inds = encoding.functions.NonMaxSuppression(boxes, scores, 0.7) 89 | _assert_tensor_close(mask, expected_output[0]) 90 | _assert_tensor_close(inds, expected_output[1]) 91 | 92 | _test_nms(False) 93 | _test_nms(True) 94 | 95 | if __name__ == '__main__': 96 | import nose 97 | nose.runmodule() 98 | -------------------------------------------------------------------------------- /tests/unit_test/test_model.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## This source code is licensed under the MIT-style license found in the 7 | ## LICENSE file in the root directory of this source tree 8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 | 10 | import torch 11 | import encoding 12 | 13 | def test_model_inference(): 14 | x = torch.rand(1, 3, 224, 224) 15 | for model_name in encoding.models.pretrained_model_list(): 16 | print('Doing: ', model_name) 17 | if 'wideresnet' in model_name: continue # need multi-gpu 18 | model = encoding.models.get_model(model_name, pretrained=True) 19 | model.eval() 20 | y = model(x) 21 | 22 | if __name__ == "__main__": 23 | import nose 24 | nose.runmodule() 25 | -------------------------------------------------------------------------------- /tests/unit_test/test_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from encoding.utils.metrics import * 4 | 5 | def test_segmentation_metrics(): 6 | # check torch evaluation metrics 7 | rows, cols = 640, 480 8 | nclass = 30 9 | # numpy data 10 | im_lab = np.matrix(np.random.randint(0, nclass, size=(rows, cols))) 11 | mask = np.random.random((nclass, rows, cols)) 12 | im_pred = mask.argmax(axis=0) 13 | # torch data 14 | tim_lab = torch.from_numpy(im_lab).unsqueeze(0).long() 15 | tim_pred = torch.from_numpy(mask).unsqueeze(0) 16 | # numpy prediction 17 | pixel_correct, pixel_labeled = pixel_accuracy(im_pred, im_lab) 18 | area_inter, area_union = intersection_and_union(im_pred, im_lab, nclass) 19 | pixAcc = 1.0 * pixel_correct / (np.spacing(1) + pixel_labeled) 20 | IoU = 1.0 * area_inter / (np.spacing(1) 
+ area_union) 21 | mIoU = IoU.mean() 22 | print('numpy predictionis :', pixAcc, mIoU) 23 | # torch metric prediction 24 | pixel_correct, pixel_labeled = batch_pix_accuracy(tim_pred, tim_lab) 25 | area_inter, area_union = batch_intersection_union(tim_pred, tim_lab, nclass) 26 | batch_pixAcc = 1.0 * pixel_correct / (np.spacing(1) + pixel_labeled) 27 | IoU = 1.0 * area_inter / (np.spacing(1) + area_union) 28 | batch_mIoU = IoU.mean() 29 | print('torch predictionis :', batch_pixAcc, batch_mIoU) 30 | assert (batch_pixAcc - pixAcc) < 1e-3 31 | assert (batch_mIoU - mIoU) < 1e-3 32 | -------------------------------------------------------------------------------- /torch_encoding.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: torch-encoding 3 | Version: 1.2.2b20200814 4 | Summary: PyTorch Encoding Package 5 | Home-page: https://github.com/zhanghang1989/PyTorch-Encoding 6 | Author: Hang Zhang 7 | Author-email: zhanghang0704@gmail.com 8 | License: MIT 9 | Description: [![PyPI](https://img.shields.io/pypi/v/torch-encoding.svg)](https://pypi.python.org/pypi/torch-encoding) 10 | [![PyPI Pre-release](https://img.shields.io/badge/pypi--prerelease-v1.2.2-ff69b4.svg)](https://pypi.org/project/torch-encoding/#history) 11 | [![Upload Python Package](https://github.com/zhanghang1989/PyTorch-Encoding/workflows/Upload%20Python%20Package/badge.svg)](https://github.com/zhanghang1989/PyTorch-Encoding/actions) 12 | [![Downloads](http://pepy.tech/badge/torch-encoding)](http://pepy.tech/project/torch-encoding) 13 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 14 | [![Build Docs](https://github.com/zhanghang1989/PyTorch-Encoding/workflows/Build%20Docs/badge.svg)](https://github.com/zhanghang1989/PyTorch-Encoding/actions) 15 | [![Unit Test](https://github.com/zhanghang1989/PyTorch-Encoding/workflows/Unit%20Test/badge.svg)](https://github.com/zhanghang1989/PyTorch-Encoding/actions) 16 | 17 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/resnest-split-attention-networks/semantic-segmentation-on-ade20k)](https://paperswithcode.com/sota/semantic-segmentation-on-ade20k?p=resnest-split-attention-networks) 18 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/resnest-split-attention-networks/semantic-segmentation-on-pascal-context)](https://paperswithcode.com/sota/semantic-segmentation-on-pascal-context?p=resnest-split-attention-networks) 19 | 20 | # PyTorch-Encoding 21 | 22 | created by [Hang Zhang](http://hangzh.com/) 23 | 24 | ## [Documentation](http://hangzh.com/PyTorch-Encoding/) 25 | 26 | - Please visit the [**Docs**](http://hangzh.com/PyTorch-Encoding/) for detail instructions of installation and usage. 27 | 28 | - Please visit the [link](http://hangzh.com/PyTorch-Encoding/model_zoo/imagenet.html) to image classification models. 29 | 30 | - Please visit the [link](http://hangzh.com/PyTorch-Encoding/model_zoo/segmentation.html) to semantic segmentation models. 31 | 32 | ## Citations 33 | 34 | **ResNeSt: Split-Attention Networks** [[arXiv]]() 35 | [Hang Zhang](http://hangzh.com/), Chongruo Wu, Zhongyue Zhang, Yi Zhu, Zhi Zhang, Haibin Lin, Yue Sun, Tong He, Jonas Muller, R. 
Manmatha, Mu Li and Alex Smola 36 | ``` 37 | @article{zhang2020resnest, 38 | title={ResNeSt: Split-Attention Networks}, 39 | author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander}, 40 | journal={arXiv preprint}, 41 | year={2020} 42 | } 43 | ``` 44 | 45 | **Context Encoding for Semantic Segmentation** [[arXiv]](https://arxiv.org/pdf/1803.08904.pdf) 46 | [Hang Zhang](http://hangzh.com/), [Kristin Dana](http://eceweb1.rutgers.edu/vision/dana.html), [Jianping Shi](http://shijianping.me/), [Zhongyue Zhang](http://zhongyuezhang.com/), [Xiaogang Wang](http://www.ee.cuhk.edu.hk/~xgwang/), [Ambrish Tyagi](https://scholar.google.com/citations?user=GaSWCoUAAAAJ&hl=en), [Amit Agrawal](http://www.amitkagrawal.com/) 47 | ``` 48 | @InProceedings{Zhang_2018_CVPR, 49 | author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit}, 50 | title = {Context Encoding for Semantic Segmentation}, 51 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 52 | month = {June}, 53 | year = {2018} 54 | } 55 | ``` 56 | 57 | **Deep TEN: Texture Encoding Network** [[arXiv]](https://arxiv.org/pdf/1612.02844.pdf) 58 | [Hang Zhang](http://hangzh.com/), [Jia Xue](http://jiaxueweb.com/), [Kristin Dana](http://eceweb1.rutgers.edu/vision/dana.html) 59 | ``` 60 | @InProceedings{Zhang_2017_CVPR, 61 | author = {Zhang, Hang and Xue, Jia and Dana, Kristin}, 62 | title = {Deep TEN: Texture Encoding Network}, 63 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 64 | month = {July}, 65 | year = {2017} 66 | } 67 | ``` 68 | 69 | Platform: UNKNOWN 70 | Description-Content-Type: text/markdown 71 | -------------------------------------------------------------------------------- /torch_encoding.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | README.md 2 | setup.cfg 3 | setup.py 4 | encoding/__init__.py 5 | encoding/parallel.py 6 | encoding/version.py 7 | encoding/datasets/__init__.py 8 | encoding/datasets/ade20k.py 9 | encoding/datasets/base.py 10 | encoding/datasets/cityscapes.py 11 | encoding/datasets/cityscapes_v0.py 12 | encoding/datasets/cityscapescoarse.py 13 | encoding/datasets/coco.py 14 | encoding/datasets/folder.py 15 | encoding/datasets/hpw18.py 16 | encoding/datasets/imagenet.py 17 | encoding/datasets/minc.py 18 | encoding/datasets/pascal_aug.py 19 | encoding/datasets/pascal_voc.py 20 | encoding/datasets/pcontext.py 21 | encoding/functions/__init__.py 22 | encoding/functions/customize.py 23 | encoding/functions/dist_syncbn.py 24 | encoding/functions/encoding.py 25 | encoding/functions/rectify.py 26 | encoding/functions/syncbn.py 27 | encoding/lib/__init__.py 28 | encoding/lib/cpu/encoding_cpu.cpp 29 | encoding/lib/cpu/nms_cpu.cpp 30 | encoding/lib/cpu/operator.cpp 31 | encoding/lib/cpu/operator.h 32 | encoding/lib/cpu/rectify_cpu.cpp 33 | encoding/lib/cpu/roi_align_cpu.cpp 34 | encoding/lib/cpu/syncbn_cpu.cpp 35 | encoding/lib/gpu/activation_kernel.cu 36 | encoding/lib/gpu/common.h 37 | encoding/lib/gpu/device_tensor.h 38 | encoding/lib/gpu/encoding_kernel.cu 39 | encoding/lib/gpu/lib_ssd.cu 40 | encoding/lib/gpu/nms_kernel.cu 41 | encoding/lib/gpu/operator.cpp 42 | encoding/lib/gpu/operator.h 43 | encoding/lib/gpu/rectify_cuda.cu 44 | encoding/lib/gpu/roi_align_kernel.cu 45 | 
encoding/lib/gpu/syncbn_kernel.cu 46 | encoding/models/__init__.py 47 | encoding/models/deepten.py 48 | encoding/models/model_store.py 49 | encoding/models/model_zoo.py 50 | encoding/models/backbone/__init__.py 51 | encoding/models/backbone/resnest.py 52 | encoding/models/backbone/resnet.py 53 | encoding/models/backbone/resnet_variants.py 54 | encoding/models/backbone/resnext.py 55 | encoding/models/backbone/wideresnet.py 56 | encoding/models/backbone/xception.py 57 | encoding/models/sseg/__init__.py 58 | encoding/models/sseg/atten.py 59 | encoding/models/sseg/base.py 60 | encoding/models/sseg/danet.py 61 | encoding/models/sseg/deeplab.py 62 | encoding/models/sseg/dran.py 63 | encoding/models/sseg/encnet.py 64 | encoding/models/sseg/fcfpn.py 65 | encoding/models/sseg/fcn.py 66 | encoding/models/sseg/psp.py 67 | encoding/models/sseg/upernet.py 68 | encoding/nn/__init__.py 69 | encoding/nn/attention.py 70 | encoding/nn/customize.py 71 | encoding/nn/da_att.py 72 | encoding/nn/dran_att.py 73 | encoding/nn/dropblock.py 74 | encoding/nn/encoding.py 75 | encoding/nn/loss.py 76 | encoding/nn/rectify.py 77 | encoding/nn/splat.py 78 | encoding/nn/syncbn.py 79 | encoding/transforms/__init__.py 80 | encoding/transforms/autoaug.py 81 | encoding/transforms/get_transform.py 82 | encoding/transforms/transforms.py 83 | encoding/utils/__init__.py 84 | encoding/utils/dist_helper.py 85 | encoding/utils/files.py 86 | encoding/utils/lr_scheduler.py 87 | encoding/utils/metrics.py 88 | encoding/utils/misc.py 89 | encoding/utils/pallete.py 90 | encoding/utils/precise_bn.py 91 | encoding/utils/presets.py 92 | encoding/utils/train_helper.py 93 | torch_encoding.egg-info/PKG-INFO 94 | torch_encoding.egg-info/SOURCES.txt 95 | torch_encoding.egg-info/dependency_links.txt 96 | torch_encoding.egg-info/requires.txt 97 | torch_encoding.egg-info/top_level.txt -------------------------------------------------------------------------------- /torch_encoding.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /torch_encoding.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | tqdm 3 | nose 4 | portalocker 5 | torch>=1.4.0 6 | torchvision>=0.5.0 7 | Pillow 8 | scipy 9 | requests 10 | -------------------------------------------------------------------------------- /torch_encoding.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | encoding 2 | --------------------------------------------------------------------------------
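Finally, a minimal post-install smoke test (a sketch, assuming the package was installed under the distribution name `torch-encoding` given in PKG-INFO): it checks that the single top-level module listed in `top_level.txt` imports and that the version string written by `setup.py` is present.

```python
# Hypothetical smoke test after installing the package built by setup.py
# (distribution name `torch-encoding`, top-level module `encoding` per top_level.txt).
import encoding  # note: importing may JIT-compile the C++/CUDA extensions under encoding/lib
from encoding.version import __version__  # written by create_version_file() in setup.py

print('torch-encoding version:', __version__)  # e.g. '1.2.2b20200814' for a nightly build
```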