├── .editorconfig ├── .github └── workflows │ ├── sphix_build_master.yml │ ├── sphix_build_pr.yml │ └── unit_test.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── _static │ ├── css │ │ └── encoding.css │ ├── img │ │ ├── EncNet32k128d.svg │ │ ├── cvpr17.svg │ │ ├── deep_ten_curve.svg │ │ ├── favicon.png │ │ ├── figure1.jpg │ │ ├── icon.png │ │ ├── myimage.gif │ │ └── upconv.png │ ├── js │ │ └── hidebib.js │ └── theme_overrides.css │ ├── _templates │ └── layout.html │ ├── conf.py │ ├── custom_directives.py │ ├── index.rst │ ├── model_zoo │ ├── imagenet.rst │ └── segmentation.rst │ ├── nn.rst │ ├── notes │ └── compile.rst │ ├── parallel.rst │ ├── tutorials │ ├── style.rst │ ├── syncbn.rst │ └── texture.rst │ └── utils.rst ├── encoding ├── __init__.py ├── datasets │ ├── __init__.py │ ├── ade20k.py │ ├── base.py │ ├── cityscapes.py │ ├── cityscapescoarse.py │ ├── coco.py │ ├── folder.py │ ├── hpw18.py │ ├── imagenet.py │ ├── minc.py │ ├── pascal_aug.py │ ├── pascal_voc.py │ └── pcontext.py ├── functions │ ├── __init__.py │ ├── customize.py │ ├── dist_syncbn.py │ ├── encoding.py │ ├── rectify.py │ └── syncbn.py ├── lib │ ├── cpu │ │ ├── encoding_cpu.cpp │ │ ├── nms_cpu.cpp │ │ ├── operator.cpp │ │ ├── operator.h │ │ ├── rectify_cpu.cpp │ │ ├── roi_align_cpu.cpp │ │ └── syncbn_cpu.cpp │ └── gpu │ │ ├── activation_kernel.cu │ │ ├── common.h │ │ ├── device_tensor.h │ │ ├── encoding_kernel.cu │ │ ├── lib_ssd.cu │ │ ├── nms_kernel.cu │ │ ├── operator.cpp │ │ ├── operator.h │ │ ├── rectify_cuda.cu │ │ ├── roi_align_kernel.cu │ │ └── syncbn_kernel.cu ├── models │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── resnest.py │ │ ├── resnet.py │ │ ├── resnet_variants.py │ │ ├── resnext.py │ │ ├── wideresnet.py │ │ └── xception.py │ ├── deepten.py │ ├── model_store.py │ ├── model_zoo.py │ └── sseg │ │ ├── __init__.py │ │ ├── atten.py │ │ ├── base.py │ │ ├── deeplab.py │ │ ├── encnet.py │ │ ├── fcfpn.py │ │ ├── fcn.py │ │ ├── psp.py │ │ └── upernet.py ├── nn │ ├── __init__.py │ ├── attention.py │ ├── customize.py │ ├── dropblock.py │ ├── encoding.py │ ├── loss.py │ ├── rectify.py │ ├── splat.py │ └── syncbn.py ├── parallel.py ├── transforms │ ├── __init__.py │ ├── autoaug.py │ ├── get_transform.py │ └── transforms.py └── utils │ ├── __init__.py │ ├── dist_helper.py │ ├── files.py │ ├── lr_scheduler.py │ ├── metrics.py │ ├── misc.py │ ├── pallete.py │ ├── precise_bn.py │ ├── presets.py │ └── train_helper.py ├── experiments ├── recognition │ ├── README.md │ ├── resnet50_baseline.sh │ ├── test_flops.py │ ├── train_dist.py │ └── verify.py └── segmentation │ ├── demo.py │ ├── test.py │ ├── test_models.py │ ├── train.py │ └── train_dist.py ├── scripts ├── build_docker.sh ├── prepare_ade20k.py ├── prepare_citys.py ├── prepare_coco.py ├── prepare_imagenet.py ├── prepare_minc.py ├── prepare_pascal.py ├── prepare_pcontext.py └── run_docker.sh ├── setup.cfg ├── setup.py └── tests ├── lint.py ├── pylintrc └── unit_test ├── test_dataset.py ├── test_function.py ├── test_model.py ├── test_module.py └── test_utils.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | -------------------------------------------------------------------------------- /.github/workflows/sphix_build_master.yml: -------------------------------------------------------------------------------- 1 | # This workflow will 
install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Build Docs 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | 10 | jobs: 11 | docs: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v1 15 | - uses: seanmiddleditch/gha-setup-ninja@master 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: 3.7 21 | 22 | - name: Install dependencies 23 | run: | 24 | python -m pip install --upgrade pip 25 | pip install numpy -I 26 | pip install pytest torch 27 | 28 | - name: Install package 29 | run: | 30 | pip install -e . 31 | 32 | - name: Install Sphix Dependencies 33 | run: | 34 | cd docs/ 35 | pip install -r requirements.txt 36 | 37 | - name: Build Sphinx docs 38 | run: | 39 | cd docs/ 40 | make html 41 | touch build/html/.nojekyll 42 | 43 | # https://github.com/marketplace/actions/github-pages 44 | - name: Deploy 45 | if: success() 46 | uses: crazy-max/ghaction-github-pages@v1 47 | env: 48 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 49 | with: 50 | build_dir: docs/build/html/ 51 | target_branch: gh-pages 52 | -------------------------------------------------------------------------------- /.github/workflows/sphix_build_pr.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Build Docs 5 | 6 | on: 7 | pull_request: 8 | branches: [ master ] 9 | 10 | jobs: 11 | docs: 12 | runs-on: self-hosted 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: seanmiddleditch/gha-setup-ninja@master 16 | 17 | - name: Set PR Number 18 | uses: actions/github-script@0.3.0 19 | with: 20 | github-token: ${{github.token}} 21 | script: | 22 | const core = require('@actions/core') 23 | const prNumber = context.payload.number; 24 | core.exportVariable('PULL_NUMBER', prNumber); 25 | core.exportVariable("PATH", "/home/ubuntu/anaconda3/bin:/usr/local/bin:/usr/bin/:/bin:$PATH") 26 | 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install numpy -I 31 | pip install pytest torch 32 | 33 | - name: Install package 34 | run: | 35 | pip install -e . 
36 | 
37 |     - name: Install Sphinx Dependencies
38 |       run: |
39 |         cd docs/
40 |         pip install -r requirements.txt
41 | 
42 |     - name: Build Sphinx docs
43 |       run: |
44 |         cd docs/
45 |         make html
46 |         touch build/html/.nojekyll
47 |         aws s3 sync build/html/ s3://hangzh/encoding/docs/${{ env.PULL_NUMBER }}/ --acl public-read --follow-symlinks --delete
48 | 
49 |     - name: Comment
50 |       if: success()
51 |       uses: thollander/actions-comment-pull-request@master
52 |       with:
53 |         message: "The docs are uploaded and can be previewed at http://hangzh.s3.amazonaws.com/encoding/docs/${{ env.PULL_NUMBER }}/index.html"
54 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
55 | 
--------------------------------------------------------------------------------
/.github/workflows/unit_test.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 | 
4 | name: Unit Test
5 | 
6 | on:
7 |   push:
8 |     branches: [ master ]
9 |   pull_request:
10 |     branches: [ master ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: self-hosted
16 | 
17 |     steps:
18 |     - uses: actions/checkout@v2
19 |     - uses: seanmiddleditch/gha-setup-ninja@master
20 | 
21 |     - name: Set up Python
22 |       uses: actions/github-script@0.3.0
23 |       with:
24 |         github-token: ${{github.token}}
25 |         script: |
26 |           const core = require('@actions/core')
27 |           core.exportVariable("PATH", "/home/ubuntu/anaconda3/bin:/usr/local/bin:/usr/bin/:/bin:$PATH")
28 | 
29 |     - name: Install package
30 |       run: |
31 |         python -m pip install --upgrade pip
32 |         pip install -e .
33 | 
34 |     - name: Run pytest
35 |       run: |
36 |         pip install nose
37 |         nosetests -v tests/unit_test/
38 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.DS_Store
2 | *.swp
3 | *.pyc
4 | version.py
5 | build/
6 | data/
7 | docs/src/
8 | docs/html/
9 | encoding/_ext/
10 | encoding.egg-info/
11 | *.o
12 | *.so
13 | *.ninja*
14 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvcr.io/nvidia/pytorch:20.06-py3
2 | 
3 | # Set working directory
4 | WORKDIR /workspace
5 | 
6 | #ENV PYTHONPATH "${PYTHONPATH}:/workspace"
7 | 
8 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y python3-tk python-pip git tmux htop tree
9 | 
10 | RUN python -m pip install --upgrade pip
11 | #RUN python -m pip install torch==1.4.0
12 | #RUN python -m pip install torchvision==0.5.0
13 | RUN python -m pip install pycocotools==2.0.0
14 | 
15 | #RUN chmod a+rwx -R /opt/conda/
16 | 
17 | COPY ./setup.py .
18 | COPY ./encoding ./encoding
19 | 
20 | ENV FORCE_CUDA="1"
21 | RUN python setup.py develop
22 | 
23 | COPY ./experiments ./experiments
24 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017- Hang Zhang. All rights reserved.
4 | Copyright (c) 2018- Amazon.com, Inc. or its affiliates. All rights reserved.
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR = $(CURDIR) 2 | 3 | lint: cpplint pylint 4 | 5 | cpplint: 6 | tests/lint.py encoding cpp src kernel 7 | 8 | pylint: 9 | pylint --rcfile=$(ROOTDIR)/tests/pylintrc --ignore-patterns=".*\.so$$,.*\.dll$$,.*\.dylib$$" encoding --ignore=_ext 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 2 | [![Build Docs](https://github.com/zhanghang1989/PyTorch-Encoding/workflows/Build%20Docs/badge.svg)](https://github.com/zhanghang1989/PyTorch-Encoding/actions) 3 | [![Unit Test](https://github.com/zhanghang1989/PyTorch-Encoding/workflows/Unit%20Test/badge.svg)](https://github.com/zhanghang1989/PyTorch-Encoding/actions) 4 | 5 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/resnest-split-attention-networks/semantic-segmentation-on-ade20k)](https://paperswithcode.com/sota/semantic-segmentation-on-ade20k?p=resnest-split-attention-networks) 6 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/resnest-split-attention-networks/semantic-segmentation-on-pascal-context)](https://paperswithcode.com/sota/semantic-segmentation-on-pascal-context?p=resnest-split-attention-networks) 7 | 8 | # PyTorch-Encoding 9 | 10 | created by [Hang Zhang](http://hangzh.com/) 11 | 12 | ## [Documentation](http://hangzh.com/PyTorch-Encoding/) 13 | 14 | - Please visit the [**Docs**](http://hangzh.com/PyTorch-Encoding/) for detail instructions of installation and usage. 15 | 16 | - Please visit the [link](http://hangzh.com/PyTorch-Encoding/model_zoo/imagenet.html) to image classification models. 17 | 18 | - Please visit the [link](http://hangzh.com/PyTorch-Encoding/model_zoo/segmentation.html) to semantic segmentation models. 19 | 20 | ## Citations 21 | 22 | **ResNeSt: Split-Attention Networks** [[arXiv]](https://arxiv.org/abs/2004.08955) 23 | [Hang Zhang](http://hangzh.com/), Chongruo Wu, Zhongyue Zhang, Yi Zhu, Zhi Zhang, Haibin Lin, Yue Sun, Tong He, Jonas Muller, R. 
Manmatha, Mu Li and Alex Smola 24 | ``` 25 | @article{zhang2020resnest, 26 | title={ResNeSt: Split-Attention Networks}, 27 | author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander}, 28 | journal={arXiv preprint}, 29 | year={2020} 30 | } 31 | ``` 32 | 33 | **Context Encoding for Semantic Segmentation** [[arXiv]](https://arxiv.org/pdf/1803.08904.pdf) 34 | [Hang Zhang](http://hangzh.com/), [Kristin Dana](http://eceweb1.rutgers.edu/vision/dana.html), [Jianping Shi](http://shijianping.me/), [Zhongyue Zhang](http://zhongyuezhang.com/), [Xiaogang Wang](http://www.ee.cuhk.edu.hk/~xgwang/), [Ambrish Tyagi](https://scholar.google.com/citations?user=GaSWCoUAAAAJ&hl=en), [Amit Agrawal](http://www.amitkagrawal.com/) 35 | ``` 36 | @InProceedings{Zhang_2018_CVPR, 37 | author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit}, 38 | title = {Context Encoding for Semantic Segmentation}, 39 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 40 | month = {June}, 41 | year = {2018} 42 | } 43 | ``` 44 | 45 | **Deep TEN: Texture Encoding Network** [[arXiv]](https://arxiv.org/pdf/1612.02844.pdf) 46 | [Hang Zhang](http://hangzh.com/), [Jia Xue](http://jiaxueweb.com/), [Kristin Dana](http://eceweb1.rutgers.edu/vision/dana.html) 47 | ``` 48 | @InProceedings{Zhang_2017_CVPR, 49 | author = {Zhang, Hang and Xue, Jia and Dana, Kristin}, 50 | title = {Deep TEN: Texture Encoding Network}, 51 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 52 | month = {July}, 53 | year = {2017} 54 | } 55 | ``` 56 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = Encoding 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | docset: html 16 | doc2dash --name $(SPHINXPROJ) --icon $(SOURCEDIR)/_static/img/favicon.png --enable-js --online-redirect-url http://hangzh.com/PyTorch-Encoding/ --force $(BUILDDIR)/html/ 17 | 18 | # Manually fix because Zeal doesn't deal well with `icon.png`-only at 2x resolution. 19 | cp $(SPHINXPROJ).docset/icon.png $(SPHINXPROJ).docset/icon@2x.png 20 | convert $(SPHINXPROJ).docset/icon@2x.png -resize 16x16 $(SPHINXPROJ).docset/icon.png 21 | 22 | .PHONY: help Makefile docset 23 | 24 | # Catch-all target: route all unknown targets to Sphinx using the new 25 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
26 | %: Makefile 27 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 28 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=Encoding 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx-gallery 3 | sphinxcontrib-googleanalytics 4 | -e git://github.com/zhanghang1989/autorch_sphinx_theme.git#egg=autorch_sphinx_theme 5 | -------------------------------------------------------------------------------- /docs/source/_static/css/encoding.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif; 3 | } 4 | 5 | /* Default header fonts are ugly */ 6 | h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption { 7 | font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif; 8 | } 9 | 10 | /* Use white for docs background */ 11 | .wy-side-nav-search { 12 | background-color: #a0e2ff; 13 | } 14 | 15 | .wy-nav-content-wrap, .wy-menu li.current > a { 16 | background-color: #fff; 17 | } 18 | 19 | @media screen and (min-width: 1400px) { 20 | .wy-nav-content-wrap { 21 | background-color: rgba(0, 0, 0, 0.0470588); 22 | } 23 | 24 | .wy-nav-content { 25 | background-color: #fff; 26 | } 27 | } 28 | 29 | /* Fixes for mobile */ 30 | .wy-nav-top { 31 | background-color: #fff; 32 | background-repeat: no-repeat; 33 | background-position: center; 34 | padding: 0; 35 | margin: 0.4045em 0.809em; 36 | color: #333; 37 | } 38 | 39 | .wy-nav-top > a { 40 | display: none; 41 | } 42 | 43 | @media screen and (max-width: 768px) { 44 | .wy-side-nav-search>a img.logo { 45 | height: 60px; 46 | } 47 | } 48 | 49 | /* This is needed to ensure that logo above search scales properly */ 50 | .wy-side-nav-search a { 51 | display: block; 52 | } 53 | 54 | /* This ensures that multiple constructors will remain in separate lines. 
*/ 55 | .rst-content dl:not(.docutils) dt { 56 | display: table; 57 | } 58 | 59 | /* Use our blue for literals */ 60 | .rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal { 61 | color: #4080bf; 62 | } 63 | 64 | .rst-content tt.xref, a .rst-content tt, .rst-content tt.xref, 65 | .rst-content code.xref, a .rst-content tt, a .rst-content code { 66 | color: #404040; 67 | } 68 | 69 | /* Change link colors (except for the menu) */ 70 | 71 | a { 72 | color: #4080bf; 73 | } 74 | 75 | a:hover { 76 | color: #4080bf; 77 | } 78 | 79 | 80 | a:visited { 81 | color: #306293; 82 | } 83 | 84 | .wy-menu a { 85 | color: #b3b3b3; 86 | } 87 | 88 | .wy-menu a:hover { 89 | color: #b3b3b3; 90 | } 91 | 92 | /* Default footer text is quite big */ 93 | footer { 94 | font-size: 80%; 95 | } 96 | 97 | footer .rst-footer-buttons { 98 | font-size: 125%; /* revert footer settings - 1/80% = 125% */ 99 | } 100 | 101 | footer p { 102 | font-size: 100%; 103 | } 104 | 105 | /* For hidden headers that appear in TOC tree */ 106 | /* see http://stackoverflow.com/a/32363545/3343043 107 | */ 108 | .rst-content .hidden-section { 109 | display: none; 110 | } 111 | 112 | nav .hidden-section { 113 | display: inherit; 114 | } 115 | 116 | .wy-side-nav-search>div.version { 117 | color: #000; 118 | } 119 | -------------------------------------------------------------------------------- /docs/source/_static/img/cvpr17.svg: -------------------------------------------------------------------------------- 1 | encodingInputDictionaryResidualsAssignAggregateEncoding-Layer -------------------------------------------------------------------------------- /docs/source/_static/img/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanghang1989/PyTorch-Encoding/ac748410dfc8d7d70a2ce7f5add08050af2fae20/docs/source/_static/img/favicon.png -------------------------------------------------------------------------------- /docs/source/_static/img/figure1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanghang1989/PyTorch-Encoding/ac748410dfc8d7d70a2ce7f5add08050af2fae20/docs/source/_static/img/figure1.jpg -------------------------------------------------------------------------------- /docs/source/_static/img/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanghang1989/PyTorch-Encoding/ac748410dfc8d7d70a2ce7f5add08050af2fae20/docs/source/_static/img/icon.png -------------------------------------------------------------------------------- /docs/source/_static/img/myimage.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanghang1989/PyTorch-Encoding/ac748410dfc8d7d70a2ce7f5add08050af2fae20/docs/source/_static/img/myimage.gif -------------------------------------------------------------------------------- /docs/source/_static/img/upconv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanghang1989/PyTorch-Encoding/ac748410dfc8d7d70a2ce7f5add08050af2fae20/docs/source/_static/img/upconv.png -------------------------------------------------------------------------------- /docs/source/_static/js/hidebib.js: -------------------------------------------------------------------------------- 1 | // adapted from: http://www.robots.ox.ac.uk/~vedaldi/assets/hidebib.js 2 | function 
hideallbibs() 3 | { 4 | var el = document.getElementsByTagName("div") ; 5 | for (var i = 0 ; i < el.length ; ++i) { 6 | if (el[i].className == "paper") { 7 | var bib = el[i].getElementsByTagName("pre") ; 8 | if (bib.length > 0) { 9 | bib [0] .style.display = 'none' ; 10 | } 11 | } 12 | } 13 | } 14 | 15 | function togglebib(paperid) 16 | { 17 | var paper = document.getElementById(paperid) ; 18 | var bib = paper.getElementsByTagName('pre') ; 19 | if (bib.length > 0) { 20 | if (bib [0] .style.display == 'none') { 21 | bib [0] .style.display = 'block' ; 22 | } else { 23 | bib [0] .style.display = 'none' ; 24 | } 25 | } 26 | } 27 | 28 | function toggleblock(blockId) 29 | { 30 | var block = document.getElementById(blockId); 31 | if (block.style.display == 'none') { 32 | block.style.display = 'block' ; 33 | } else { 34 | block.style.display = 'none' ; 35 | } 36 | } 37 | 38 | function hideblock(blockId) 39 | { 40 | var block = document.getElementById(blockId); 41 | block.style.display = 'none' ; 42 | } 43 | -------------------------------------------------------------------------------- /docs/source/_static/theme_overrides.css: -------------------------------------------------------------------------------- 1 | /* override table width restrictions */ 2 | @media screen and (min-width: 767px) { 3 | 4 | .wy-table-responsive table td { 5 | /* !important prevents the common CSS stylesheets from overriding 6 | this as on RTD they are loaded after this stylesheet */ 7 | white-space: normal !important; 8 | } 9 | 10 | .wy-table-responsive { 11 | overflow: visible !important; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /docs/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {%- block extrahead %} 4 | 5 | 6 | 7 | {% endblock %} 8 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Encoding documentation master file 2 | 3 | :github_url: https://github.com/zhanghang1989/PyTorch-Encoding 4 | 5 | Encoding Documentation 6 | ====================== 7 | 8 | Created by `Hang Zhang `_ 9 | 10 | An optimized PyTorch package with CUDA backend. 11 | 12 | 13 | .. toctree:: 14 | :glob: 15 | :maxdepth: 1 16 | :caption: Installation 17 | 18 | notes/* 19 | 20 | .. toctree:: 21 | :glob: 22 | :maxdepth: 1 23 | :caption: Model Zoo 24 | 25 | model_zoo/* 26 | 27 | .. toctree:: 28 | :glob: 29 | :maxdepth: 1 30 | :caption: Other Tutorials 31 | 32 | tutorials/* 33 | 34 | .. toctree:: 35 | :maxdepth: 1 36 | :caption: Package Reference 37 | 38 | nn 39 | parallel 40 | utils 41 | 42 | Indices and tables 43 | ================== 44 | 45 | * :ref:`genindex` 46 | * :ref:`modindex` 47 | -------------------------------------------------------------------------------- /docs/source/model_zoo/imagenet.rst: -------------------------------------------------------------------------------- 1 | Image Classification 2 | ==================== 3 | 4 | Install Package 5 | --------------- 6 | 7 | - Clone the GitHub repo:: 8 | 9 | git clone https://github.com/zhanghang1989/PyTorch-Encoding 10 | 11 | - Install PyTorch Encoding (if not yet). Please follow the installation guide `Installing PyTorch Encoding <../notes/compile.html>`_. 12 | 13 | Get Pre-trained Model 14 | --------------------- 15 | 16 | .. 
hint:: 17 | How to get pretrained model, for example ``ResNeSt50``:: 18 | 19 | model = encoding.models.get_model('ResNeSt50', pretrained=True) 20 | 21 | After clicking ``cmd`` in the table, the command for training the model can be found below the table. 22 | 23 | .. role:: raw-html(raw) 24 | :format: html 25 | 26 | 27 | ResNeSt 28 | ~~~~~~~ 29 | 30 | .. note:: 31 | The provided models were trained using MXNet Gluon, this PyTorch implementation is slightly worse than the original implementation. 32 | 33 | =============================== ============== ============== ========================================================================================================= 34 | Model crop-size Acc Command 35 | =============================== ============== ============== ========================================================================================================= 36 | ResNeSt-50 224 81.03 :raw-html:`cmd` 37 | ResNeSt-101 256 82.83 :raw-html:`cmd` 38 | ResNeSt-200 320 83.84 :raw-html:`cmd` 39 | ResNeSt-269 416 84.54 :raw-html:`cmd` 40 | =============================== ============== ============== ========================================================================================================= 41 | 42 | .. raw:: html 43 | 44 | 48 | 49 | 53 | 54 | 58 | 59 | 63 | 64 | Test Pretrained 65 | ~~~~~~~~~~~~~~~ 66 | 67 | - Prepare the datasets by downloading the data into current folder and then runing the scripts in the ``scripts/`` folder:: 68 | 69 | python scripts/prepare_imagenet.py --data-dir ./ 70 | 71 | - The test script is in the ``experiments/recognition/`` folder. For evaluating the model (using MS), 72 | for example ``ResNeSt50``:: 73 | 74 | python verify.py --dataset imagenet --model ResNeSt50 --crop-size 224 75 | 76 | Train Your Own Model 77 | -------------------- 78 | 79 | - Prepare the datasets by downloading the data into current folder and then runing the scripts in the ``scripts/`` folder:: 80 | 81 | python scripts/prepare_imagenet.py --data-dir ./ 82 | 83 | - The training script is in the ``experiments/recognition/`` folder. Commands for reproducing pre-trained models can be found in the table. 84 | 85 | 86 | -------------------------------------------------------------------------------- /docs/source/nn.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | encoding.nn 5 | =========== 6 | 7 | Customized NN modules in Encoding Package. For Synchronized Cross-GPU Batch Normalization, please visit :class:`encoding.nn.BatchNorm2d`. 8 | 9 | .. currentmodule:: encoding.nn 10 | 11 | :hidden:`Encoding` 12 | ~~~~~~~~~~~~~~~~~~ 13 | 14 | .. autoclass:: Encoding 15 | :members: 16 | 17 | :hidden:`DistSyncBatchNorm` 18 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 19 | 20 | .. autoclass:: DistSyncBatchNorm 21 | :members: 22 | 23 | :hidden:`SyncBatchNorm` 24 | ~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. autoclass:: SyncBatchNorm 27 | :members: 28 | 29 | :hidden:`BatchNorm1d` 30 | ~~~~~~~~~~~~~~~~~~~~~~~~ 31 | 32 | .. autoclass:: BatchNorm1d 33 | :members: 34 | 35 | :hidden:`BatchNorm2d` 36 | ~~~~~~~~~~~~~~~~~~~~~~~~ 37 | 38 | .. autoclass:: BatchNorm2d 39 | :members: 40 | 41 | :hidden:`BatchNorm3d` 42 | ~~~~~~~~~~~~~~~~~~~~~~~~ 43 | 44 | .. autoclass:: BatchNorm3d 45 | :members: 46 | 47 | :hidden:`Inspiration` 48 | ~~~~~~~~~~~~~~~~~~~~~ 49 | 50 | .. autoclass:: Inspiration 51 | :members: 52 | 53 | :hidden:`UpsampleConv2d` 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | .. 
autoclass:: UpsampleConv2d
57 |     :members:
58 | 
59 | :hidden:`GramMatrix`
60 | ~~~~~~~~~~~~~~~~~~~~
61 | 
62 | .. autoclass:: GramMatrix
63 |     :members:
64 | 
--------------------------------------------------------------------------------
/docs/source/notes/compile.rst:
--------------------------------------------------------------------------------
1 | Install and Citations
2 | =====================
3 | 
4 | 
5 | Installation
6 | ------------
7 | 
8 | * Install PyTorch 1.4.0 by following the `PyTorch instructions `_.
9 | 
10 | * PIP Install::
11 | 
12 |     pip install git+https://github.com/zhanghang1989/PyTorch-Encoding/
13 |     # macOS
14 |     CC=clang CXX=clang++ pip install git+https://github.com/zhanghang1989/PyTorch-Encoding/
15 | 
16 | * Install from source::
17 | 
18 |     git clone https://github.com/zhanghang1989/PyTorch-Encoding && cd PyTorch-Encoding
19 |     # ubuntu
20 |     python setup.py install
21 |     # macOS
22 |     CC=clang CXX=clang++ python setup.py install
23 | 
24 | 
25 | Using Docker
26 | ------------
27 | 
28 | We strongly recommend the Docker option if you are experiencing any errors with the standard installation.
29 | 
30 | * Install Docker Engine by following the `Install Docker Engine `_.
31 | * Build the docker image::
32 | 
33 |     git clone https://github.com/zhanghang1989/PyTorch-Encoding && cd PyTorch-Encoding
34 |     bash scripts/build_docker.sh
35 | 
36 | * Run the Docker container::
37 | 
38 |     bash scripts/run_docker.sh
39 | 
40 | 
41 | Detailed Steps
42 | --------------
43 | 
44 | This tutorial is a successful setup example for an AWS EC2 p3 instance with Ubuntu 16.04 and CUDA 10.
45 | We cannot guarantee it will work on all machines, but the steps should be similar.
46 | We assume CUDA and cuDNN are already successfully installed; otherwise, please refer to other tutorials.
47 | 
48 | * Install Anaconda from the `link `_ .
49 | 
50 | * Install ninja::
51 | 
52 |     wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip
53 |     sudo unzip ninja-linux.zip -d /usr/local/bin/
54 |     sudo update-alternatives --install /usr/bin/ninja ninja /usr/local/bin/ninja 1 --force
55 | 
56 | * Install PyTorch::
57 | 
58 |     conda install pytorch torchvision cudatoolkit=10.0 -c pytorch
59 | 
60 | * Install this package::
61 | 
62 |     pip install git+https://github.com/zhanghang1989/PyTorch-Encoding/
63 | 
64 | Citations
65 | ---------
66 | 
67 | .. note::
68 |     * Hang Zhang et al. "ResNeSt: Split-Attention Networks" *arXiv 2020*::
69 | 
70 |         @article{zhang2020resnest,
71 |         title={ResNeSt: Split-Attention Networks},
72 |         author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
73 |         journal={arXiv preprint arXiv:2004.08955},
74 |         year={2020}
75 |         }
76 | 
77 |     * Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*::
78 | 
79 |         @InProceedings{Zhang_2018_CVPR,
80 |         author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit},
81 |         title = {Context Encoding for Semantic Segmentation},
82 |         booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
83 |         month = {June},
84 |         year = {2018}
85 |         }
86 | 
87 | 
88 |     * Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network."
      *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*::
89 | 
90 |         @InProceedings{Zhang_2017_CVPR,
91 |         author = {Zhang, Hang and Xue, Jia and Dana, Kristin},
92 |         title = {Deep TEN: Texture Encoding Network},
93 |         booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
94 |         month = {July},
95 |         year = {2017}
96 |         }
97 | 
--------------------------------------------------------------------------------
/docs/source/parallel.rst:
--------------------------------------------------------------------------------
1 | .. role:: hidden
2 |     :class: hidden-section
3 | 
4 | encoding.parallel
5 | =================
6 | 
7 | - The current PyTorch DataParallel implementation does not support multi-GPU loss calculation, which leaves the GPU memory usage unbalanced. We address this issue here by applying DataParallel to both the model and the criterion (a usage sketch is given at the bottom of this page).
8 | 
9 | .. note::
10 |     Deprecated, please use torch.nn.parallel.DistributedDataParallel with :class:`encoding.nn.DistSyncBatchNorm` for the best performance.
11 | 
12 | .. automodule:: encoding.parallel
13 | .. currentmodule:: encoding.parallel
14 | 
15 | :hidden:`DataParallelModel`
16 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
17 | 
18 | .. autoclass:: DataParallelModel
19 |     :members:
20 | 
21 | :hidden:`DataParallelCriterion`
22 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23 | 
24 | .. autoclass:: DataParallelCriterion
25 |     :members:
26 | 
27 | 
28 | :hidden:`allreduce`
29 | ~~~~~~~~~~~~~~~~~~~
30 | 
31 | .. autofunction:: allreduce
32 | 
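33 | A typical usage sketch (illustrative only; see the class docstrings above for the authoritative API — ``net``, ``inputs`` and ``targets`` are placeholders)::
34 | 
35 |     import encoding
36 |     import torch.nn as nn
37 | 
38 |     model = encoding.parallel.DataParallelModel(net, device_ids=[0, 1, 2])
39 |     criterion = encoding.parallel.DataParallelCriterion(nn.CrossEntropyLoss(),
40 |                                                         device_ids=[0, 1, 2])
41 |     outputs = model(inputs)             # outputs stay scattered across the GPUs
42 |     loss = criterion(outputs, targets)  # per-GPU losses, reduced to a scalar
43 |     loss.backward()
44 | 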
--------------------------------------------------------------------------------
/docs/source/tutorials/syncbn.rst:
--------------------------------------------------------------------------------
1 | Implementing Synchronized Multi-GPU Batch Normalization
2 | =======================================================
3 | 
4 | In this tutorial, we discuss the implementation detail of Multi-GPU Batch Normalization (BN) (classic implementation: :class:`encoding.nn.BatchNorm2d`). We will provide a training example in a later version.
5 | 
6 | How does BN work?
7 | -----------------
8 | 
9 | The BN layer was introduced in the paper `Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift `_, which dramatically speeds up the training process of the network (it enables a larger learning rate) and makes the network less sensitive to weight initialization.
10 | 
11 | .. image:: http://hangzh.com/blog/images/bn1.png
12 |     :align: center
13 | 
14 | - Forward Pass:
15 |     For the input data :math:`X={x_1, ...x_N}`, the data are normalized to be zero-mean and unit-variance, then scaled and shifted:
16 | 
17 |     .. math::
18 |         y_i = \gamma\cdot\frac{x_i-\mu}{\sigma} + \beta ,
19 | 
20 |     where :math:`\mu=\frac{\sum_i^N x_i}{N} , \sigma = \sqrt{\frac{\sum_i^N (x_i-\mu)^2}{N}+\epsilon}` and :math:`\gamma, \beta` are the learnable parameters.
21 | 
22 | - Backward Pass:
23 |     For calculating the gradient :math:`\frac{d_\ell}{d_{x_i}}`, we need to consider the partial gradient from :math:`\frac{d_\ell}{d_y}` and the gradients from :math:`\frac{d_\ell}{d_\mu}` and :math:`\frac{d_\ell}{d_\sigma}`, since :math:`\mu \text{ and } \sigma` are functions of the input :math:`x_i`. We use partial derivatives in the notation:
24 | 
25 |     .. math::
26 | 
27 |         \frac{d_\ell}{d_{x_i}} = \frac{d_\ell}{d_{y_i}}\cdot\frac{\partial_{y_i}}{\partial_{x_i}} + \frac{d_\ell}{d_\mu}\cdot\frac{d_\mu}{d_{x_i}} + \frac{d_\ell}{d_\sigma}\cdot\frac{d_\sigma}{d_{x_i}}
28 | 
29 |     where :math:`\frac{\partial_{y_i}}{\partial_{x_i}}=\frac{\gamma}{\sigma}, \frac{d_\ell}{d_\mu}=-\frac{\gamma}{\sigma}\sum_i^N\frac{d_\ell}{d_{y_i}}
30 |     \text{ and } \frac{d_\sigma}{d_{x_i}}=-\frac{1}{\sigma}(\frac{x_i-\mu}{N})`.
31 | 
32 | Why Synchronize BN?
33 | -------------------
34 | 
35 | - Standard implementations of BN in public frameworks (such as Caffe, MXNet, Torch, TF, PyTorch) are unsynchronized, which means that the data are normalized within each GPU. Therefore the `working batch-size` of the BN layer is `BatchSize/nGPU` (the batch-size in each GPU).
36 | 
37 | .. image:: http://hangzh.com/blog/images/bn2.png
38 |     :align: center
39 | 
40 | - Since the `working batch-size` is typically large enough for standard vision tasks, such as classification and detection, there is no need to synchronize the BN layer during training; the synchronization would only slow down the training.
41 | 
42 | - However, for the Semantic Segmentation task, the state-of-the-art approaches typically adopt dilated convolution, which is very memory consuming. The `working batch-size` can be too small for BN layers (2 or 4 in each GPU) when using larger/deeper pre-trained networks, such as :class:`encoding.dilated.ResNet` or :class:`encoding.dilated.DenseNet`.
43 | 
44 | How to Synchronize?
45 | -------------------
46 | 
47 | Suppose we have :math:`K` GPUs; :math:`sum(x)_k` and :math:`sum(x^2)_k` denote the sum of elements and the sum of element squares in the :math:`k^{th}` GPU.
48 | 
49 | - Forward Pass:
50 |     We can calculate the sum of elements :math:`sum(x)=\sum x_i \text{ and sum of squares } sum(x^2)=\sum x_i^2` in each GPU, then apply the :class:`encoding.parallel.allreduce` operation to sum across GPUs. Then calculate the global mean :math:`\mu=\frac{sum(x)}{N} \text{ and global variance } \sigma=\sqrt{\frac{sum(x^2)}{N}-\mu^2+\epsilon}`.
51 | 
52 | - Backward Pass:
53 |     * :math:`\frac{d_\ell}{d_{x_i}}=\frac{d_\ell}{d_{y_i}}\frac{\gamma}{\sigma}` can be calculated locally in each GPU.
54 |     * Calculate the gradients of :math:`sum(x)` and :math:`sum(x^2)` individually in each GPU, i.e. :math:`\frac{d_\ell}{d_{sum(x)_k}}` and :math:`\frac{d_\ell}{d_{sum(x^2)_k}}`.
55 | 
56 |     * Then sync the gradient (automatically handled by :class:`encoding.parallel.allreduce`) and continue the backward pass.
57 | 
58 | .. image:: http://hangzh.com/blog/images/bn3.png
59 |     :align: center
60 | 
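61 | A minimal sketch of the forward-pass synchronization described above, using an all-reduce over the per-GPU sums (this illustrates the math under the assumption of an already-initialized ``torch.distributed`` process group; it is not the actual :class:`encoding.nn.DistSyncBatchNorm` implementation)::
62 | 
63 |     import torch
64 |     import torch.distributed as dist
65 | 
66 |     def global_mean_sigma(x, eps=1e-5):
67 |         # x: (N, C, H, W) on this GPU; BN statistics are per channel
68 |         count = torch.tensor([x.numel() / x.size(1)], device=x.device)
69 |         xsum = x.sum(dim=(0, 2, 3))          # sum(x)_k
70 |         xsqsum = (x * x).sum(dim=(0, 2, 3))  # sum(x^2)_k
71 |         # sum the statistics across all K GPUs (the default reduce op is SUM)
72 |         for t in (count, xsum, xsqsum):
73 |             dist.all_reduce(t)
74 |         mu = xsum / count
75 |         sigma = (xsqsum / count - mu * mu + eps).sqrt()
76 |         return mu, sigma
77 | 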
"Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*:: 68 | 69 | @InProceedings{Zhang_2018_CVPR, 70 | author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit}, 71 | title = {Context Encoding for Semantic Segmentation}, 72 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 73 | month = {June}, 74 | year = {2018} 75 | } 76 | -------------------------------------------------------------------------------- /docs/source/tutorials/texture.rst: -------------------------------------------------------------------------------- 1 | Deep TEN: Deep Texture Encoding Network Example 2 | =============================================== 3 | 4 | .. image:: ../_static/img/cvpr17.svg 5 | :width: 100% 6 | :align: left 7 | 8 | In this section, we show an example of training/testing Encoding-Net for texture recognition on MINC-2500 dataset. Comparing to original Torch implementation, we use *different learning rate* for pre-trained base network and encoding layer (10x), disable color jittering after reducing lr and adopt much *smaller training image size* (224 instead of 352). 9 | 10 | 11 | Test Pre-trained Model 12 | ---------------------- 13 | 14 | - Clone the GitHub repo:: 15 | 16 | git clone https://github.com/zhanghang1989/PyTorch-Encoding 17 | 18 | - Install PyTorch Encoding (if not yet). Please follow the installation guide `Installing PyTorch Encoding <../notes/compile.html>`_. 19 | 20 | - Download the `MINC-2500 `_ dataset using the providied script:: 21 | 22 | cd PyTorch-Encoding/ 23 | python scripts/prepare_minc.py 24 | 25 | - Test pre-trained model on MINC-2500. The pre-trained weight will be automatic downloaded (pre-trained on train-1 split using single training size of 224, with an error rate of :math:`18.96\%` using single crop on test-1 set):: 26 | 27 | python verify.py --dataset minc --model deepten_resnet50_minc 28 | # Teriminal Output: 29 | # Top1: 81.043 | Top5: 95.617: 100%|███████████████████████████████████| 45/45 [00:18<00:00, 2.40it/s] 30 | # Top1 Acc: 81.043 | Top5 Acc: 95.617 31 | 32 | 33 | Train Your Own Model 34 | -------------------- 35 | 36 | - Example training command for training above model:: 37 | 38 | CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dist.py --dataset minc --model deepten_resnet50_minc --batch-size 512 --lr 0.004 --epochs 80 --lr-step 60 --lr-scheduler step --weight-decay 5e-4 39 | 40 | - Detail training options:: 41 | 42 | -h, --help show this help message and exit 43 | --dataset DATASET training dataset (default: cifar10) 44 | --model MODEL network model type (default: densenet) 45 | --backbone BACKBONE backbone name (default: resnet50) 46 | --batch-size N batch size for training (default: 128) 47 | --test-batch-size N batch size for testing (default: 1000) 48 | --epochs N number of epochs to train (default: 300) 49 | --start_epoch N the epoch number to start (default: 0) 50 | --lr LR learning rate (default: 0.1) 51 | --momentum M SGD momentum (default: 0.9) 52 | --weight-decay M SGD weight decay (default: 1e-4) 53 | --no-cuda disables CUDA training 54 | --plot matplotlib 55 | --seed S random seed (default: 1) 56 | --resume RESUME put the path to resuming file if needed 57 | --checkname set the checkpoint name 58 | --eval evaluating 59 | 60 | 61 | Citation 62 | -------- 63 | 64 | .. note:: 65 | * Hang Zhang, Jia Xue, and Kristin Dana. "Deep TEN: Texture Encoding Network." 
*The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2017*:: 66 | 67 | @InProceedings{Zhang_2017_CVPR, 68 | author = {Zhang, Hang and Xue, Jia and Dana, Kristin}, 69 | title = {Deep TEN: Texture Encoding Network}, 70 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 71 | month = {July}, 72 | year = {2017} 73 | } 74 | -------------------------------------------------------------------------------- /docs/source/utils.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | encoding.utils 5 | ============== 6 | 7 | Useful util functions. 8 | 9 | .. automodule:: encoding.utils 10 | .. currentmodule:: encoding.utils 11 | 12 | :hidden:`LR_Scheduler` 13 | ~~~~~~~~~~~~~~~~~~~~~~ 14 | 15 | .. autoclass:: LR_Scheduler 16 | :members: 17 | 18 | :hidden:`save_checkpoint` 19 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 20 | 21 | .. autofunction:: save_checkpoint 22 | 23 | :hidden:`SegmentationMetric` 24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. autoclass:: SegmentationMetric 27 | :members: 28 | 29 | :hidden:`batch_pix_accuracy` 30 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 31 | 32 | .. autofunction:: batch_pix_accuracy 33 | 34 | :hidden:`batch_intersection_union` 35 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 36 | 37 | .. autofunction:: batch_intersection_union 38 | -------------------------------------------------------------------------------- /encoding/__init__.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """An optimized PyTorch package with CUDA backend.""" 12 | from .version import __version__ 13 | from . 
import nn, functions, parallel, utils, models, datasets, transforms
14 | 
--------------------------------------------------------------------------------
/encoding/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from torchvision.datasets import *
3 | from .base import *
4 | from .coco import COCOSegmentation
5 | from .ade20k import ADE20KSegmentation
6 | from .pascal_voc import VOCSegmentation
7 | from .pascal_aug import VOCAugSegmentation
8 | from .pcontext import ContextSegmentation
9 | from .cityscapes import CitySegmentation
10 | from .imagenet import ImageNetDataset
11 | from .minc import MINCDataset
12 | 
13 | from ..utils import EncodingDeprecationWarning
14 | 
15 | datasets = {
16 |     'coco': COCOSegmentation,
17 |     'ade20k': ADE20KSegmentation,
18 |     'pascal_voc': VOCSegmentation,
19 |     'pascal_aug': VOCAugSegmentation,
20 |     'pcontext': ContextSegmentation,
21 |     'citys': CitySegmentation,
22 |     'imagenet': ImageNetDataset,
23 |     'minc': MINCDataset,
24 |     'cifar10': CIFAR10,
25 | }
26 | 
27 | acronyms = {
28 |     'coco': 'coco',
29 |     'pascal_voc': 'voc',
30 |     'pascal_aug': 'voc',
31 |     'pcontext': 'pcontext',
32 |     'ade20k': 'ade',
33 |     'citys': 'citys',
34 |     'minc': 'minc',
35 |     'cifar10': 'cifar10',
36 | }
37 | 
38 | def get_dataset(name, **kwargs):
39 |     # look up a dataset by name, e.g. get_dataset('ade20k', split='train')
40 |     return datasets[name.lower()](**kwargs)
41 | 
42 | def _make_deprecate(meth, old_name):
43 |     new_name = meth.__name__
44 | 
45 |     def deprecated_init(*args, **kwargs):
46 |         warnings.warn("encoding.datasets.{} is now deprecated in favor of encoding.datasets.{}."
47 |                       .format(old_name, new_name), EncodingDeprecationWarning)
48 |         return meth(*args, **kwargs)
49 | 
50 |     deprecated_init.__doc__ = r"""
51 |     {old_name}(...)
52 |     .. warning::
53 |         This method is now deprecated in favor of :func:`encoding.datasets.{new_name}`.
54 |         See :func:`~encoding.datasets.{new_name}` for details.""".format(
55 |         old_name=old_name, new_name=new_name)
56 |     deprecated_init.__name__ = old_name
57 |     return deprecated_init
58 | 
59 | get_segmentation_dataset = _make_deprecate(get_dataset, 'get_segmentation_dataset')
60 | 
--------------------------------------------------------------------------------
/encoding/datasets/base.py:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | # Created by: Hang Zhang
3 | # Email: zhang.hang@rutgers.edu
4 | # Copyright (c) 2017
5 | ###########################################################################
6 | 
7 | import random
8 | import numpy as np
9 | from PIL import Image, ImageOps, ImageFilter
10 | import torch
11 | import torch.utils.data as data
12 | 
13 | __all__ = ['BaseDataset', 'test_batchify_fn']
14 | 
15 | class BaseDataset(data.Dataset):
16 |     def __init__(self, root, split, mode=None, transform=None,
17 |                  target_transform=None, base_size=520, crop_size=480):
18 |         self.root = root
19 |         self.transform = transform
20 |         self.target_transform = target_transform
21 |         self.split = split
22 |         self.mode = mode if mode is not None else split
23 |         self.base_size = base_size
24 |         self.crop_size = crop_size
25 |         if self.mode == 'train':
26 |             print('BaseDataset: base_size {}, crop_size {}'.format(
27 |                 base_size, crop_size))
28 | 
29 |     def __getitem__(self, index):
30 |         raise NotImplementedError
31 | 
32 |     @property
33 |     def num_class(self):
34 |         return self.NUM_CLASS
35 | 
36 |     @property
37 |     def pred_offset(self):
38 |         raise NotImplementedError
39 | 
40 |     def make_pred(self, x):
41 |         return x + self.pred_offset
42 | 
43 |     def _val_sync_transform(self, img, mask):
44 |         outsize = self.crop_size
45 |         short_size = outsize
46 |         w, h = img.size
47 |         if w > h:
48 |             oh = short_size
49 |             ow = int(1.0 * w * oh / h)
50 |         else:
51 |             ow = short_size
52 |             oh = int(1.0 * h * ow / w)
53 |         img = img.resize((ow, oh), Image.BILINEAR)
54 |         mask = mask.resize((ow, oh), Image.NEAREST)
55 |         # center crop
56 |         w, h = img.size
57 |         x1 = int(round((w - outsize) / 2.))
58 |         y1 = int(round((h - outsize) / 2.))
59 |         img = img.crop((x1, y1, x1+outsize, y1+outsize))
60 |         mask = mask.crop((x1, y1, x1+outsize, y1+outsize))
61 |         # final transform
62 |         return img, self._mask_transform(mask)
63 | 
64 |     def _sync_transform(self, img, mask):
65 |         # random mirror
66 |         if random.random() < 0.5:
67 |             img = img.transpose(Image.FLIP_LEFT_RIGHT)
68 |             mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
69 |         crop_size = self.crop_size
70 |         # random scale (short edge)
71 |         w, h = img.size
72 |         long_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0))
73 |         if h > w:
74 |             oh = long_size
75 |             ow = int(1.0 * w * long_size / h + 0.5)
76 |             short_size = ow
77 |         else:
78 |             ow = long_size
79 |             oh = int(1.0 * h * long_size / w + 0.5)
80 |             short_size = oh
81 |         img = img.resize((ow, oh), Image.BILINEAR)
82 |         mask = mask.resize((ow, oh), Image.NEAREST)
83 |         # pad crop
84 |         if short_size < crop_size:
85 |             padh = crop_size - oh if oh < crop_size else 0
86 |             padw = crop_size - ow if ow < crop_size else 0
87 |             img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
88 |             mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
89 |         # random crop crop_size
90 |         w, h = img.size
91 |         x1 = random.randint(0, w - crop_size)
92 |         y1 = random.randint(0, h - crop_size)
93 |         img = img.crop((x1, y1, x1+crop_size, y1+crop_size))
94 |         mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size))
95 |         # final transform
96 |         return img, self._mask_transform(mask)
97 | 
98 |     def _mask_transform(self, mask):
99 |         return torch.from_numpy(np.array(mask)).long()
100 | 
101 | 
102 | def test_batchify_fn(data):
103 |     error_msg = "batch must contain tensors, tuples or lists; found {}"
104 |     if isinstance(data[0], (str, torch.Tensor)):
105 |         return list(data)
106 |     elif isinstance(data[0], (tuple, list)):
107 |         data = zip(*data)
108 |         return [test_batchify_fn(i) for i in data]
109 |     raise TypeError(error_msg.format(type(data[0])))
110 | 
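111 | 
112 | if __name__ == '__main__':
113 |     # Minimal usage sketch: test_batchify_fn is meant as a DataLoader
114 |     # collate_fn, keeping variable-sized test inputs as lists instead of
115 |     # stacking them into a single batch tensor.
116 |     batch = [(torch.zeros(3, 480, 480), 'a.jpg'),
117 |              (torch.zeros(3, 520, 480), 'b.jpg')]
118 |     imgs, paths = test_batchify_fn(batch)
119 |     assert len(imgs) == 2 and list(paths) == ['a.jpg', 'b.jpg']
120 | 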
--------------------------------------------------------------------------------
/encoding/datasets/coco.py:
--------------------------------------------------------------------------------
1 | import os
2 | from tqdm import trange
3 | from PIL import Image, ImageOps, ImageFilter
4 | import numpy as np
5 | import torch
6 | 
7 | from .base import BaseDataset
8 | 
9 | class COCOSegmentation(BaseDataset):
10 |     NUM_CLASS = 21
11 |     CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4,
12 |                 1, 64, 20, 63, 7, 72]
13 |     def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train',
14 |                  mode=None, transform=None, target_transform=None, **kwargs):
15 |         super(COCOSegmentation, self).__init__(
16 |             root, split, mode, transform, target_transform, **kwargs)
17 |         from pycocotools.coco import COCO
18 |         from pycocotools import mask
19 |         if split == 'train':
20 |             print('train set')
21 |             ann_file = os.path.join(root, 'annotations/instances_train2017.json')
22 |             ids_file = os.path.join(root, 'annotations/train_ids.pth')
23 |             self.root = os.path.join(root, 'train2017')
24 |         else:
25 |             print('val set')
26 |             assert split == 'val'
27 |             ann_file = os.path.join(root, 'annotations/instances_val2017.json')
28 |             ids_file = os.path.join(root, 'annotations/val_ids.pth')
29 |             self.root = os.path.join(root, 'val2017')
30 |         self.coco = COCO(ann_file)
31 |         self.coco_mask = mask
32 |         if os.path.exists(ids_file):
33 |             self.ids = torch.load(ids_file)
34 |         else:
35 |             ids = list(self.coco.imgs.keys())
36 |             self.ids = self._preprocess(ids, ids_file)
37 |         self.transform = transform
38 |         self.target_transform = target_transform
39 | 
40 |     def __getitem__(self, index):
41 |         coco = self.coco
42 |         img_id = self.ids[index]
43 |         img_metadata = coco.loadImgs(img_id)[0]
44 |         path = img_metadata['file_name']
45 |         img = Image.open(os.path.join(self.root, path)).convert('RGB')
46 |         cocotarget = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
47 |         mask = Image.fromarray(self._gen_seg_mask(
48 |             cocotarget, img_metadata['height'], img_metadata['width']))
49 |         # synchronized transform
50 |         if self.mode == 'train':
51 |             img, mask = self._sync_transform(img, mask)
52 |         elif self.mode == 'val':
53 |             img, mask = self._val_sync_transform(img, mask)
54 |         else:
55 |             assert self.mode == 'testval'
56 |             mask = self._mask_transform(mask)
57 |         # general resize, normalize and toTensor
58 |         if self.transform is not None:
59 |             img = self.transform(img)
60 |         if self.target_transform is not None:
61 |             mask = self.target_transform(mask)
62 |         return img, mask
63 | 
64 |     def __len__(self):
65 |         return len(self.ids)
66 | 
67 |     def _gen_seg_mask(self, target, h, w):
68 |         mask = np.zeros((h, w), dtype=np.uint8)
69 |         coco_mask = self.coco_mask
70 |         for instance in target:
71 |             rle = coco_mask.frPyObjects(instance['segmentation'], h, w)
72 |             m = coco_mask.decode(rle)
73 |             cat = instance['category_id']
74 |             if cat in self.CAT_LIST:
75 |                 c = self.CAT_LIST.index(cat)
76 |             else:
77 |                 continue
78 |             if len(m.shape) < 3:
79 |                 mask[:, :] += (mask == 0) * (m * c)
80 |             else:
81 |                 mask[:, :] += (mask == 0) * (((np.sum(m, axis=2)) > 0) * c).astype(np.uint8)
82 |         return mask
83 | 
84 |     def _preprocess(self, ids, ids_file):
85 |         print("Preprocessing mask, this will take a while. " + \
86 |               "But don't worry, it only runs once for each split.")
87 |         tbar = trange(len(ids))
88 |         new_ids = []
89 |         for i in tbar:
90 |             img_id = ids[i]
91 |             cocotarget = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
92 |             img_metadata = self.coco.loadImgs(img_id)[0]
93 |             mask = self._gen_seg_mask(cocotarget, img_metadata['height'],
94 |                                       img_metadata['width'])
95 |             # keep only images whose mask covers more than 1k pixels
96 |             if (mask > 0).sum() > 1000:
97 |                 new_ids.append(img_id)
98 |             tbar.set_description('Doing: {}/{}, got {} qualified images'.\
99 |                 format(i, len(ids), len(new_ids)))
100 |         print('Found number of qualified images: ', len(new_ids))
101 |         torch.save(new_ids, ids_file)
102 |         return new_ids
103 | 
104 | """
105 | NUM_CHANNEL = 91
106 | [] background
107 | [5] airplane
108 | [2] bicycle
109 | [16] bird
110 | [9] boat
111 | [44] bottle
112 | [6] bus
113 | [3] car
114 | [17] cat
115 | [62] chair
116 | [21] cow
117 | [67] dining table
118 | [18] dog
119 | [19] horse
120 | [4] motorcycle
121 | [1] person
122 | [64] potted plant
123 | [20] sheep
124 | [63] couch
125 | [7] train
126 | [72] tv
127 | """
128 | 
--------------------------------------------------------------------------------
/encoding/datasets/folder.py:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | # Created by: Hang Zhang
3 | # Email: zhang.hang@rutgers.edu
4 | # Copyright (c) 2017
5 | ###########################################################################
6 | 
7 | import os
8 | import sys
9 | import numpy as np
10 | import random
11 | import math
12 | 
13 | import torch.utils.data as data
14 | import torchvision.transforms as transform
15 | from PIL import Image, ImageOps
16 | 
17 | class FolderLoader(data.Dataset):
18 |     def __init__(self, root, transform=None):
19 |         self.root = root
20 |         self.transform = transform
21 |         self.images = get_folder_images(root)
22 |         if len(self.images) == 0:
23 |             raise RuntimeError("Found 0 images in subfolders of: " + self.root + "\n")
24 | 
25 |     def __getitem__(self, index):
26 |         img = Image.open(self.images[index]).convert('RGB')
27 |         if self.transform is not None:
28 |             img = self.transform(img)
29 |         return img, os.path.basename(self.images[index])
30 | 
31 |     def __len__(self):
32 |         return len(self.images)
33 | 
34 | 
35 | def get_folder_images(img_folder):
36 |     img_paths = []
37 |     for filename in os.listdir(img_folder):
38 |         if filename.endswith(".jpg"):
39 |             imgpath = os.path.join(img_folder, filename)
40 |             img_paths.append(imgpath)
41 |     return img_paths
42 | 
43 | 
44 | class Dataloder():
45 |     # NOTE: the class name 'Dataloder' (sic) is kept for backward compatibility.
46 |     def __init__(self, args):
47 |         # the data augmentation is implemented as part of the dataloader
48 |         assert(args.test)
49 |         input_transform = transform.Compose([
50 |             transform.ToTensor(),
51 |             transform.Normalize(args.mean, args.std)])
52 |         args.test_batch_size = 1
53 | 
54 |         assert(args.test_folder is not None)
55 |         print('loading the data from: {}'.format(args.test_folder))
56 | 
57 |         testset = FolderLoader(args.test_folder, input_transform)
58 |         kwargs = {'num_workers': args.workers, 'pin_memory': True} \
59 |             if args.cuda else {}
60 |         self.trainloader = None
61 |         self.testloader = data.DataLoader(testset,
62 |                                           batch_size=args.test_batch_size,
63 |                                           shuffle=False, **kwargs)
64 | 
65 |     def getloader(self):
66 |         return self.trainloader, self.testloader
67 | 
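68 | 
69 | if __name__ == '__main__':
70 |     # Minimal usage sketch ('./images' is a hypothetical folder of *.jpg
71 |     # files): iterate over the dataset as (tensor, filename) pairs.
72 |     dataset = FolderLoader('./images', transform.ToTensor())
73 |     for img, name in dataset:
74 |         print(name, tuple(img.shape))
75 | 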
-------------------------------------------------------------------------------- /encoding/datasets/hpw18.py: -------------------------------------------------------------------------------- 1 | # created by: Sean Liu 2 | # Amazon Lab 126 3 | from __future__ import print_function 4 | 5 | import errno 6 | import hashlib 7 | import os 8 | import sys 9 | import tarfile 10 | import numpy as np 11 | import random 12 | import math 13 | 14 | import torch.utils.data as data 15 | import PIL 16 | from PIL import Image, ImageOps 17 | 18 | from six.moves import urllib 19 | 20 | 21 | class Segmentation_HPW18(data.Dataset): 22 | CLASSES = [ 23 | 'background', 'hat', 'hair', 'sunglasses', 'upper-clothes', 24 | 'skirt', 'pants', 'dress', 'belt', 'left-shoe', 'right-shoe', 25 | 'face', 'left-leg', 'right-leg', 'left-arm', 'right-arm', 'bag', 26 | 'scarf' 27 | ] 28 | 29 | URL = "/cvdata1/lliuqian/humanParsingDataset" 30 | FILE = "hpw18.tar.gz" 31 | MD5 = '' 32 | BASE_DIR = '' 33 | 34 | def __init__(self, 35 | root, 36 | train=True, 37 | transform=None, 38 | target_transform=None, 39 | download=False): 40 | self.root = root 41 | _hpw18_root = os.path.join(self.root, self.BASE_DIR) 42 | _mask_dir = os.path.join(_hpw18_root, 'SegmentationClassAug_256x384') 43 | _image_dir = os.path.join(_hpw18_root, 'JPEGImages_256x384') 44 | self.transform = transform 45 | self.target_transform = target_transform 46 | self.train = train 47 | 48 | if download: 49 | self._download() 50 | 51 | # train/val/test splits are pre-cut 52 | _splits_dir = _hpw18_root 53 | _split_f = os.path.join(_splits_dir, 'humanparsingImageMask_256x384_absPath_train.txt') 54 | if not self.train: 55 | _split_f = os.path.join(_splits_dir, 'humanparsingImageMask_256x384_absPath_val.txt') 56 | 57 | print("reading from ", _split_f) 58 | 59 | self.images = [] 60 | self.masks = [] 61 | with open(os.path.join(_split_f), "r") as lines: 62 | for line in lines: 63 | s = line.split() 64 | _image = s[0] # image absolution path 65 | _mask = s[1] # mask absolution path 66 | assert os.path.isfile(_image) 67 | assert os.path.isfile(_mask) 68 | self.images.append(_image) 69 | self.masks.append(_mask) 70 | assert (len(self.images) == len(self.masks)) 71 | 72 | def __getitem__(self, index): 73 | _img = Image.open(self.images[index]).convert('RGB') 74 | _timg = Image.open(self.masks[index]) 75 | _target = np.array(_timg, dtype=np.uint8) 76 | _target = Image.fromarray(_target) 77 | 78 | # synchrosized transform 79 | if self.train: 80 | _img, _target = self._sync_transform( _img, _target) 81 | 82 | # general resize, normalize and toTensor 83 | if self.transform is not None: 84 | _img = self.transform(_img) 85 | if self.target_transform is not None: 86 | _target = self.target_transform(_target) 87 | 88 | return _img, _target 89 | 90 | def __len__(self): 91 | return len(self.images) 92 | 93 | def _sync_transform(self, img, mask): 94 | # random rotate -10~10 95 | deg = random.uniform(-10,10) 96 | img = img.rotate(deg) 97 | mask = mask.rotate(deg, PIL.Image.NEAREST) 98 | 99 | return img, mask 100 | 101 | if __name__ == '__main__': 102 | hpw18 = Segmentation_HPW18('/cvdata1/lliuqian/', train=True) 103 | print(hpw18[0]) 104 | print (len(hpw18)) 105 | -------------------------------------------------------------------------------- /encoding/datasets/imagenet.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: 
zhanghang0704@gmail.com 4 | ## Copyright (c) 2018 5 | ## 6 | ## This source code is licensed under the MIT-style license found in the 7 | ## LICENSE file in the root directory of this source tree 8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 | 10 | import os 11 | import torchvision.transforms as transforms 12 | import torchvision.datasets as datasets 13 | 14 | import warnings 15 | warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning) 16 | 17 | class ImageNetDataset(datasets.ImageFolder): 18 | BASE_DIR = "ILSVRC2012" 19 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), transform=None, 20 | target_transform=None, train=True, **kwargs): 21 | split='train' if train == True else 'val' 22 | root = os.path.join(root, self.BASE_DIR, split) 23 | super(ImageNetDataset, self).__init__( 24 | root, transform, target_transform) 25 | -------------------------------------------------------------------------------- /encoding/datasets/minc.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import os 12 | from PIL import Image 13 | 14 | import torch 15 | import torch.utils.data as data 16 | 17 | class MINCDataset(data.Dataset): 18 | NUM_CLASS = 23 19 | def __init__(self, root=os.path.expanduser('~/.encoding/data/'), 20 | train=True, transform=None, download=None): 21 | split='train' if train == True else 'val' 22 | root = os.path.join(root, 'minc-2500') 23 | self.transform = transform 24 | classes, class_to_idx = find_classes(root + '/images') 25 | if split=='train': 26 | filename = os.path.join(root, 'labels/train1.txt') 27 | else: 28 | filename = os.path.join(root, 'labels/test1.txt') 29 | 30 | self.images, self.labels = make_dataset(filename, root, 31 | class_to_idx) 32 | assert (len(self.images) == len(self.labels)) 33 | 34 | def __getitem__(self, index): 35 | _img = Image.open(self.images[index]).convert('RGB') 36 | _label = self.labels[index] 37 | if self.transform is not None: 38 | _img = self.transform(_img) 39 | 40 | return _img, _label 41 | 42 | def __len__(self): 43 | return len(self.images) 44 | 45 | def find_classes(dir): 46 | classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))] 47 | classes.sort() 48 | class_to_idx = {classes[i]: i for i in range(len(classes))} 49 | return classes, class_to_idx 50 | 51 | 52 | def make_dataset(filename, datadir, class_to_idx): 53 | images = [] 54 | labels = [] 55 | with open(os.path.join(filename), "r") as lines: 56 | for line in lines: 57 | _image = os.path.join(datadir, line.rstrip('\n')) 58 | _dirname = os.path.split(os.path.dirname(_image))[1] 59 | assert os.path.isfile(_image) 60 | label = class_to_idx[_dirname] 61 | images.append(_image) 62 | labels.append(label) 63 | 64 | return images, labels 65 | 66 | -------------------------------------------------------------------------------- /encoding/datasets/pascal_aug.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import scipy.io 4 | import numpy as np 5 | from 
PIL import Image, ImageOps, ImageFilter 6 | 7 | from .base import BaseDataset 8 | 9 | class VOCAugSegmentation(BaseDataset): 10 | voc = [ 11 | 'background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle', 12 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 13 | 'motorcycle', 'person', 'potted-plant', 'sheep', 'sofa', 'train', 14 | 'tv' 15 | ] 16 | NUM_CLASS = 21 17 | TRAIN_BASE_DIR = 'VOCaug/dataset/' 18 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 19 | mode=None, transform=None, target_transform=None, **kwargs): 20 | super(VOCAugSegmentation, self).__init__(root, split, mode, transform, 21 | target_transform, **kwargs) 22 | # train/val/test splits are pre-cut 23 | _voc_root = os.path.join(root, self.TRAIN_BASE_DIR) 24 | _mask_dir = os.path.join(_voc_root, 'cls') 25 | _image_dir = os.path.join(_voc_root, 'img') 26 | if self.mode == 'train': 27 | _split_f = os.path.join(_voc_root, 'trainval.txt') 28 | elif self.mode == 'val': 29 | _split_f = os.path.join(_voc_root, 'val.txt') 30 | else: 31 | raise RuntimeError('Unknown dataset split.') 32 | self.images = [] 33 | self.masks = [] 34 | with open(os.path.join(_split_f), "r") as lines: 35 | for line in lines: 36 | _image = os.path.join(_image_dir, line.rstrip('\n')+".jpg") 37 | assert os.path.isfile(_image) 38 | self.images.append(_image) 39 | if self.mode != 'test': 40 | _mask = os.path.join(_mask_dir, line.rstrip('\n')+".mat") 41 | assert os.path.isfile(_mask) 42 | self.masks.append(_mask) 43 | 44 | assert (len(self.images) == len(self.masks)) 45 | 46 | def __getitem__(self, index): 47 | _img = Image.open(self.images[index]).convert('RGB') 48 | if self.mode == 'test': 49 | if self.transform is not None: 50 | _img = self.transform(_img) 51 | return _img, os.path.basename(self.images[index]) 52 | _target = self._load_mat(self.masks[index]) 53 | # synchrosized transform 54 | if self.mode == 'train': 55 | _img, _target = self._sync_transform( _img, _target) 56 | elif self.mode == 'val': 57 | _img, _target = self._val_sync_transform( _img, _target) 58 | # general resize, normalize and toTensor 59 | if self.transform is not None: 60 | _img = self.transform(_img) 61 | if self.target_transform is not None: 62 | _target = self.target_transform(_target) 63 | return _img, _target 64 | 65 | def _load_mat(self, filename): 66 | mat = scipy.io.loadmat(filename, mat_dtype=True, squeeze_me=True, 67 | struct_as_record=False) 68 | mask = mat['GTcls'].Segmentation 69 | return Image.fromarray(mask) 70 | 71 | def __len__(self): 72 | return len(self.images) 73 | -------------------------------------------------------------------------------- /encoding/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | from PIL import Image, ImageOps, ImageFilter 5 | from tqdm import tqdm 6 | 7 | import torch 8 | from .base import BaseDataset 9 | 10 | class VOCSegmentation(BaseDataset): 11 | CLASSES = [ 12 | 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 13 | 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 14 | 'motorbike', 'person', 'potted-plant', 'sheep', 'sofa', 'train', 15 | 'tv/monitor', 'ambigious' 16 | ] 17 | NUM_CLASS = 21 18 | BASE_DIR = 'VOCdevkit/VOC2012' 19 | def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train', 20 | mode=None, transform=None, target_transform=None, **kwargs): 21 | super(VOCSegmentation, self).__init__(root, split, mode, 
transform,
22 |                                               target_transform, **kwargs)
23 |         _voc_root = os.path.join(self.root, self.BASE_DIR)
24 |         _mask_dir = os.path.join(_voc_root, 'SegmentationClass')
25 |         _image_dir = os.path.join(_voc_root, 'JPEGImages')
26 |         # train/val/test splits are pre-cut
27 |         _splits_dir = os.path.join(_voc_root, 'ImageSets/Segmentation')
28 |         if self.mode == 'train':
29 |             _split_f = os.path.join(_splits_dir, 'trainval.txt')
30 |         elif self.mode == 'val':
31 |             _split_f = os.path.join(_splits_dir, 'val.txt')
32 |         elif self.mode == 'test':
33 |             _split_f = os.path.join(_splits_dir, 'test.txt')
34 |         else:
35 |             raise RuntimeError('Unknown dataset split.')
36 |         self.images = []
37 |         self.masks = []
38 |         with open(os.path.join(_split_f), "r") as lines:
39 |             for line in tqdm(lines):
40 |                 _image = os.path.join(_image_dir, line.rstrip('\n')+".jpg")
41 |                 assert os.path.isfile(_image)
42 |                 self.images.append(_image)
43 |                 if self.mode != 'test':
44 |                     _mask = os.path.join(_mask_dir, line.rstrip('\n')+".png")
45 |                     assert os.path.isfile(_mask)
46 |                     self.masks.append(_mask)
47 | 
48 |         if self.mode != 'test':
49 |             assert (len(self.images) == len(self.masks))
50 | 
51 |     def __getitem__(self, index):
52 |         img = Image.open(self.images[index]).convert('RGB')
53 |         if self.mode == 'test':
54 |             if self.transform is not None:
55 |                 img = self.transform(img)
56 |             return img, os.path.basename(self.images[index])
57 |         target = Image.open(self.masks[index])
58 |         # synchronized transform
59 |         if self.mode == 'train':
60 |             img, target = self._sync_transform(img, target)
61 |         elif self.mode == 'val':
62 |             img, target = self._val_sync_transform(img, target)
63 |         else:
64 |             assert self.mode == 'testval'
65 |             target = self._mask_transform(target)
66 |         # general resize, normalize and toTensor
67 |         if self.transform is not None:
68 |             img = self.transform(img)
69 |         if self.target_transform is not None:
70 |             target = self.target_transform(target)
71 |         return img, target
72 | 
73 |     def _mask_transform(self, mask):
74 |         target = np.array(mask).astype('int32')
75 |         target[target == 255] = -1
76 |         return torch.from_numpy(target).long()
77 | 
78 |     def __len__(self):
79 |         return len(self.images)
80 | 
81 |     @property
82 |     def pred_offset(self):
83 |         return 0
84 | 
--------------------------------------------------------------------------------
/encoding/datasets/pcontext.py:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | # Created by: Hang Zhang
3 | # Email: zhang.hang@rutgers.edu
4 | # Copyright (c) 2017
5 | ###########################################################################
6 | 
7 | from PIL import Image, ImageOps, ImageFilter
8 | import os
9 | import math
10 | import random
11 | import numpy as np
12 | from tqdm import trange
13 | 
14 | import torch
15 | from .base import BaseDataset
16 | 
17 | class ContextSegmentation(BaseDataset):
18 |     BASE_DIR = 'VOCdevkit/VOC2010'
19 |     NUM_CLASS = 59
20 |     def __init__(self, root=os.path.expanduser('~/.encoding/data'), split='train',
21 |                  mode=None, transform=None, target_transform=None, **kwargs):
22 |         super(ContextSegmentation, self).__init__(
23 |             root, split, mode, transform, target_transform, **kwargs)
24 |         from detail import Detail
25 |         #from detail import mask
26 |         root = os.path.join(root, self.BASE_DIR)
27 |         annFile = os.path.join(root, 'trainval_merged.json')
28 |         imgDir = os.path.join(root, 'JPEGImages')
29 |         # training mode
30 |         self.detail = Detail(annFile, imgDir, split)
31 | 
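        # `Detail` is the PASCAL-Context annotation API from the `detail`
        # package imported above; only detail.getImgs(), detail.getMask()
        # and detail.img_folder are used by this class (see below).
        # Raw annotations carry sparse category ids (up to 458 in the list
        # below); `self._mapping` keeps 0 plus the 59 classes of interest,
        # and `_class_to_index` remaps them to contiguous ids 0..59 before
        # `_mask_transform` shifts them by -1 at load time.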
self.transform = transform 32 | self.target_transform = target_transform 33 | self.ids = self.detail.getImgs() 34 | # generate masks 35 | self._mapping = np.sort(np.array([ 36 | 0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 37 | 23, 397, 25, 284, 158, 159, 416, 33, 162, 420, 454, 295, 296, 38 | 427, 44, 45, 46, 308, 59, 440, 445, 31, 232, 65, 354, 424, 39 | 68, 326, 72, 458, 34, 207, 80, 355, 85, 347, 220, 349, 360, 40 | 98, 187, 104, 105, 366, 189, 368, 113, 115])) 41 | self._key = np.array(range(len(self._mapping))).astype('uint8') 42 | mask_file = os.path.join(root, self.split+'.pth') 43 | print('mask_file:', mask_file) 44 | if os.path.exists(mask_file): 45 | self.masks = torch.load(mask_file) 46 | else: 47 | self.masks = self._preprocess(mask_file) 48 | 49 | def _class_to_index(self, mask): 50 | # assert the values 51 | values = np.unique(mask) 52 | for i in range(len(values)): 53 | assert(values[i] in self._mapping) 54 | index = np.digitize(mask.ravel(), self._mapping, right=True) 55 | return self._key[index].reshape(mask.shape) 56 | 57 | def _preprocess(self, mask_file): 58 | masks = {} 59 | tbar = trange(len(self.ids)) 60 | print("Preprocessing mask, this will take a while." + \ 61 | "But don't worry, it only run once for each split.") 62 | for i in tbar: 63 | img_id = self.ids[i] 64 | mask = Image.fromarray(self._class_to_index( 65 | self.detail.getMask(img_id))) 66 | masks[img_id['image_id']] = mask 67 | tbar.set_description("Preprocessing masks {}".format(img_id['image_id'])) 68 | torch.save(masks, mask_file) 69 | return masks 70 | 71 | def __getitem__(self, index): 72 | img_id = self.ids[index] 73 | path = img_id['file_name'] 74 | iid = img_id['image_id'] 75 | img = Image.open(os.path.join(self.detail.img_folder, path)).convert('RGB') 76 | if self.mode == 'test': 77 | if self.transform is not None: 78 | img = self.transform(img) 79 | return img, os.path.basename(path) 80 | # convert mask to 60 categories 81 | mask = self.masks[iid] 82 | # synchrosized transform 83 | if self.mode == 'train': 84 | img, mask = self._sync_transform(img, mask) 85 | elif self.mode == 'val': 86 | img, mask = self._val_sync_transform(img, mask) 87 | else: 88 | assert self.mode == 'testval' 89 | mask = self._mask_transform(mask) 90 | # general resize, normalize and toTensor 91 | if self.transform is not None: 92 | img = self.transform(img) 93 | if self.target_transform is not None: 94 | mask = self.target_transform(mask) 95 | return img, mask 96 | 97 | def _mask_transform(self, mask): 98 | target = np.array(mask).astype('int32') - 1 99 | return torch.from_numpy(target).long() 100 | 101 | def __len__(self): 102 | return len(self.ids) 103 | 104 | @property 105 | def pred_offset(self): 106 | return 1 107 | -------------------------------------------------------------------------------- /encoding/functions/__init__.py: -------------------------------------------------------------------------------- 1 | """Encoding Autograd Fuctions""" 2 | from .encoding import * 3 | from .syncbn import * 4 | from .dist_syncbn import dist_syncbatchnorm 5 | from .customize import * 6 | from .rectify import * 7 | -------------------------------------------------------------------------------- /encoding/functions/customize.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2018 5 | ## 6 | ## This source code is licensed 
under the MIT-style license found in the
7 | ## LICENSE file in the root directory of this source tree
8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 | 
10 | """Customized functions"""
11 | 
12 | import torch
13 | from torch.autograd import Variable, Function
14 | 
15 | from encoding import cpu
16 | if torch.cuda.device_count() > 0:
17 |     from encoding import gpu
18 | 
19 | __all__ = ['NonMaxSuppression']
20 | 
21 | def NonMaxSuppression(boxes, scores, threshold):
22 |     r"""Non-Maximum Suppression
23 |     The algorithm begins by storing the highest-scoring bounding
24 |     box, and eliminating any box whose intersection-over-union (IoU)
25 |     with it is too great. The procedure repeats on the surviving
26 |     boxes, and so on until there are no boxes left.
27 |     The stored boxes are returned.
28 | 
29 |     NB: The function returns a tuple (mask, indices), where
30 |     indices index into the input boxes and are sorted
31 |     according to score, from highest to lowest.
32 |     indices[i][mask[i]] gives the indices of the surviving
33 |     boxes from the ith batch, sorted by score.
34 | 
35 |     Args:
36 |         - boxes :math:`(N, n_boxes, 4)`
37 |         - scores :math:`(N, n_boxes)`
38 |         - threshold (float): IoU above which to eliminate boxes
39 | 
40 |     Outputs:
41 |         - mask: :math:`(N, n_boxes)`
42 |         - indices: :math:`(N, n_boxes)`
43 | 
44 |     Examples::
45 | 
46 |         >>> boxes = torch.Tensor([[[10., 20., 20., 15.],
47 |         >>>                        [24., 22., 50., 54.],
48 |         >>>                        [10., 21., 20., 14.5]]])
49 |         >>> scores = torch.abs(torch.randn([1, 3]))
50 |         >>> mask, indices = NonMaxSuppression(boxes, scores, 0.7)
51 |         >>> #indices are SORTED according to score.
52 |         >>> surviving_box_indices = indices[mask]
53 |     """
54 |     if boxes.is_cuda:
55 |         return gpu.non_max_suppression(boxes, scores, threshold)
56 |     else:
57 |         return cpu.non_max_suppression(boxes, scores, threshold)
58 | 
--------------------------------------------------------------------------------
/encoding/functions/dist_syncbn.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## LICENSE file in the root directory of this source tree
7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 | 
9 | import torch
10 | from torch.autograd.function import Function
11 | 
12 | from encoding import cpu
13 | if torch.cuda.device_count() > 0:
14 |     from encoding import gpu
15 | 
16 | __all__ = ['dist_syncbatchnorm']
17 | 
18 | class dist_syncbatchnorm_(Function):
19 |     @staticmethod
20 |     def forward(ctx, x, gamma, beta, running_mean, running_var, eps, momentum, training, process_group):
21 |         x = x.contiguous()
22 |         ctx.training = training
23 |         ctx.momentum = momentum
24 |         ctx.eps = eps
25 |         ctx.process_group = process_group
26 | 
27 |         if not ctx.training:
28 |             _ex, _var = running_mean.contiguous(), running_var.contiguous()
29 |             _exs = _var + _ex ** 2
30 |             if x.is_cuda:
31 |                 y = gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
32 |             else:
33 |                 y = cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
34 |             ctx.save_for_backward(x, _ex, _exs, gamma, beta)
35 |             return y
36 | 
37 |         size = x.numel() // x.size(1)
38 |         if size == 1:
39 |             raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
40 | 
41 |         if x.is_cuda:
42 |             _ex, _exs = gpu.expectation_forward(x)
43 |         else:
44 |             raise NotImplementedError
45 | 
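        # Cross-process statistics: each process contributes count=1 and its
        # local moments E[x] and E[x^2]; the async all_reduce calls below sum
        # them, so dividing by the reduced count averages the moments across
        # processes (this assumes equal per-GPU batch sizes). The variance is
        # then recovered further down as var = E[x^2] - E[x]^2.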
46 |         count = torch.Tensor([1]).to(x.device)
47 |         count_all_reduce = torch.distributed.all_reduce(count, group=process_group, async_op=True)
48 |         _ex_all_reduce = torch.distributed.all_reduce(_ex, group=process_group, async_op=True)
49 |         _exs_all_reduce = torch.distributed.all_reduce(_exs, group=process_group, async_op=True)
50 | 
51 |         count_all_reduce.wait()
52 |         _ex_all_reduce.wait()
53 |         _exs_all_reduce.wait()
54 | 
55 |         _ex = _ex / count
56 |         _exs = _exs / count
57 | 
58 |         # Update running stats
59 |         _var = _exs - _ex ** 2
60 |         running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * _ex)
61 |         running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * _var)
62 | 
63 |         # Mark in-place modified tensors
64 |         ctx.mark_dirty(running_mean, running_var)
65 | 
66 |         # BN forward + activation
67 |         if x.is_cuda:
68 |             y = gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
69 |         else:
70 |             y = cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
71 | 
72 |         ctx.save_for_backward(x, _ex, _exs, gamma, beta)
73 |         return y
74 | 
75 |     @staticmethod
76 |     def backward(ctx, dz):
77 |         x, _ex, _exs, gamma, beta = ctx.saved_tensors
78 |         dz = dz.contiguous()
79 | 
80 |         # BN backward
81 |         if dz.is_cuda:
82 |             dx, _dex, _dexs, dgamma, dbeta = \
83 |                 gpu.batchnorm_backward(dz, x, _ex, _exs, gamma, beta, ctx.eps)
84 |         else:
85 |             raise NotImplementedError
86 | 
87 |         if ctx.training:
88 |             process_group = ctx.process_group
89 |             count = torch.Tensor([1]).to(x.device)
90 |             count_all_reduce = torch.distributed.all_reduce(count, group=process_group, async_op=True)
91 |             _dex_all_reduce = torch.distributed.all_reduce(_dex, group=process_group, async_op=True)
92 |             _dexs_all_reduce = torch.distributed.all_reduce(_dexs, group=process_group, async_op=True)
93 | 
94 |             count_all_reduce.wait()
95 |             _dex_all_reduce.wait()
96 |             _dexs_all_reduce.wait()
97 | 
98 |             _dex = _dex / count
99 |             _dexs = _dexs / count
100 | 
101 |             if x.is_cuda:
102 |                 dx_ = gpu.expectation_backward(x, _dex, _dexs)
103 |             else:
104 |                 raise NotImplementedError
105 |             dx = dx + dx_
106 | 
107 |         return dx, dgamma, dbeta, None, None, None, None, None, None
108 | 
109 | dist_syncbatchnorm = dist_syncbatchnorm_.apply
110 | 
--------------------------------------------------------------------------------
/encoding/functions/encoding.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2018
5 | ##
6 | ## This source code is licensed under the MIT-style license found in the
7 | ## LICENSE file in the root directory of this source tree
8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 | 
10 | """Functions for Encoding Layer"""
11 | import torch
12 | from torch.autograd import Function, Variable
13 | import torch.nn.functional as F
14 | 
15 | from encoding import cpu
16 | if torch.cuda.device_count() > 0:
17 |     from encoding import gpu
18 | 
19 | __all__ = ['aggregate', 'scaled_l2', 'pairwise_cosine']
20 | 
21 | class _aggregate(Function):
22 |     @staticmethod
23 |     def forward(ctx, A, X, C):
24 |         # A \in (B,N,K), X \in (B,N,D), C \in (K,D) => E \in (B,K,D)
25 |         ctx.save_for_backward(A, X, C)
26 |         if A.is_cuda:
27 |             E = gpu.aggregate_forward(A, X, C)
28 |         else:
29 |             E = cpu.aggregate_forward(A, X, C)
30 |         return E
31 | 
32 |     @staticmethod
33 |     def backward(ctx, gradE):
34 |         A, X, C = ctx.saved_variables
35 |         if A.is_cuda:
36 |             gradA, gradX, gradC = gpu.aggregate_backward(gradE, A, X,
C) 37 | else: 38 | gradA, gradX, gradC = cpu.aggregate_backward(gradE, A, X, C) 39 | return gradA, gradX, gradC 40 | 41 | def aggregate(A, X, C): 42 | r""" Aggregate operation, aggregate the residuals of inputs (:math:`X`) with repect 43 | to the codewords (:math:`C`) with assignment weights (:math:`A`). 44 | 45 | .. math:: 46 | 47 | e_{k} = \sum_{i=1}^{N} a_{ik} (x_i - d_k) 48 | 49 | Shape: 50 | - Input: :math:`A\in\mathcal{R}^{B\times N\times K}` 51 | :math:`X\in\mathcal{R}^{B\times N\times D}` :math:`C\in\mathcal{R}^{K\times D}` 52 | (where :math:`B` is batch, :math:`N` is total number of features, 53 | :math:`K` is number is codewords, :math:`D` is feature dimensions.) 54 | - Output: :math:`E\in\mathcal{R}^{B\times K\times D}` 55 | 56 | Examples: 57 | >>> B,N,K,D = 2,3,4,5 58 | >>> A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), requires_grad=True) 59 | >>> X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), requires_grad=True) 60 | >>> C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), requires_grad=True) 61 | >>> func = encoding.aggregate() 62 | >>> E = func(A, X, C) 63 | """ 64 | return _aggregate.apply(A, X, C) 65 | 66 | class _scaled_l2(Function): 67 | @staticmethod 68 | def forward(ctx, X, C, S): 69 | if X.is_cuda: 70 | SL = gpu.scaled_l2_forward(X, C, S) 71 | else: 72 | SL = cpu.scaled_l2_forward(X, C, S) 73 | ctx.save_for_backward(X, C, S, SL) 74 | return SL 75 | 76 | @staticmethod 77 | def backward(ctx, gradSL): 78 | X, C, S, SL = ctx.saved_variables 79 | if X.is_cuda: 80 | gradX, gradC, gradS = gpu.scaled_l2_backward(gradSL, X, C, S, SL) 81 | else: 82 | gradX, gradC, gradS = cpu.scaled_l2_backward(gradSL, X, C, S, SL) 83 | return gradX, gradC, gradS 84 | 85 | def scaled_l2(X, C, S): 86 | r""" scaled_l2 distance 87 | 88 | .. math:: 89 | sl_{ik} = s_k \|x_i-c_k\|^2 90 | 91 | Shape: 92 | - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` 93 | :math:`C\in\mathcal{R}^{K\times D}` :math:`S\in \mathcal{R}^K` 94 | (where :math:`B` is batch, :math:`N` is total number of features, 95 | :math:`K` is number is codewords, :math:`D` is feature dimensions.) 96 | - Output: :math:`E\in\mathcal{R}^{B\times N\times K}` 97 | """ 98 | return _scaled_l2.apply(X, C, S) 99 | 100 | # Experimental 101 | def pairwise_cosine(X, C, normalize=False): 102 | r"""Pairwise Cosine Similarity or Dot-product Similarity 103 | Shape: 104 | - Input: :math:`X\in\mathcal{R}^{B\times N\times D}` 105 | :math:`C\in\mathcal{R}^{K\times D}` :math:`S\in \mathcal{R}^K` 106 | (where :math:`B` is batch, :math:`N` is total number of features, 107 | :math:`K` is number is codewords, :math:`D` is feature dimensions.) 
108 | - Output: :math:`E\in\mathcal{R}^{B\times N\times K}` 109 | """ 110 | if normalize: 111 | X = F.normalize(X, dim=2, eps=1e-8) 112 | C = F.normalize(C, dim=1, eps=1e-8) 113 | return torch.matmul(X, C.t()) 114 | -------------------------------------------------------------------------------- /encoding/functions/rectify.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## LICENSE file in the root directory of this source tree 7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 | 9 | """Rectify function""" 10 | import torch 11 | from torch.autograd import Function 12 | 13 | from encoding import cpu 14 | if torch.cuda.device_count() > 0: 15 | from encoding import gpu 16 | 17 | __all__ = ['rectify'] 18 | 19 | class _rectify(Function): 20 | @staticmethod 21 | def forward(ctx, y, x, kernel_size, stride, padding, dilation, average): 22 | ctx.save_for_backward(x) 23 | # assuming kernel_size is 3 24 | kernel_size = [k + 2 * (d - 1) for k,d in zip(kernel_size, dilation)] 25 | ctx.kernel_size = kernel_size 26 | ctx.stride = stride 27 | ctx.padding = padding 28 | ctx.dilation = dilation 29 | ctx.average = average 30 | if x.is_cuda: 31 | gpu.conv_rectify(y, x, kernel_size, stride, padding, dilation, average) 32 | else: 33 | cpu.conv_rectify(y, x, kernel_size, stride, padding, dilation, average) 34 | ctx.mark_dirty(y) 35 | return y 36 | 37 | @staticmethod 38 | def backward(ctx, grad_y): 39 | x, = ctx.saved_variables 40 | if x.is_cuda: 41 | gpu.conv_rectify(grad_y, x, ctx.kernel_size, ctx.stride, 42 | ctx.padding, ctx.dilation, ctx.average) 43 | else: 44 | cpu.conv_rectify(grad_y, x, ctx.kernel_size, ctx.stride, 45 | ctx.padding, ctx.dilation, ctx.average) 46 | ctx.mark_dirty(grad_y) 47 | return grad_y, None, None, None, None, None, None 48 | 49 | rectify = _rectify.apply 50 | -------------------------------------------------------------------------------- /encoding/lib/cpu/encoding_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | at::Tensor Aggregate_Forward_CPU( 5 | const at::Tensor A, 6 | const at::Tensor X, 7 | const at::Tensor C) { 8 | auto E = (A.unsqueeze(3) * (X.unsqueeze(2).expand({X.size(0), X.size(1), 9 | C.size(0), C.size(1)}) - C.unsqueeze(0).unsqueeze(0))).sum(1); 10 | return E; 11 | } 12 | 13 | std::vector Aggregate_Backward_CPU( 14 | const at::Tensor GE, 15 | const at::Tensor A, 16 | const at::Tensor X, 17 | const at::Tensor C) { 18 | auto gradA = (GE.unsqueeze(1) * (X.unsqueeze(2).expand({X.size(0), X.size(1), 19 | C.size(0), C.size(1)}) - C.unsqueeze(0).unsqueeze(0))).sum(3); 20 | auto gradX = at::bmm(A, GE); 21 | auto gradC = (-GE * A.sum(1).unsqueeze(2)).sum(0); 22 | return {gradA, gradX, gradC}; 23 | } 24 | 25 | at::Tensor ScaledL2_Forward_CPU( 26 | const at::Tensor X, 27 | const at::Tensor C, 28 | const at::Tensor S) { 29 | auto SL = S.view({1, 1, C.size(0)}) * (X.unsqueeze(2).expand({X.size(0), X.size(1), 30 | C.size(0), C.size(1)}) - C.unsqueeze(0).unsqueeze(0)).pow(2).sum(3); 31 | return SL; 32 | } 33 | 34 | std::vector ScaledL2_Backward_CPU( 35 | const at::Tensor GSL, 36 | const at::Tensor X, 37 | const at::Tensor C, 38 | const at::Tensor S, 39 | const at::Tensor SL) { 40 | auto tmp = (2 * GSL * S.view({1, 1, C.size(0)})).unsqueeze(3) * 41 | 
(X.unsqueeze(2).expand({X.size(0), X.size(1), C.size(0), C.size(1)}) - 42 | C.unsqueeze(0).unsqueeze(0)); 43 | auto GX = tmp.sum(2); 44 | auto GC = tmp.sum(0).sum(0); 45 | auto GS = (GSL * (SL / S.view({1, 1, C.size(0)}))).sum(0).sum(0); 46 | return {GX, GC, GS}; 47 | } 48 | -------------------------------------------------------------------------------- /encoding/lib/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #ifdef _OPENMP 6 | #include 7 | #endif 8 | 9 | template 10 | inline scalar IoU(scalar* rawInput, int idx_x, int idx_y) { 11 | scalar lr = std::fmin(rawInput[idx_x*4] + rawInput[idx_x*4+2], 12 | rawInput[idx_y*4] + rawInput[idx_y*4+2]); 13 | scalar rl = std::fmax(rawInput[idx_x*4], rawInput[idx_y*4]); 14 | scalar tb = std::fmin(rawInput[idx_x*4+1] + rawInput[idx_x*4+3], 15 | rawInput[idx_y*4+1] + rawInput[idx_y*4+3]); 16 | scalar bt = std::fmax(rawInput[idx_x*4+1], rawInput[idx_y*4+1]); 17 | scalar inter = std::fmax(0, lr-rl)*std::fmax(0, tb-bt); 18 | scalar uni = (rawInput[idx_x*4+2]*rawInput[idx_x*4+3] 19 | + rawInput[idx_y*4+2]*rawInput[idx_y*4+3] - inter); 20 | return inter/uni; 21 | } 22 | 23 | 24 | std::vector Non_Max_Suppression_CPU( 25 | const at::Tensor& input, 26 | const at::Tensor& scores, 27 | double thresh) { 28 | AT_ASSERT(input.ndimension() == 3); 29 | AT_ASSERT(scores.ndimension() == 2); 30 | AT_ASSERT(input.size(0) == scores.size(0)); 31 | AT_ASSERT(input.size(1) == scores.size(1)); 32 | AT_ASSERT(input.size(2) == 4); 33 | AT_ASSERT(input.is_contiguous()); 34 | AT_ASSERT(scores.is_contiguous()); 35 | AT_ASSERT(input.type().scalarType() == at::kFloat || input.type().scalarType() == at::kDouble); 36 | AT_ASSERT(scores.type().scalarType() == at::kFloat || scores.type().scalarType() == at::kDouble); 37 | AT_ASSERT(input.is_contiguous()); 38 | AT_ASSERT(scores.is_contiguous()); 39 | 40 | 41 | at::Tensor sorted_inds = std::get<1>(scores.sort(-1, true)); 42 | //at::Tensor rawIdx = std::get<1>(scores.sort(-1, true)); 43 | 44 | auto num_boxes = input.size(1); 45 | auto batch_size = input.size(0); 46 | auto mask = torch::zeros({batch_size, num_boxes}, input.type().toScalarType(at::kByte)); 47 | //auto mask = input.type().toScalarType(at::kByte).tensor({batch_size, num_boxes}); 48 | mask.fill_(1); 49 | auto *rawMask = mask.data(); 50 | auto *rawIdx = sorted_inds.data(); 51 | 52 | if (input.type().scalarType() == at::kFloat) 53 | { 54 | auto *rawInput = input.data(); 55 | 56 | for(int batch=0; batch thresh) 67 | rawMask[i] = 0; 68 | } 69 | ++pos; 70 | while(pos < (1+batch)*num_boxes-1 and (rawMask[pos] == 0)) 71 | ++pos; 72 | } 73 | } 74 | } 75 | else 76 | { 77 | auto *rawInput = input.data(); 78 | for(int batch=0; batch thresh) 89 | rawMask[i] = 0; 90 | } 91 | ++pos; 92 | while(pos < (1+batch)*num_boxes-1 and (rawMask[pos] == 0)) 93 | ++pos; 94 | } 95 | } 96 | } 97 | //see ./cuda/NonMaxSuppression.cu for comment about return value. 
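  // In short: `mask` holds a keep(1)/suppress(0) flag per box and
  // `sorted_inds` orders each batch's boxes by descending score, so
  // mask[b][i] refers to box sorted_inds[b][i]. The greedy loop above
  // keeps the current highest-scoring survivor and zeroes the flag of
  // any later box whose IoU with it exceeds `thresh`.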
98 | return {mask, sorted_inds}; 99 | } 100 | -------------------------------------------------------------------------------- /encoding/lib/cpu/operator.cpp: -------------------------------------------------------------------------------- 1 | #include "operator.h" 2 | 3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 4 | m.def("roi_align_forward", &ROIAlign_Forward_CPU, "ROI Align forward (CPU)"); 5 | m.def("roi_align_backward", &ROIAlign_Backward_CPU, "ROI Align backward (CPU)"); 6 | m.def("aggregate_forward", &Aggregate_Forward_CPU, "Aggregate forward (CPU)"); 7 | m.def("aggregate_backward", &Aggregate_Backward_CPU, "Aggregate backward (CPU)"); 8 | m.def("scaled_l2_forward", &ScaledL2_Forward_CPU, "ScaledL2 forward (CPU)"); 9 | m.def("scaled_l2_backward", &ScaledL2_Backward_CPU, "ScaledL2 backward (CPU)"); 10 | m.def("batchnorm_forward", &BatchNorm_Forward_CPU, "BatchNorm forward (CPU)"); 11 | m.def("batchnorm_backward", &BatchNorm_Backward_CPU, "BatchNorm backward (CPU)"); 12 | m.def("sumsquare_forward", &Sum_Square_Forward_CPU, "SumSqu forward (CPU)"); 13 | m.def("sumsquare_backward", &Sum_Square_Backward_CPU, "SumSqu backward (CPU)"); 14 | m.def("non_max_suppression", &Non_Max_Suppression_CPU, "NMS (CPU)"); 15 | m.def("conv_rectify", &CONV_RECTIFY_CPU, "Convolution Rectifier (CPU)"); 16 | // Apply fused color jitter 17 | m.def("apply_transform", &apply_transform, "apply_transform"); 18 | } 19 | -------------------------------------------------------------------------------- /encoding/lib/cpu/operator.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | at::Tensor ROIAlign_Forward_CPU( 9 | const at::Tensor& input, 10 | const at::Tensor& bottom_rois, 11 | int64_t pooled_height, 12 | int64_t pooled_width, 13 | double spatial_scale, 14 | int64_t sampling_ratio); 15 | 16 | at::Tensor ROIAlign_Backward_CPU( 17 | const at::Tensor& bottom_rois, 18 | const at::Tensor& grad_output, 19 | int64_t b_size, 20 | int64_t channels, 21 | int64_t height, 22 | int64_t width, 23 | int64_t pooled_height, 24 | int64_t pooled_width, 25 | double spatial_scale, 26 | int64_t sampling_ratio); 27 | 28 | at::Tensor Aggregate_Forward_CPU( 29 | const at::Tensor A, 30 | const at::Tensor X, 31 | const at::Tensor C); 32 | 33 | std::vector Aggregate_Backward_CPU( 34 | const at::Tensor GE, 35 | const at::Tensor A, 36 | const at::Tensor X, 37 | const at::Tensor C); 38 | 39 | at::Tensor ScaledL2_Forward_CPU( 40 | const at::Tensor X_, 41 | const at::Tensor C_, 42 | const at::Tensor S_); 43 | 44 | std::vector ScaledL2_Backward_CPU( 45 | const at::Tensor GSL_, 46 | const at::Tensor X_, 47 | const at::Tensor C_, 48 | const at::Tensor S_, 49 | const at::Tensor SL_); 50 | 51 | at::Tensor BatchNorm_Forward_CPU( 52 | const at::Tensor input_, 53 | const at::Tensor mean_, 54 | const at::Tensor std_, 55 | const at::Tensor gamma_, 56 | const at::Tensor beta_); 57 | 58 | std::vector BatchNorm_Backward_CPU( 59 | const at::Tensor gradoutput_, 60 | const at::Tensor input_, 61 | const at::Tensor mean_, 62 | const at::Tensor std_, 63 | const at::Tensor gamma_, 64 | const at::Tensor beta_, 65 | bool train); 66 | 67 | std::vector Sum_Square_Forward_CPU( 68 | const at::Tensor input_); 69 | 70 | at::Tensor Sum_Square_Backward_CPU( 71 | const at::Tensor input_, 72 | const at::Tensor gradSum_, 73 | const at::Tensor gradSquare_); 74 | 75 | std::vector Non_Max_Suppression_CPU( 76 | const at::Tensor& input, 77 | const at::Tensor& scores, 78 | 
double thresh); 79 | 80 | void CONV_RECTIFY_CPU( 81 | at::Tensor& output, 82 | const at::Tensor& input, 83 | at::IntArrayRef kernel_size, 84 | at::IntArrayRef stride, 85 | at::IntArrayRef padding, 86 | at::IntArrayRef dilation, 87 | bool avg_mode); 88 | 89 | // Fused color jitter application 90 | // ctm [4,4], img [H, W, C] 91 | py::array_t apply_transform(int H, int W, int C, py::array_t img, py::array_t ctm) { 92 | auto img_buf = img.request(); 93 | auto ctm_buf = ctm.request(); 94 | 95 | // printf("H: %d, W: %d, C: %d\n", H, W, C); 96 | py::array_t result{static_cast(img_buf.size)}; 97 | auto res_buf = result.request(); 98 | 99 | float *img_ptr = (float *)img_buf.ptr; 100 | float *ctm_ptr = (float *)ctm_buf.ptr; 101 | float *res_ptr = (float *)res_buf.ptr; 102 | 103 | for (int h = 0; h < H; ++h) { 104 | for (int w = 0; w < W; ++w) { 105 | float *ptr = &img_ptr[h * W * C + w * C]; 106 | float *out_ptr = &res_ptr[h * W * C + w * C]; 107 | // manually unroll over C 108 | out_ptr[0] = ctm_ptr[0] * ptr[0] + ctm_ptr[1] * ptr[1] + ctm_ptr[2] * ptr[2] + ctm_ptr[3]; 109 | out_ptr[1] = ctm_ptr[4] * ptr[0] + ctm_ptr[5] * ptr[1] + ctm_ptr[6] * ptr[2] + ctm_ptr[7]; 110 | out_ptr[2] = ctm_ptr[8] * ptr[0] + ctm_ptr[9] * ptr[1] + ctm_ptr[10] * ptr[2] + ctm_ptr[11]; 111 | } 112 | } 113 | 114 | result.resize({H, W, C}); 115 | 116 | return result; 117 | } 118 | -------------------------------------------------------------------------------- /encoding/lib/cpu/syncbn_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { 6 | if (x.ndimension() == 2) { 7 | return v; 8 | } else { 9 | std::vector broadcast_size = {1, -1}; 10 | for (int64_t i = 2; i < x.ndimension(); ++i) 11 | broadcast_size.push_back(1); 12 | 13 | return v.view(broadcast_size); 14 | } 15 | } 16 | 17 | at::Tensor BatchNorm_Forward_CPU( 18 | const at::Tensor input, 19 | const at::Tensor mean, 20 | const at::Tensor std, 21 | const at::Tensor gamma, 22 | const at::Tensor beta) { 23 | auto output = (input - broadcast_to(mean, input)) / broadcast_to(std, input); 24 | output = output * broadcast_to(gamma, input) + broadcast_to(beta, input); 25 | return output; 26 | } 27 | 28 | // Not implementing CPU backward for now 29 | std::vector BatchNorm_Backward_CPU( 30 | const at::Tensor gradoutput, 31 | const at::Tensor input, 32 | const at::Tensor mean, 33 | const at::Tensor std, 34 | const at::Tensor gamma, 35 | const at::Tensor beta, 36 | bool train) { 37 | /* outputs*/ 38 | at::Tensor gradinput = at::zeros_like(input); 39 | at::Tensor gradgamma = at::zeros_like(gamma); 40 | at::Tensor gradbeta = at::zeros_like(beta); 41 | at::Tensor gradMean = at::zeros_like(mean); 42 | at::Tensor gradStd = at::zeros_like(std); 43 | return {gradinput, gradMean, gradStd, gradgamma, gradbeta}; 44 | } 45 | 46 | std::vector Sum_Square_Forward_CPU( 47 | const at::Tensor input) { 48 | /* outputs */ 49 | at::Tensor sum = torch::zeros({input.size(1)}, input.options()); 50 | at::Tensor square = torch::zeros({input.size(1)}, input.options()); 51 | return {sum, square}; 52 | } 53 | 54 | at::Tensor Sum_Square_Backward_CPU( 55 | const at::Tensor input, 56 | const at::Tensor gradSum, 57 | const at::Tensor gradSquare) { 58 | /* outputs */ 59 | at::Tensor gradInput = at::zeros_like(input); 60 | return gradInput; 61 | } 62 | -------------------------------------------------------------------------------- /encoding/lib/gpu/activation_kernel.cu: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include "common.h" 10 | 11 | using namespace std; 12 | 13 | namespace { 14 | 15 | // template 16 | // inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) { 17 | // // Create thrust pointers 18 | // thrust::device_ptr th_z = thrust::device_pointer_cast(z); 19 | // thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); 20 | // 21 | // thrust::transform_if(th_dz, th_dz + count, th_z, th_dz, 22 | // [slope] __device__ (const T& dz) { return dz * slope; }, 23 | // [] __device__ (const T& z) { return z < 0; }); 24 | // thrust::transform_if(th_z, th_z + count, th_z, 25 | // [slope] __device__ (const T& z) { return z / slope; }, 26 | // [] __device__ (const T& z) { return z < 0; }); 27 | // } 28 | 29 | } 30 | 31 | void LeakyRelu_Forward_CUDA(at::Tensor z, float slope) { 32 | at::leaky_relu_(z, slope); 33 | } 34 | 35 | void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope) { 36 | int64_t count = z.numel(); 37 | 38 | /* 39 | AT_DISPATCH_FLOATING_TYPES(z.type(), "LeakyRelu_Backward_CUDA", ([&] { 40 | leaky_relu_backward_impl(z.data(), dz.data(), slope, count); 41 | })); 42 | */ 43 | // unstable after scaling 44 | at::leaky_relu_(z, 1.0 / slope); 45 | // This API is changed on pytorch side, feature broken 46 | throw "PyTorch API break, Don't use InplaceABN for now."; 47 | // at::leaky_relu_backward(dz, z, slope, false); 48 | } 49 | -------------------------------------------------------------------------------- /encoding/lib/gpu/device_tensor.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | template 4 | struct DeviceTensor { 5 | public: 6 | inline __device__ __host__ DeviceTensor(DType *p, const int *size) 7 | : dptr_(p) { 8 | for (int i = 0; i < Dim; ++i) { 9 | size_[i] = size ? 
size[i] : 0; 10 | } 11 | } 12 | 13 | inline __device__ __host__ unsigned getSize(const int i) const { 14 | assert(i < Dim); 15 | return size_[i]; 16 | } 17 | 18 | inline __device__ __host__ int numElements() const { 19 | int n = 1; 20 | for (int i = 0; i < Dim; ++i) { 21 | n *= size_[i]; 22 | } 23 | return n; 24 | } 25 | 26 | inline __device__ __host__ DeviceTensor select(const size_t x) const { 27 | assert(Dim > 1); 28 | int offset = x; 29 | for (int i = 1; i < Dim; ++i) { 30 | offset *= size_[i]; 31 | } 32 | DeviceTensor tensor(dptr_ + offset, nullptr); 33 | for (int i = 0; i < Dim - 1; ++i) { 34 | tensor.size_[i] = this->size_[i+1]; 35 | } 36 | return tensor; 37 | } 38 | 39 | inline __device__ __host__ DeviceTensor operator[](const size_t x) const { 40 | assert(Dim > 1); 41 | int offset = x; 42 | for (int i = 1; i < Dim; ++i) { 43 | offset *= size_[i]; 44 | } 45 | DeviceTensor tensor(dptr_ + offset, nullptr); 46 | for (int i = 0; i < Dim - 1; ++i) { 47 | tensor.size_[i] = this->size_[i+1]; 48 | } 49 | return tensor; 50 | } 51 | 52 | inline __device__ __host__ size_t InnerSize() const { 53 | assert(Dim >= 3); 54 | size_t sz = 1; 55 | for (size_t i = 2; i < Dim; ++i) { 56 | sz *= size_[i]; 57 | } 58 | return sz; 59 | } 60 | 61 | inline __device__ __host__ size_t ChannelCount() const { 62 | assert(Dim >= 3); 63 | return size_[1]; 64 | } 65 | 66 | inline __device__ __host__ DType* data_ptr() const { 67 | return dptr_; 68 | } 69 | 70 | DType *dptr_; 71 | int size_[Dim]; 72 | }; 73 | 74 | template 75 | struct DeviceTensor { 76 | inline __device__ __host__ DeviceTensor(DType *p, const int *size) 77 | : dptr_(p) { 78 | size_[0] = size ? size[0] : 0; 79 | } 80 | 81 | inline __device__ __host__ unsigned getSize(const int i) const { 82 | assert(i == 0); 83 | return size_[0]; 84 | } 85 | 86 | inline __device__ __host__ int numElements() const { 87 | return size_[0]; 88 | } 89 | 90 | inline __device__ __host__ DType &operator[](const size_t x) const { 91 | return *(dptr_ + x); 92 | } 93 | 94 | inline __device__ __host__ DType* data_ptr() const { 95 | return dptr_; 96 | } 97 | 98 | DType *dptr_; 99 | int size_[1]; 100 | }; 101 | 102 | template 103 | static DeviceTensor devicetensor(const at::Tensor &blob) { 104 | DType *data = blob.data_ptr(); 105 | DeviceTensor tensor(data, nullptr); 106 | for (int i = 0; i < Dim; ++i) { 107 | tensor.size_[i] = blob.size(i); 108 | } 109 | return tensor; 110 | } 111 | -------------------------------------------------------------------------------- /encoding/lib/gpu/operator.cpp: -------------------------------------------------------------------------------- 1 | #include "operator.h" 2 | 3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 4 | m.def("roi_align_forward", &ROIAlign_Forward_CUDA, "ROI Align forward (CUDA)"); 5 | m.def("roi_align_backward", &ROIAlign_Backward_CUDA, "ROI Align backward (CUDA)"); 6 | m.def("non_max_suppression", &Non_Max_Suppression_CUDA, "NMS (CUDA)"); 7 | m.def("aggregate_forward", &Aggregate_Forward_CUDA, "Aggregate forward (CUDA)"); 8 | m.def("aggregate_backward", &Aggregate_Backward_CUDA, "Aggregate backward (CUDA)"); 9 | m.def("scaled_l2_forward", &ScaledL2_Forward_CUDA, "ScaledL2 forward (CUDA)"); 10 | m.def("scaled_l2_backward", &ScaledL2_Backward_CUDA, "ScaledL2 backward (CUDA)"); 11 | m.def("batchnorm_forward", &BatchNorm_Forward_CUDA, "BatchNorm forward (CUDA)"); 12 | m.def("batchnorm_inp_forward", &BatchNorm_Forward_Inp_CUDA, "BatchNorm forward (CUDA)"); 13 | m.def("batchnorm_backward", &BatchNorm_Backward_CUDA, "BatchNorm backward 
(CUDA)"); 14 | m.def("batchnorm_inp_backward", &BatchNorm_Inp_Backward_CUDA, "BatchNorm backward (CUDA)"); 15 | m.def("expectation_forward", &Expectation_Forward_CUDA, "Expectation forward (CUDA)"); 16 | m.def("expectation_backward", &Expectation_Backward_CUDA, "Expectation backward (CUDA)"); 17 | m.def("expectation_inp_backward", &Expectation_Inp_Backward_CUDA, 18 | "Inplace Expectation backward (CUDA)"); 19 | m.def("leaky_relu_forward", &LeakyRelu_Forward_CUDA, "Learky ReLU forward (CUDA)"); 20 | m.def("leaky_relu_backward", &LeakyRelu_Backward_CUDA, "Learky ReLU backward (CUDA)"); 21 | m.def("conv_rectify", &CONV_RECTIFY_CUDA, "Convolution Rectifier (CUDA)"); 22 | // batched box encoder 23 | m.def("box_encoder", &box_encoder, "box_encoder"); 24 | m.def("random_horiz_flip", &random_horiz_flip, "random_horiz_flip"); 25 | } 26 | -------------------------------------------------------------------------------- /encoding/lib/gpu/operator.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | std::vector box_encoder( 6 | const int N_img, 7 | const at::Tensor& bbox_input, 8 | const at::Tensor& bbox_offsets, 9 | const at::Tensor& labels_input, 10 | const at::Tensor& dbox, 11 | const float criteria = 0.5); 12 | 13 | std::vector random_horiz_flip( 14 | at::Tensor& img, 15 | at::Tensor& bboxes, 16 | const at::Tensor& bbox_offsets, 17 | const float p, 18 | const bool nhwc); 19 | 20 | at::Tensor ROIAlign_Forward_CUDA( 21 | const at::Tensor input, 22 | const at::Tensor rois, 23 | int64_t pooled_height, 24 | int64_t pooled_width, 25 | double spatial_scale, 26 | int64_t sample_ratio); 27 | 28 | at::Tensor ROIAlign_Backward_CUDA( 29 | const at::Tensor rois, 30 | const at::Tensor grad_output, 31 | int64_t b_size, 32 | int64_t channels, 33 | int64_t height, 34 | int64_t width, 35 | int64_t pooled_height, 36 | int64_t pooled_width, 37 | double spatial_scale, 38 | int64_t sampling_ratio); 39 | 40 | std::vector Non_Max_Suppression_CUDA( 41 | const at::Tensor& input, 42 | const at::Tensor& scores, 43 | double thresh); 44 | 45 | at::Tensor Aggregate_Forward_CUDA( 46 | const at::Tensor A_, 47 | const at::Tensor X_, 48 | const at::Tensor C_); 49 | 50 | std::vector Aggregate_Backward_CUDA( 51 | const at::Tensor GE_, 52 | const at::Tensor A_, 53 | const at::Tensor X_, 54 | const at::Tensor C_); 55 | 56 | at::Tensor ScaledL2_Forward_CUDA( 57 | const at::Tensor X_, 58 | const at::Tensor C_, 59 | const at::Tensor S_); 60 | 61 | std::vector ScaledL2_Backward_CUDA( 62 | const at::Tensor GSL_, 63 | const at::Tensor X_, 64 | const at::Tensor C_, 65 | const at::Tensor S_, 66 | const at::Tensor SL_); 67 | 68 | at::Tensor BatchNorm_Forward_CUDA( 69 | const at::Tensor input_, 70 | const at::Tensor mean_, 71 | const at::Tensor std_, 72 | const at::Tensor gamma_, 73 | const at::Tensor beta_, 74 | float eps); 75 | 76 | at::Tensor BatchNorm_Forward_Inp_CUDA( 77 | const at::Tensor input_, 78 | const at::Tensor ex_, 79 | const at::Tensor exs_, 80 | const at::Tensor gamma_, 81 | const at::Tensor beta_, 82 | float eps); 83 | 84 | std::vector BatchNorm_Backward_CUDA( 85 | const at::Tensor gradoutput_, 86 | const at::Tensor input_, 87 | const at::Tensor ex_, 88 | const at::Tensor exs_, 89 | const at::Tensor gamma_, 90 | const at::Tensor beta_, 91 | float eps); 92 | 93 | std::vector BatchNorm_Inp_Backward_CUDA( 94 | const at::Tensor gradoutput_, 95 | const at::Tensor output_, 96 | const at::Tensor ex_, 97 | const at::Tensor exs_, 98 | const at::Tensor gamma_, 99 
| const at::Tensor beta_, 100 | float eps); 101 | 102 | std::vector Expectation_Forward_CUDA( 103 | const at::Tensor input_); 104 | 105 | at::Tensor Expectation_Backward_CUDA( 106 | const at::Tensor input_, 107 | const at::Tensor gradEx_, 108 | const at::Tensor gradExs_); 109 | 110 | at::Tensor Expectation_Inp_Backward_CUDA( 111 | const at::Tensor gradInput_, 112 | const at::Tensor output_, 113 | const at::Tensor gradEx_, 114 | const at::Tensor gradExs_, 115 | const at::Tensor ex_, 116 | const at::Tensor exs_, 117 | const at::Tensor gamma_, 118 | const at::Tensor beta_, 119 | float eps); 120 | 121 | void LeakyRelu_Forward_CUDA(at::Tensor z, float slope); 122 | 123 | void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope); 124 | 125 | void CONV_RECTIFY_CUDA( 126 | at::Tensor& output, 127 | const at::Tensor& input, 128 | at::IntArrayRef kernel_size, 129 | at::IntArrayRef stride, 130 | at::IntArrayRef padding, 131 | at::IntArrayRef dilation, 132 | bool avg_mode); 133 | -------------------------------------------------------------------------------- /encoding/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .model_zoo import get_model 2 | from .model_zoo import model_list 3 | from .model_store import get_model_file, pretrained_model_list 4 | 5 | from .sseg import get_segmentation_model, MultiEvalModule 6 | -------------------------------------------------------------------------------- /encoding/models/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .resnest import * 3 | from .resnext import * 4 | from .resnet_variants import * 5 | from .wideresnet import * 6 | from .xception import * 7 | -------------------------------------------------------------------------------- /encoding/models/backbone/resnest.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## LICENSE file in the root directory of this source tree 7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 | """ResNeSt models""" 9 | 10 | import torch 11 | from .resnet import ResNet, Bottleneck 12 | from ..model_store import get_model_file 13 | 14 | __all__ = ['resnest50', 'resnest101', 'resnest200', 'resnest269'] 15 | 16 | _url_format = 'https://hangzh.s3.amazonaws.com/encoding/models/{}-{}.pth' 17 | 18 | 19 | def resnest50(pretrained=False, root='~/.encoding/models', **kwargs): 20 | model = ResNet(Bottleneck, [3, 4, 6, 3], 21 | radix=2, groups=1, bottleneck_width=64, 22 | deep_stem=True, stem_width=32, avg_down=True, 23 | avd=True, avd_first=False, **kwargs) 24 | if pretrained: 25 | model.load_state_dict(torch.load( 26 | get_model_file('resnest50', root=root)), strict=True) 27 | return model 28 | 29 | def resnest101(pretrained=False, root='~/.encoding/models', **kwargs): 30 | model = ResNet(Bottleneck, [3, 4, 23, 3], 31 | radix=2, groups=1, bottleneck_width=64, 32 | deep_stem=True, stem_width=64, avg_down=True, 33 | avd=True, avd_first=False, **kwargs) 34 | if pretrained: 35 | model.load_state_dict(torch.load( 36 | get_model_file('resnest101', root=root)), strict=True) 37 | return model 38 | 39 | def resnest200(pretrained=False, root='~/.encoding/models', **kwargs): 40 | model = ResNet(Bottleneck, [3, 24, 36, 3], 41 | radix=2, 
groups=1, bottleneck_width=64, 42 | deep_stem=True, stem_width=64, avg_down=True, 43 | avd=True, avd_first=False, **kwargs) 44 | if pretrained: 45 | model.load_state_dict(torch.load( 46 | get_model_file('resnest200', root=root)), strict=False) 47 | return model 48 | 49 | def resnest269(pretrained=False, root='~/.encoding/models', **kwargs): 50 | model = ResNet(Bottleneck, [3, 30, 48, 8], 51 | radix=2, groups=1, bottleneck_width=64, 52 | deep_stem=True, stem_width=64, avg_down=True, 53 | avd=True, avd_first=False, **kwargs) 54 | if pretrained: 55 | model.load_state_dict(torch.load( 56 | get_model_file('resnest269', root=root)), strict=True) 57 | return model 58 | 59 | def resnest50_fast(pretrained=False, root='~/.encoding/models', **kwargs): 60 | model = ResNet(Bottleneck, [3, 4, 6, 3], 61 | radix=2, groups=1, bottleneck_width=64, 62 | deep_stem=True, stem_width=32, avg_down=True, 63 | avd=True, avd_first=True, **kwargs) 64 | if pretrained: 65 | model.load_state_dict(torch.load( 66 | get_model_file('resnest50fast', root=root)), strict=True) 67 | return model 68 | 69 | def resnest101_fast(pretrained=False, root='~/.encoding/models', **kwargs): 70 | model = ResNet(Bottleneck, [3, 4, 23, 3], 71 | radix=2, groups=1, bottleneck_width=64, 72 | deep_stem=True, stem_width=64, avg_down=True, 73 | avd=True, avd_first=True, **kwargs) 74 | if pretrained: 75 | model.load_state_dict(torch.load( 76 | get_model_file('resnest101fast', root=root)), strict=True) 77 | return model 78 | -------------------------------------------------------------------------------- /encoding/models/backbone/resnet_variants.py: -------------------------------------------------------------------------------- 1 | """ResNet variants""" 2 | 3 | import torch 4 | from .resnet import ResNet, Bottleneck 5 | from ..model_store import get_model_file 6 | 7 | __all__ = ['resnet50s', 'resnet101s', 'resnet152s', 8 | 'resnet50d'] 9 | 10 | # pspnet version of ResNet 11 | def resnet50s(pretrained=False, root='~/.encoding/models', **kwargs): 12 | """Constructs a ResNetS-50 model as in PSPNet. 13 | 14 | Args: 15 | pretrained (bool): If True, returns a model pre-trained on ImageNet 16 | """ 17 | kwargs['deep_stem'] = True 18 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 19 | if pretrained: 20 | model.load_state_dict(torch.load( 21 | get_model_file('resnet50s', root=root)), strict=False) 22 | return model 23 | 24 | def resnet101s(pretrained=False, root='~/.encoding/models', **kwargs): 25 | """Constructs a ResNetS-101 model as in PSPNet. 26 | 27 | Args: 28 | pretrained (bool): If True, returns a model pre-trained on ImageNet 29 | """ 30 | kwargs['deep_stem'] = True 31 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 32 | if pretrained: 33 | model.load_state_dict(torch.load( 34 | get_model_file('resnet101s', root=root)), strict=False) 35 | return model 36 | 37 | def resnet152s(pretrained=False, root='~/.encoding/models', **kwargs): 38 | """Constructs a ResNetS-152 model as in PSPNet. 
39 | 
40 |     Args:
41 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
42 |     """
43 |     kwargs['deep_stem'] = True
44 |     model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
45 |     if pretrained:
46 |         model.load_state_dict(torch.load(
47 |             get_model_file('resnet152s', root=root)), strict=False)
48 |     return model
49 | 
50 | # ResNet-D
51 | def resnet50d(pretrained=False, root='~/.encoding/models', **kwargs):
52 |     model = ResNet(Bottleneck, [3, 4, 6, 3],
53 |                    deep_stem=True, stem_width=32,
54 |                    avg_down=True, **kwargs)
55 |     if pretrained:
56 |         model.load_state_dict(torch.load(
57 |             get_model_file('resnet50d', root=root)), strict=False)
58 |     return model
59 | 
--------------------------------------------------------------------------------
/encoding/models/backbone/resnext.py:
--------------------------------------------------------------------------------
1 | """ResNeXt models"""
2 | import torch
3 | from .resnet import ResNet, Bottleneck
4 | from ..model_store import get_model_file
5 | 
6 | __all__ = ['resnext50_32x4d', 'resnext101_32x8d']
7 | 
8 | def resnext50_32x4d(pretrained=False, root='~/.encoding/models', **kwargs):
9 |     r"""ResNeXt-50 32x4d model from
10 |     `"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/abs/1611.05431>`_
11 | 
12 |     Args:
13 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
14 |         progress (bool): If True, displays a progress bar of the download to stderr
15 |     """
16 |     kwargs['groups'] = 32
17 |     kwargs['bottleneck_width'] = 4
18 |     model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
19 |     if pretrained:
20 |         model.load_state_dict(torch.load(
21 |             get_model_file('resnext50_32x4d', root=root)), strict=False)
22 |     return model
23 | 
24 | def resnext101_32x8d(pretrained=False, root='~/.encoding/models', **kwargs):
25 |     r"""ResNeXt-101 32x8d model from
26 |     `"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/abs/1611.05431>`_
27 | 
28 |     Args:
29 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
30 |         progress (bool): If True, displays a progress bar of the download to stderr
31 |     """
32 |     kwargs['groups'] = 32
33 |     kwargs['bottleneck_width'] = 8
34 |     model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
35 |     if pretrained:
36 |         model.load_state_dict(torch.load(
37 |             get_model_file('resnext101_32x8d', root=root)), strict=False)
38 |     return model
39 | 
40 | 
--------------------------------------------------------------------------------
/encoding/models/deepten.py:
--------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 | 
11 | import torch
12 | import torch.nn as nn
13 | 
14 | from ..nn import Encoding, View, Normalize
15 | from .backbone import resnet50s, resnet101s, resnet152s
16 | 
17 | __all__ = ['DeepTen', 'get_deepten', 'get_deepten_resnet50_minc']
18 | 
19 | class DeepTen(nn.Module):
20 |     def __init__(self, nclass, backbone):
21 |         super(DeepTen, self).__init__()
22 |         self.backbone = backbone
23 |         # copying modules from pretrained models
24 |         if self.backbone == 'resnet50':
25 |             self.pretrained = resnet50s(pretrained=True, dilated=False)
26 |         elif self.backbone == 'resnet101':
27 | 
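            # The resnet50s/101s/152s constructors are the deep-stem
            # "PSP-style" ResNet variants from backbone/resnet_variants.py;
            # dilated=False keeps the standard stride-32 feature map, which
            # suffices here because the encoding head below aggregates over
            # all spatial positions rather than predicting dense maps.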
self.pretrained = resnet101s(pretrained=True, dilated=False)
28 | elif self.backbone == 'resnet152':
29 | self.pretrained = resnet152s(pretrained=True, dilated=False)
30 | else:
31 | raise RuntimeError('unknown backbone: {}'.format(self.backbone))
32 | n_codes = 32
33 | self.head = nn.Sequential(
34 | nn.Conv2d(2048, 128, 1),
35 | nn.BatchNorm2d(128),
36 | nn.ReLU(inplace=True),
37 | Encoding(D=128, K=n_codes),
38 | View(-1, 128*n_codes),
39 | Normalize(),
40 | nn.Linear(128*n_codes, nclass),
41 | )
42 |
43 | def forward(self, x):
44 | _, _, h, w = x.size()
45 | x = self.pretrained.conv1(x)
46 | x = self.pretrained.bn1(x)
47 | x = self.pretrained.relu(x)
48 | x = self.pretrained.maxpool(x)
49 | x = self.pretrained.layer1(x)
50 | x = self.pretrained.layer2(x)
51 | x = self.pretrained.layer3(x)
52 | x = self.pretrained.layer4(x)
53 | return self.head(x)
54 |
55 | def get_deepten(dataset='pascal_voc', backbone='resnet50', pretrained=False,
56 | root='~/.encoding/models', **kwargs):
57 | r"""DeepTen model from the paper `"Deep TEN: Texture Encoding Network"
58 | <https://arxiv.org/abs/1612.02844>`_
59 | Parameters
60 | ----------
61 | dataset : str, default pascal_voc
62 | The dataset the model was pre-trained on (e.g. minc).
63 | pretrained : bool, default False
64 | Whether to load the pretrained weights for model.
65 | root : str, default '~/.encoding/models'
66 | Location for keeping the model parameters.
67 | Examples
68 | --------
69 | >>> model = get_deepten(dataset='minc', backbone='resnet50', pretrained=False)
70 | >>> print(model)
71 | """
72 | from ..datasets import datasets, acronyms
73 | model = DeepTen(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, **kwargs)
74 | if pretrained:
75 | from .model_store import get_model_file
76 | model.load_state_dict(torch.load(
77 | get_model_file('deepten_%s_%s'%(backbone, acronyms[dataset]), root=root)))
78 | return model
79 |
80 | def get_deepten_resnet50_minc(pretrained=False, root='~/.encoding/models', **kwargs):
81 | r"""DeepTen model from the paper `"Deep TEN: Texture Encoding Network"
82 | <https://arxiv.org/abs/1612.02844>`_
83 | Parameters
84 | ----------
85 | pretrained : bool, default False
86 | Whether to load the pretrained weights for model.
87 | root : str, default '~/.encoding/models'
88 | Location for keeping the model parameters.
89 | 90 | 91 | Examples 92 | -------- 93 | >>> model = get_deepten_resnet50_minc(pretrained=True) 94 | >>> print(model) 95 | """ 96 | return get_deepten(dataset='minc', backbone='resnet50', pretrained=pretrained, 97 | root=root, **kwargs) 98 | -------------------------------------------------------------------------------- /encoding/models/model_zoo.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wildcard-import, unused-wildcard-import 2 | 3 | from .backbone import * 4 | from .sseg import * 5 | from .deepten import * 6 | 7 | __all__ = ['model_list', 'get_model'] 8 | 9 | models = { 10 | # resnet 11 | 'resnet50': resnet50, 12 | 'resnet101': resnet101, 13 | 'resnet152': resnet152, 14 | # resnest 15 | 'resnest50': resnest50, 16 | 'resnest101': resnest101, 17 | 'resnest200': resnest200, 18 | 'resnest269': resnest269, 19 | # resnet other variants 20 | 'resnet50s': resnet50s, 21 | 'resnet101s': resnet101s, 22 | 'resnet152s': resnet152s, 23 | 'resnet50d': resnet50d, 24 | 'resnext50_32x4d': resnext50_32x4d, 25 | 'resnext101_32x8d': resnext101_32x8d, 26 | # other segmentation backbones 27 | 'xception65': xception65, 28 | 'wideresnet38': wideresnet38, 29 | 'wideresnet50': wideresnet50, 30 | # deepten paper 31 | 'deepten_resnet50_minc': get_deepten_resnet50_minc, 32 | # segmentation resnet models 33 | 'encnet_resnet101s_coco': get_encnet_resnet101_coco, 34 | 'fcn_resnet50s_pcontext': get_fcn_resnet50_pcontext, 35 | 'encnet_resnet50s_pcontext': get_encnet_resnet50_pcontext, 36 | 'encnet_resnet101s_pcontext': get_encnet_resnet101_pcontext, 37 | 'encnet_resnet50s_ade': get_encnet_resnet50_ade, 38 | 'encnet_resnet101s_ade': get_encnet_resnet101_ade, 39 | 'fcn_resnet50s_ade': get_fcn_resnet50_ade, 40 | 'psp_resnet50s_ade': get_psp_resnet50_ade, 41 | # segmentation resnest models 42 | 'fcn_resnest50_ade': get_fcn_resnest50_ade, 43 | 'deeplab_resnest50_ade': get_deeplab_resnest50_ade, 44 | 'deeplab_resnest101_ade': get_deeplab_resnest101_ade, 45 | 'deeplab_resnest200_ade': get_deeplab_resnest200_ade, 46 | 'deeplab_resnest269_ade': get_deeplab_resnest269_ade, 47 | 'fcn_resnest50_pcontext': get_fcn_resnest50_pcontext, 48 | 'deeplab_resnest50_pcontext': get_deeplab_resnest50_pcontext, 49 | 'deeplab_resnest101_pcontext': get_deeplab_resnest101_pcontext, 50 | 'deeplab_resnest200_pcontext': get_deeplab_resnest200_pcontext, 51 | 'deeplab_resnest269_pcontext': get_deeplab_resnest269_pcontext, 52 | } 53 | 54 | model_list = list(models.keys()) 55 | 56 | def get_model(name, **kwargs): 57 | """Returns a pre-defined model by name 58 | 59 | Parameters 60 | ---------- 61 | name : str 62 | Name of the model. 63 | pretrained : bool 64 | Whether to load the pretrained weights for model. 65 | root : str, default '~/.encoding/models' 66 | Location for keeping the model parameters. 67 | 68 | Returns 69 | ------- 70 | Module: 71 | The model. 
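
Examples
--------
>>> # illustrative usage: any key from ``model_list`` works here
>>> model = get_model('resnest50', pretrained=True)
>>> print(model)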
72 | """
73 | name = name.lower()
74 | if name not in models:
75 | raise ValueError("unknown model: %s\navailable models:\n\t%s" % (str(name), '\n\t'.join(sorted(models.keys()))))
76 | net = models[name](**kwargs)
77 | return net
78 |
-------------------------------------------------------------------------------- /encoding/models/sseg/__init__.py: --------------------------------------------------------------------------------
1 | from .base import *
2 | from .fcn import *
3 | from .psp import *
4 | from .fcfpn import *
5 | from .atten import *
6 | from .encnet import *
7 | from .deeplab import *
8 | from .upernet import *
9 |
10 | def get_segmentation_model(name, **kwargs):
11 | models = {
12 | 'fcn': get_fcn,
13 | 'psp': get_psp,
14 | 'fcfpn': get_fcfpn,
15 | 'atten': get_atten,
16 | 'encnet': get_encnet,
17 | 'upernet': get_upernet,
18 | 'deeplab': get_deeplab,
19 | }
20 | return models[name.lower()](**kwargs)
21 |
-------------------------------------------------------------------------------- /encoding/models/sseg/psp.py: --------------------------------------------------------------------------------
1 | ###########################################################################
2 | # Created by: Hang Zhang
3 | # Email: zhang.hang@rutgers.edu
4 | # Copyright (c) 2017
5 | ###########################################################################
6 | from __future__ import division
7 | import os
8 | import numpy as np
9 | import torch
10 | import torch.nn as nn
11 | from torch.nn.functional import interpolate
12 |
13 | from .base import BaseNet
14 | from .fcn import FCNHead
15 | from ...nn import PyramidPooling
16 |
17 | class PSP(BaseNet):
18 | def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs):
19 | super(PSP, self).__init__(nclass, backbone, aux, se_loss, norm_layer=norm_layer, **kwargs)
20 | self.head = PSPHead(2048, nclass, norm_layer, self._up_kwargs)
21 | if aux:
22 | self.auxlayer = FCNHead(1024, nclass, norm_layer)
23 |
24 | def forward(self, x):
25 | _, _, h, w = x.size()
26 | _, _, c3, c4 = self.base_forward(x)
27 |
28 | outputs = []
29 | x = self.head(c4)
30 | x = interpolate(x, (h, w), **self._up_kwargs)
31 | outputs.append(x)
32 | if self.aux:
33 | auxout = self.auxlayer(c3)
34 | auxout = interpolate(auxout, (h, w), **self._up_kwargs)
35 | outputs.append(auxout)
36 | return tuple(outputs)
37 |
38 |
39 | class PSPHead(nn.Module):
40 | def __init__(self, in_channels, out_channels, norm_layer, up_kwargs):
41 | super(PSPHead, self).__init__()
42 | inter_channels = in_channels // 4
43 | self.conv5 = nn.Sequential(PyramidPooling(in_channels, norm_layer, up_kwargs),
44 | nn.Conv2d(in_channels * 2, inter_channels, 3, padding=1, bias=False),
45 | norm_layer(inter_channels),
46 | nn.ReLU(True),
47 | nn.Dropout(0.1, False),
48 | nn.Conv2d(inter_channels, out_channels, 1))
49 |
50 | def forward(self, x):
51 | return self.conv5(x)
52 |
53 | def get_psp(dataset='pascal_voc', backbone='resnet50s', pretrained=False,
54 | root='~/.encoding/models', **kwargs):
55 | # infer number of classes
56 | from ...datasets import datasets, acronyms
57 | model = PSP(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, root=root, **kwargs)
58 | if pretrained:
59 | from ..model_store import get_model_file
60 | model.load_state_dict(torch.load(
61 | get_model_file('psp_%s_%s'%(backbone, acronyms[dataset]), root=root)))
62 | return model
63 |
64 | def get_psp_resnet50_ade(pretrained=False, root='~/.encoding/models', **kwargs):
65 | r"""PSP model from the paper `"Context Encoding for
Semantic Segmentation"
66 | <https://arxiv.org/abs/1803.08904>`_
67 |
68 | Parameters
69 | ----------
70 | pretrained : bool, default False
71 | Whether to load the pretrained weights for model.
72 | root : str, default '~/.encoding/models'
73 | Location for keeping the model parameters.
74 |
75 |
76 | Examples
77 | --------
78 | >>> model = get_psp_resnet50_ade(pretrained=True)
79 | >>> print(model)
80 | """
81 | return get_psp('ade20k', 'resnet50s', pretrained, root=root, **kwargs)
82 |
-------------------------------------------------------------------------------- /encoding/models/sseg/upernet.py: --------------------------------------------------------------------------------
1 | ###########################################################################
2 | # Created by: Hang Zhang
3 | # Email: zhang.hang@rutgers.edu
4 | # Copyright (c) 2017
5 | ###########################################################################
6 | from __future__ import division
7 | import os
8 | import numpy as np
9 | import torch
10 | import torch.nn as nn
11 | from torch.nn.functional import interpolate
12 |
13 | from .base import BaseNet
14 | from .fcfpn import FCFPNHead
15 | from ...nn import PyramidPooling
16 |
17 | torch_ver = torch.__version__[:3]
18 |
19 | __all__ = ['UperNet', 'get_upernet', 'get_upernet_50_ade']
20 |
21 | class UperNet(BaseNet):
22 | r"""UperNet for semantic segmentation (Unified Perceptual Parsing)
23 |
24 | Parameters
25 | ----------
26 | nclass : int
27 | Number of categories for the training dataset.
28 | backbone : string
29 | Pre-trained backbone network type (default:'resnet50s'; 'resnet50s',
30 | 'resnet101s' or 'resnet152s').
31 | norm_layer : object
32 | Normalization layer used in backbone network (default: :class:`torch.nn.BatchNorm2d`).
33 |
34 |
35 | Reference:
36 |
37 | Xiao, Tete, et al. "Unified Perceptual Parsing for Scene
38 | Understanding." *ECCV*, 2018
39 |
40 | Examples
41 | --------
42 | >>> model = UperNet(nclass=21, backbone='resnet50s')
43 | >>> print(model)
44 | """
45 | def __init__(self, nclass, backbone, aux=True, se_loss=False, norm_layer=nn.BatchNorm2d, **kwargs):
46 | super(UperNet, self).__init__(nclass, backbone, aux, se_loss, dilated=False, norm_layer=norm_layer)
47 | self.head = UperNetHead(nclass, norm_layer, up_kwargs=self._up_kwargs)
48 | assert not aux, "UperNet does not support aux loss"
49 |
50 | def forward(self, x):
51 | imsize = x.size()[2:]
52 | features = self.base_forward(x)
53 |
54 | x = list(self.head(*features))
55 | x[0] = interpolate(x[0], imsize, **self._up_kwargs)
56 | return tuple(x)
57 |
58 |
59 | class UperNetHead(FCFPNHead):
60 | def __init__(self, out_channels, norm_layer=None, fpn_inchannels=[256, 512, 1024, 2048],
61 | fpn_dim=256, up_kwargs=None):
62 | fpn_inchannels[-1] = fpn_inchannels[-1] * 2
63 | super(UperNetHead, self).__init__(out_channels, norm_layer, fpn_inchannels,
64 | fpn_dim, up_kwargs)
65 | self.extramodule = PyramidPooling(fpn_inchannels[-1] // 2, norm_layer, up_kwargs)
66 |
67 |
68 | def get_upernet(dataset='pascal_voc', backbone='resnet50s', pretrained=False,
69 | root='~/.encoding/models', **kwargs):
70 | r"""UperNet model from the paper `"Unified Perceptual Parsing for Scene
71 | Understanding" <https://arxiv.org/abs/1807.10221>`_
72 | Parameters
73 | ----------
74 | dataset : str, default pascal_voc
75 | The dataset the model was pre-trained on. (pascal_voc, ade20k)
76 | pretrained : bool, default False
77 | Whether to load the pretrained weights for model.
78 | root : str, default '~/.encoding/models'
79 | Location for keeping the model parameters.
80 | Examples
81 | --------
82 | >>> model = get_upernet(dataset='pascal_voc', backbone='resnet50s', pretrained=False)
83 | >>> print(model)
84 | """
85 | acronyms = {
86 | 'pascal_voc': 'voc',
87 | 'pascal_aug': 'voc',
88 | 'ade20k': 'ade',
89 | }
90 | # infer number of classes
91 | from ...datasets import datasets
92 | model = UperNet(datasets[dataset.lower()].NUM_CLASS, backbone=backbone, **kwargs)
93 | if pretrained:
94 | from ..model_store import get_model_file
95 | model.load_state_dict(torch.load(
96 | get_model_file('upernet_%s_%s'%(backbone, acronyms[dataset]), root=root)))
97 | return model
98 |
99 |
100 | def get_upernet_50_ade(pretrained=False, root='~/.encoding/models', **kwargs):
101 | r"""UperNet model on ADE20K from the paper `"Unified Perceptual Parsing
102 | for Scene Understanding" <https://arxiv.org/abs/1807.10221>`_
103 |
104 | Parameters
105 | ----------
106 | pretrained : bool, default False
107 | Whether to load the pretrained weights for model.
108 | root : str, default '~/.encoding/models'
109 | Location for keeping the model parameters.
110 |
111 |
112 | Examples
113 | --------
114 | >>> model = get_upernet_50_ade(pretrained=True)
115 | >>> print(model)
116 | """
117 | return get_upernet('ade20k', 'resnet50s', pretrained, root=root, **kwargs)
118 |
-------------------------------------------------------------------------------- /encoding/nn/__init__.py: --------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | """Encoding NN Modules"""
12 | from .encoding import *
13 | from .syncbn import *
14 | from .customize import *
15 | from .attention import *
16 | from .loss import *
17 | from .rectify import *
18 | from .splat import SplAtConv2d
19 | from .dropblock import *
20 |
-------------------------------------------------------------------------------- /encoding/nn/dropblock.py: --------------------------------------------------------------------------------
1 | # https://github.com/Randl/MobileNetV3-pytorch/blob/master/dropblock.py
2 | import torch
3 | import torch.nn.functional as F
4 | from torch import nn
5 |
6 | __all__ = ['DropBlock2D', 'reset_dropblock']
7 |
8 | class DropBlock2D(nn.Module):
9 | r"""Randomly zeroes 2D spatial blocks of the input tensor.
10 | As described in the paper
11 | `DropBlock: A regularization method for convolutional networks`_ ,
12 | dropping whole blocks of a feature map removes correlated semantic
13 | information more effectively than regular dropout.
14 | Args:
15 | drop_prob (float): probability of an element to be dropped.
16 | block_size (int): size of the block to drop
17 | Shape:
18 | - Input: `(N, C, H, W)`
19 | - Output: `(N, C, H, W)`
20 | .. _DropBlock: A regularization method for convolutional networks:
21 | https://arxiv.org/abs/1810.12890
22 | """
23 |
24 | def __init__(self, drop_prob, block_size, share_channel=False):
25 | super(DropBlock2D, self).__init__()
26 | self.register_buffer('i', torch.zeros(1, dtype=torch.int64))
27 | self.register_buffer('drop_prob', drop_prob * torch.ones(1, dtype=torch.float32))
28 | self.inited = False
29 | self.step_size = 0.0
30 | self.start_step = 0
31 | self.nr_steps = 0
32 | self.block_size = block_size
33 | self.share_channel = share_channel
34 |
35 | def reset(self):
36 | """stop DropBlock"""
37 | self.inited = True
38 | self.i[0] = 0
39 | self.drop_prob[0] = 0.0  # assign in place: 'drop_prob' is a registered buffer and must stay a tensor
40 |
41 | def reset_steps(self, start_step, nr_steps, start_value=0, stop_value=None):
42 | self.inited = True
43 | stop_value = self.drop_prob.item() if stop_value is None else stop_value
44 | self.i[0] = 0
45 | self.drop_prob[0] = start_value
46 | self.step_size = (stop_value - start_value) / nr_steps
47 | self.nr_steps = nr_steps
48 | self.start_step = start_step
49 |
50 | def forward(self, x):
51 | if not self.training or self.drop_prob.item() == 0.:
52 | return x
53 | else:
54 | self.step()
55 |
56 | # get gamma value
57 | gamma = self._compute_gamma(x)
58 |
59 | # sample mask and place on input device
60 | if self.share_channel:
61 | mask = (torch.rand(*x.shape[2:], device=x.device, dtype=x.dtype) < gamma).unsqueeze(0).unsqueeze(0)
62 | else:
63 | mask = (torch.rand(*x.shape[1:], device=x.device, dtype=x.dtype) < gamma).unsqueeze(0)
64 |
65 | # compute block mask
66 | block_mask, keeped = self._compute_block_mask(mask)
67 |
68 | # apply block mask
69 | out = x * block_mask
70 |
71 | # scale output
72 | out = out * (block_mask.numel() / keeped).to(out)
73 | return out
74 |
75 | def _compute_block_mask(self, mask):
76 | block_mask = F.max_pool2d(mask,
77 | kernel_size=(self.block_size, self.block_size),
78 | stride=(1, 1),
79 | padding=self.block_size // 2)
80 |
81 | keeped = block_mask.numel() - block_mask.sum().to(torch.float32)
82 | block_mask = 1 - block_mask
83 |
84 | return block_mask, keeped
85 |
86 | def _compute_gamma(self, x):
87 | _, c, h, w = x.size()
88 | gamma = self.drop_prob.item() / (self.block_size ** 2) * (h * w) / \
89 | ((w - self.block_size + 1) * (h - self.block_size + 1))
90 | return gamma
91 |
92 | def step(self):
93 | assert self.inited
94 | idx = self.i.item()
95 | if idx > self.start_step and idx < self.start_step + self.nr_steps:
96 | self.drop_prob += self.step_size
97 | self.i += 1
98 |
99 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
100 | missing_keys, unexpected_keys, error_msgs):
101 | idx_key = prefix + 'i'
102 | drop_prob_key = prefix + 'drop_prob'
103 | if idx_key not in state_dict:
104 | state_dict[idx_key] = torch.zeros(1, dtype=torch.int64)
105 | if drop_prob_key not in state_dict:
106 | state_dict[drop_prob_key] = torch.ones(1, dtype=torch.float32)
107 | super(DropBlock2D, self)._load_from_state_dict(
108 | state_dict, prefix, local_metadata, strict,
109 | missing_keys, unexpected_keys, error_msgs)
110 |
111 | def _save_to_state_dict(self, destination, prefix, keep_vars):
112 | """overwrite save method: the schedule buffers are intentionally not persisted"""
113 | pass
114 |
115 | def extra_repr(self):
116 | return 'drop_prob={}, step_size={}'.format(self.drop_prob.item(), self.step_size)
117 |
118 | def reset_dropblock(start_step, nr_steps, start_value, stop_value, m):
119 | """
120 | Example:
121 | from functools import partial
122 | apply_drop_prob = partial(reset_dropblock, 0, epochs*iters_per_epoch, 0.0, 0.1)
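# hypothetical schedule: drop_prob ramps linearly from 0.0 to 0.1 over epochs*iters_per_epoch steps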
123 | net.apply(apply_drop_prob)
124 | """
125 | if isinstance(m, DropBlock2D):
126 | m.reset_steps(start_step, nr_steps, start_value, stop_value)
127 |
-------------------------------------------------------------------------------- /encoding/nn/loss.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | import torch.nn as nn
4 |
5 |
6 | __all__ = ['LabelSmoothing', 'NLLMultiLabelSmooth', 'SegmentationLosses']
7 |
8 | class LabelSmoothing(nn.Module):
9 | """
10 | NLL loss with label smoothing.
11 | """
12 | def __init__(self, smoothing=0.1):
13 | """
14 | Constructor for the LabelSmoothing module.
15 | :param smoothing: label smoothing factor
16 | """
17 | super(LabelSmoothing, self).__init__()
18 | self.confidence = 1.0 - smoothing
19 | self.smoothing = smoothing
20 |
21 | def forward(self, x, target):
22 | logprobs = torch.nn.functional.log_softmax(x, dim=-1)
23 |
24 | nll_loss = -logprobs.gather(dim=-1, index=target.unsqueeze(1))
25 | nll_loss = nll_loss.squeeze(1)
26 | smooth_loss = -logprobs.mean(dim=-1)
27 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss
28 | return loss.mean()
29 |
30 | class NLLMultiLabelSmooth(nn.Module):
31 | def __init__(self, smoothing=0.1):
32 | super(NLLMultiLabelSmooth, self).__init__()
33 | self.confidence = 1.0 - smoothing
34 | self.smoothing = smoothing
35 |
36 | def forward(self, x, target):
37 | if self.training:
38 | x = x.float()
39 | target = target.float()
40 | logprobs = torch.nn.functional.log_softmax(x, dim=-1)
41 |
42 | nll_loss = -logprobs * target
43 | nll_loss = nll_loss.sum(-1)
44 |
45 | smooth_loss = -logprobs.mean(dim=-1)
46 |
47 | loss = self.confidence * nll_loss + self.smoothing * smooth_loss
48 |
49 | return loss.mean()
50 | else:
51 | return torch.nn.functional.cross_entropy(x, target)
52 |
53 | class SegmentationLosses(nn.CrossEntropyLoss):
54 | """2D Cross Entropy Loss with Auxiliary Loss"""
55 | def __init__(self, se_loss=False, se_weight=0.2, nclass=-1,
56 | aux=False, aux_weight=0.4, weight=None,
57 | ignore_index=-1):
58 | super(SegmentationLosses, self).__init__(weight, None, ignore_index)
59 | self.se_loss = se_loss
60 | self.aux = aux
61 | self.nclass = nclass
62 | self.se_weight = se_weight
63 | self.aux_weight = aux_weight
64 | self.bceloss = nn.BCELoss(weight)
65 |
66 | def forward(self, *inputs):
67 | if not self.se_loss and not self.aux:
68 | return super(SegmentationLosses, self).forward(*inputs)
69 | elif not self.se_loss:
70 | pred1, pred2, target = tuple(inputs)
71 | loss1 = super(SegmentationLosses, self).forward(pred1, target)
72 | loss2 = super(SegmentationLosses, self).forward(pred2, target)
73 | return loss1 + self.aux_weight * loss2
74 | elif not self.aux:
75 | pred, se_pred, target = tuple(inputs)
76 | se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred)
77 | loss1 = super(SegmentationLosses, self).forward(pred, target)
78 | loss2 = self.bceloss(torch.sigmoid(se_pred), se_target)
79 | return loss1 + self.se_weight * loss2
80 | else:
81 | pred1, se_pred, pred2, target = tuple(inputs)
82 | se_target = self._get_batch_label_vector(target, nclass=self.nclass).type_as(pred1)
83 | loss1 = super(SegmentationLosses, self).forward(pred1, target)
84 | loss2 = super(SegmentationLosses, self).forward(pred2, target)
85 | loss3 = self.bceloss(torch.sigmoid(se_pred), se_target)
86 | return loss1 + self.aux_weight * loss2 + self.se_weight * loss3
87 |
88 | @staticmethod
89 | def _get_batch_label_vector(target, nclass):
90 | # target is a 3D tensor BxHxW, output is 2D BxnClass
91 | batch = target.size(0)
92 | tvect = torch.zeros(batch, nclass)
93 | for i in range(batch):
94 | hist = torch.histc(target[i].cpu().data.float(),
95 | bins=nclass, min=0,
96 | max=nclass-1)
97 | vect = hist > 0
98 | tvect[i] = vect
99 | return tvect
100 |
-------------------------------------------------------------------------------- /encoding/nn/rectify.py: --------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## LICENSE file in the root directory of this source tree
7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 |
9 | """Rectify Module"""
10 | import warnings
11 |
12 | import torch
13 | from torch.nn import Conv2d
14 | import torch.nn.functional as F
15 | from torch.nn.modules.utils import _pair
16 |
17 | from ..functions import rectify
18 |
19 | __all__ = ['RFConv2d']
20 |
21 |
22 | class RFConv2d(Conv2d):
23 | """Rectified Convolution
24 | """
25 | def __init__(self, in_channels, out_channels, kernel_size, stride=1,
26 | padding=0, dilation=1, groups=1,
27 | bias=True, padding_mode='zeros',
28 | average_mode=False):
29 | kernel_size = _pair(kernel_size)
30 | stride = _pair(stride)
31 | padding = _pair(padding)
32 | dilation = _pair(dilation)
33 | self.rectify = average_mode or (padding[0] > 0 or padding[1] > 0)
34 | self.average = average_mode
35 |
36 | super(RFConv2d, self).__init__(
37 | in_channels, out_channels, kernel_size, stride=stride,
38 | padding=padding, dilation=dilation, groups=groups,
39 | bias=bias, padding_mode=padding_mode)
40 |
41 | def _conv_forward(self, input, weight):
42 | if self.padding_mode != 'zeros':
43 | return F.conv2d(F.pad(input, self._padding_repeated_twice, mode=self.padding_mode),
44 | weight, self.bias, self.stride,
45 | _pair(0), self.dilation, self.groups)
46 | return F.conv2d(input, weight, self.bias, self.stride,
47 | self.padding, self.dilation, self.groups)
48 |
49 | def forward(self, input):
50 | output = self._conv_forward(input, self.weight)
51 | if self.rectify:
52 | output = rectify(output, input, self.kernel_size, self.stride,
53 | self.padding, self.dilation, self.average)
54 | return output
55 |
56 | def extra_repr(self):
57 | return super().extra_repr() + ', rectify={}, average_mode={}'.format(
58 | self.rectify, self.average)
59 |
-------------------------------------------------------------------------------- /encoding/nn/splat.py: --------------------------------------------------------------------------------
1 | """Split-Attention"""
2 |
3 | import torch
4 | from torch import nn
5 | import torch.nn.functional as F
6 | from torch.nn import Conv2d, Module, Linear, BatchNorm2d, ReLU
7 | from torch.nn.modules.utils import _pair
8 |
9 | from .rectify import RFConv2d
10 | from .dropblock import DropBlock2D
11 |
12 | __all__ = ['SplAtConv2d']
13 |
14 | class SplAtConv2d(Module):
15 | """Split-Attention Conv2d
16 | """
17 | def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0),
18 | dilation=(1, 1), groups=1, bias=True,
19 | radix=2, reduction_factor=4,
20 | rectify=False, rectify_avg=False, norm_layer=None,
21 | dropblock_prob=0.0, **kwargs):
22 | super(SplAtConv2d, self).__init__()
23 | padding = _pair(padding)
24 | self.rectify = rectify and (padding[0] > 0 or padding[1] > 0)
25 | self.rectify_avg = rectify_avg
26 | inter_channels = max(in_channels*radix//reduction_factor, 32)
27 | self.radix = radix
28 | self.cardinality = groups
29 | self.channels = channels
30 | self.dropblock_prob = dropblock_prob
31 | if self.rectify:
32 | self.conv = RFConv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation,
33 | groups=groups*radix, bias=bias, average_mode=rectify_avg, **kwargs)
34 | else:
35 | self.conv = Conv2d(in_channels, channels*radix, kernel_size, stride, padding, dilation,
36 | groups=groups*radix, bias=bias, **kwargs)
37 | self.use_bn = norm_layer is not None
38 | self.bn0 = norm_layer(channels*radix) if self.use_bn else None
39 | self.relu = ReLU(inplace=True)
40 | self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality)
41 | self.bn1 = norm_layer(inter_channels) if self.use_bn else None
42 | self.fc2 = Conv2d(inter_channels, channels*radix, 1, groups=self.cardinality)
43 | if dropblock_prob > 0.0:
44 | self.dropblock = DropBlock2D(dropblock_prob, 3)
45 | self.rsoftmax = rSoftMax(radix, groups)
46 |
47 | def forward(self, x):
48 | x = self.conv(x)
49 | if self.use_bn:
50 | x = self.bn0(x)
51 | if self.dropblock_prob > 0.0:
52 | x = self.dropblock(x)
53 | x = self.relu(x)
54 |
55 | batch, channel = x.shape[:2]
56 | if self.radix > 1:
57 | splited = torch.split(x, channel//self.radix, dim=1)
58 | gap = sum(splited)
59 | else:
60 | gap = x
61 | gap = F.adaptive_avg_pool2d(gap, 1)
62 | gap = self.fc1(gap)
63 |
64 | if self.use_bn:
65 | gap = self.bn1(gap)
66 | gap = self.relu(gap)
67 |
68 | atten = self.fc2(gap)
69 | atten = self.rsoftmax(atten).view(batch, -1, 1, 1)
70 |
71 | if self.radix > 1:
72 | atten = torch.split(atten, channel//self.radix, dim=1)
73 | out = sum([att*split for (att, split) in zip(atten, splited)])
74 | else:
75 | out = atten * x
76 | return out.contiguous()
77 |
78 | class rSoftMax(nn.Module):
79 | def __init__(self, radix, cardinality):
80 | super().__init__()
81 | self.radix = radix
82 | self.cardinality = cardinality
83 |
84 | def forward(self, x):
85 | batch = x.size(0)
86 | if self.radix > 1:
87 | x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2)
88 | x = F.softmax(x, dim=1)
89 | x = x.reshape(batch, -1)
90 | else:
91 | x = torch.sigmoid(x)
92 | return x
93 |
-------------------------------------------------------------------------------- /encoding/transforms/__init__.py: --------------------------------------------------------------------------------
1 | from .transforms import *
2 | from .get_transform import
get_transform
3 |
-------------------------------------------------------------------------------- /encoding/transforms/get_transform.py: --------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## LICENSE file in the root directory of this source tree
7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 | import torch
9 | from torchvision.transforms import *
10 | from .transforms import *
11 |
12 | def get_transform(dataset, base_size=None, crop_size=224, rand_aug=False, etrans=True, **kwargs):
13 | normalize = Normalize(mean=[0.485, 0.456, 0.406],
14 | std=[0.229, 0.224, 0.225])
15 | base_size = base_size if base_size is not None else int(1.0 * crop_size / 0.875)
16 | if dataset == 'imagenet':
17 | train_transforms = []
18 | val_transforms = []
19 | if rand_aug:
20 | from .autoaug import RandAugment
21 | train_transforms.append(RandAugment(2, 12))
22 | if etrans:
23 | train_transforms.extend([
24 | ERandomCrop(crop_size),
25 | ])
26 | val_transforms.extend([
27 | ECenterCrop(crop_size),
28 | ])
29 |
30 | else:
31 | train_transforms.extend([
32 | RandomResizedCrop(crop_size),
33 | ])
34 | val_transforms.extend([
35 | Resize(base_size),
36 | CenterCrop(crop_size),
37 | ])
38 | train_transforms.extend([
39 | RandomHorizontalFlip(),
40 | ColorJitter(0.4, 0.4, 0.4),
41 | ToTensor(),
42 | Lighting(0.1, _imagenet_pca['eigval'], _imagenet_pca['eigvec']),
43 | normalize,
44 | ])
45 | val_transforms.extend([
46 | ToTensor(),
47 | normalize,
48 | ])
49 | transform_train = Compose(train_transforms)
50 | transform_val = Compose(val_transforms)
51 | elif dataset == 'minc':
52 | transform_train = Compose([
53 | Resize(base_size),
54 | RandomResizedCrop(crop_size),
55 | RandomHorizontalFlip(),
56 | ColorJitter(0.4, 0.4, 0.4),
57 | ToTensor(),
58 | Lighting(0.1, _imagenet_pca['eigval'], _imagenet_pca['eigvec']),
59 | normalize,
60 | ])
61 | transform_val = Compose([
62 | Resize(base_size),
63 | CenterCrop(crop_size),
64 | ToTensor(),
65 | normalize,
66 | ])
67 | elif dataset == 'cifar10':
68 | transform_train = Compose([
69 | RandomCrop(32, padding=4),
70 | RandomHorizontalFlip(),
71 | ToTensor(),
72 | Normalize((0.4914, 0.4822, 0.4465),
73 | (0.2023, 0.1994, 0.2010)),
74 | ])
75 | transform_val = Compose([
76 | ToTensor(),
77 | Normalize((0.4914, 0.4822, 0.4465),
78 | (0.2023, 0.1994, 0.2010)),
79 | ])
80 | else:
81 | raise NotImplementedError('unsupported dataset: {}'.format(dataset))
82 | return transform_train, transform_val
83 |
84 | _imagenet_pca = {
85 | 'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]),
86 | 'eigvec': torch.Tensor([
87 | [-0.5675, 0.7192, 0.4009],
88 | [-0.5808, -0.0045, -0.8140],
89 | [-0.5836, -0.6948, 0.4203],
90 | ])
91 | }
92 |
-------------------------------------------------------------------------------- /encoding/transforms/transforms.py: --------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## LICENSE file in the root directory of this source tree
7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 | import math
9 | import random
10 |
11 | from PIL import Image
12 | from torchvision.transforms import Resize
13 |
14 | __all__ = ['Lighting', 'ERandomCrop', 'ECenterCrop']
15 |
16 |
class Lighting(object): 17 | """Lighting noise(AlexNet - style PCA - based noise)""" 18 | 19 | def __init__(self, alphastd, eigval, eigvec): 20 | self.alphastd = alphastd 21 | self.eigval = eigval 22 | self.eigvec = eigvec 23 | 24 | def __call__(self, img): 25 | if self.alphastd == 0: 26 | return img 27 | 28 | alpha = img.new().resize_(3).normal_(0, self.alphastd) 29 | rgb = self.eigvec.type_as(img).clone()\ 30 | .mul(alpha.view(1, 3).expand(3, 3))\ 31 | .mul(self.eigval.view(1, 3).expand(3, 3))\ 32 | .sum(1).squeeze() 33 | 34 | return img.add(rgb.view(3, 1, 1).expand_as(img)) 35 | 36 | 37 | #https://github.com/kakaobrain/fast-autoaugment/blob/master/FastAutoAugment/data.py 38 | class ERandomCrop: 39 | def __init__(self, imgsize, min_covered=0.1, aspect_ratio_range=(3./4, 4./3), 40 | area_range=(0.1, 1.0), max_attempts=10): 41 | assert 0.0 < min_covered 42 | assert 0 < aspect_ratio_range[0] <= aspect_ratio_range[1] 43 | assert 0 < area_range[0] <= area_range[1] 44 | assert 1 <= max_attempts 45 | 46 | self.imgsize = imgsize 47 | self.min_covered = min_covered 48 | self.aspect_ratio_range = aspect_ratio_range 49 | self.area_range = area_range 50 | self.max_attempts = max_attempts 51 | self._fallback = ECenterCrop(imgsize) 52 | self.resize_method = Resize((imgsize, imgsize), interpolation=Image.BICUBIC) 53 | 54 | def __call__(self, img): 55 | original_width, original_height = img.size 56 | min_area = self.area_range[0] * (original_width * original_height) 57 | max_area = self.area_range[1] * (original_width * original_height) 58 | 59 | for _ in range(self.max_attempts): 60 | aspect_ratio = random.uniform(*self.aspect_ratio_range) 61 | height = int(round(math.sqrt(min_area / aspect_ratio))) 62 | max_height = int(round(math.sqrt(max_area / aspect_ratio))) 63 | 64 | if max_height * aspect_ratio > original_width: 65 | max_height = (original_width + 0.5 - 1e-7) / aspect_ratio 66 | max_height = int(max_height) 67 | if max_height * aspect_ratio > original_width: 68 | max_height -= 1 69 | 70 | if max_height > original_height: 71 | max_height = original_height 72 | 73 | if height >= max_height: 74 | height = max_height 75 | 76 | height = int(round(random.uniform(height, max_height))) 77 | width = int(round(height * aspect_ratio)) 78 | area = width * height 79 | 80 | if area < min_area or area > max_area: 81 | continue 82 | if width > original_width or height > original_height: 83 | continue 84 | if area < self.min_covered * (original_width * original_height): 85 | continue 86 | if width == original_width and height == original_height: 87 | return self._fallback(img) 88 | 89 | x = random.randint(0, original_width - width) 90 | y = random.randint(0, original_height - height) 91 | img = img.crop((x, y, x + width, y + height)) 92 | return self.resize_method(img) 93 | 94 | return self._fallback(img) 95 | 96 | 97 | class ECenterCrop: 98 | """Crop the given PIL Image and resize it to desired size. 99 | Args: 100 | img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image. 101 | output_size (sequence or int): (height, width) of the crop box. If int, 102 | it is used for both directions 103 | Returns: 104 | PIL Image: Cropped image. 
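Example (a sketch; 'example.jpg' is a stand-in path):
    >>> cropper = ECenterCrop(224)
    >>> out = cropper(Image.open('example.jpg'))  # center-crop, then bicubic-resize to 224x224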
105 | """ 106 | def __init__(self, imgsize): 107 | self.imgsize = imgsize 108 | self.resize_method = Resize((imgsize, imgsize), interpolation=Image.BICUBIC) 109 | 110 | def __call__(self, img): 111 | image_width, image_height = img.size 112 | image_short = min(image_width, image_height) 113 | 114 | crop_size = float(self.imgsize) / (self.imgsize + 32) * image_short 115 | 116 | crop_height, crop_width = crop_size, crop_size 117 | crop_top = int(round((image_height - crop_height) / 2.)) 118 | crop_left = int(round((image_width - crop_width) / 2.)) 119 | img = img.crop((crop_left, crop_top, crop_left + crop_width, crop_top + crop_height)) 120 | return self.resize_method(img) 121 | -------------------------------------------------------------------------------- /encoding/utils/__init__.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Encoding Util Tools""" 12 | from .lr_scheduler import * 13 | from .metrics import * 14 | from .pallete import get_mask_pallete 15 | from .train_helper import * 16 | from .presets import load_image 17 | from .files import * 18 | from .misc import * 19 | from .dist_helper import * 20 | -------------------------------------------------------------------------------- /encoding/utils/dist_helper.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import torch 12 | 13 | __all__ = ['torch_dist_sum'] 14 | 15 | def torch_dist_sum(gpu, *args): 16 | process_group = torch.distributed.group.WORLD 17 | tensor_args = [] 18 | pending_res = [] 19 | for arg in args: 20 | if isinstance(arg, torch.Tensor): 21 | tensor_arg = arg.clone().reshape(-1).detach().cuda(gpu) 22 | else: 23 | tensor_arg = torch.tensor(arg).reshape(-1).cuda(gpu) 24 | tensor_args.append(tensor_arg) 25 | pending_res.append(torch.distributed.all_reduce(tensor_arg, group=process_group, async_op=True)) 26 | for res in pending_res: 27 | res.wait() 28 | return tensor_args 29 | -------------------------------------------------------------------------------- /encoding/utils/files.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import errno 4 | import shutil 5 | import hashlib 6 | from tqdm import tqdm 7 | import torch 8 | 9 | __all__ = ['save_checkpoint', 'download', 'mkdir', 'check_sha1'] 10 | 11 | def save_checkpoint(state, args, is_best, filename='checkpoint.pth.tar'): 12 | """Saves checkpoint to disk""" 13 | if hasattr(args, 'backbone'): 14 | directory = "runs/%s/%s/%s/%s/"%(args.dataset, args.model, args.backbone, args.checkname) 15 | else: 16 | directory = 
"runs/%s/%s/%s/"%(args.dataset, args.model, args.checkname) 17 | if not os.path.exists(directory): 18 | os.makedirs(directory) 19 | filename = directory + filename 20 | torch.save(state, filename) 21 | if is_best: 22 | shutil.copyfile(filename, directory + 'model_best.pth.tar') 23 | 24 | 25 | def download(url, path=None, overwrite=False, sha1_hash=None): 26 | """Download an given URL 27 | Parameters 28 | ---------- 29 | url : str 30 | URL to download 31 | path : str, optional 32 | Destination path to store downloaded file. By default stores to the 33 | current directory with same name as in url. 34 | overwrite : bool, optional 35 | Whether to overwrite destination file if already exists. 36 | sha1_hash : str, optional 37 | Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified 38 | but doesn't match. 39 | Returns 40 | ------- 41 | str 42 | The file path of the downloaded file. 43 | """ 44 | if path is None: 45 | fname = url.split('/')[-1] 46 | else: 47 | path = os.path.expanduser(path) 48 | if os.path.isdir(path): 49 | fname = os.path.join(path, url.split('/')[-1]) 50 | else: 51 | fname = path 52 | 53 | if overwrite or not os.path.exists(fname) or (sha1_hash and not check_sha1(fname, sha1_hash)): 54 | dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname))) 55 | if not os.path.exists(dirname): 56 | os.makedirs(dirname) 57 | 58 | print('Downloading %s from %s...'%(fname, url)) 59 | r = requests.get(url, stream=True) 60 | if r.status_code != 200: 61 | raise RuntimeError("Failed downloading url %s"%url) 62 | total_length = r.headers.get('content-length') 63 | with open(fname, 'wb') as f: 64 | if total_length is None: # no content length header 65 | for chunk in r.iter_content(chunk_size=1024): 66 | if chunk: # filter out keep-alive new chunks 67 | f.write(chunk) 68 | else: 69 | total_length = int(total_length) 70 | for chunk in tqdm(r.iter_content(chunk_size=1024), 71 | total=int(total_length / 1024. + 0.5), 72 | unit='KB', unit_scale=False, dynamic_ncols=True): 73 | f.write(chunk) 74 | 75 | if sha1_hash and not check_sha1(fname, sha1_hash): 76 | raise UserWarning('File {} is downloaded but the content hash does not match. ' \ 77 | 'The repo may be outdated or download may be incomplete. ' \ 78 | 'If the "repo_url" is overridden, consider switching to ' \ 79 | 'the default repo.'.format(fname)) 80 | 81 | return fname 82 | 83 | 84 | def check_sha1(filename, sha1_hash): 85 | """Check whether the sha1 hash of the file content matches the expected hash. 86 | Parameters 87 | ---------- 88 | filename : str 89 | Path to the file. 90 | sha1_hash : str 91 | Expected sha1 hash in hexadecimal digits. 92 | Returns 93 | ------- 94 | bool 95 | Whether the file content matches the expected hash. 
96 | """
97 | sha1 = hashlib.sha1()
98 | with open(filename, 'rb') as f:
99 | while True:
100 | data = f.read(1048576)
101 | if not data:
102 | break
103 | sha1.update(data)
104 |
105 | return sha1.hexdigest() == sha1_hash
106 |
107 |
108 | def mkdir(path):
109 | """make a directory; OK if it already exists"""
110 | try:
111 | os.makedirs(path)
112 | except OSError as exc: # Python >2.5
113 | if exc.errno == errno.EEXIST and os.path.isdir(path):
114 | pass
115 | else:
116 | raise
117 |
-------------------------------------------------------------------------------- /encoding/utils/lr_scheduler.py: --------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | import math
12 |
13 | __all__ = ['LR_Scheduler', 'LR_Scheduler_Head']
14 |
15 | class LR_Scheduler(object):
16 | """Learning Rate Scheduler
17 |
18 | Step mode: ``lr = baselr * 0.1 ^ floor(epoch / lr_step)``
19 |
20 | Cosine mode: ``lr = baselr * 0.5 * (1 + cos(pi * iter / maxiter))``
21 |
22 | Poly mode: ``lr = baselr * (1 - iter/maxiter) ^ 0.9``
23 |
24 | Args:
25 | args: :attr:`args.lr_scheduler` lr scheduler mode (`cos`, `poly`),
26 | :attr:`args.lr` base learning rate, :attr:`args.epochs` number of epochs,
27 | :attr:`args.lr_step`
28 |
29 | iters_per_epoch: number of iterations per epoch
30 | """
31 | def __init__(self, mode, base_lr, num_epochs, iters_per_epoch=0,
32 | lr_step=0, warmup_epochs=0, quiet=False):
33 | self.mode = mode
34 | self.quiet = quiet
35 | if not quiet:
36 | print('Using {} LR scheduler with warm-up epochs of {}!'.format(self.mode, warmup_epochs))
37 | if mode == 'step':
38 | assert lr_step
39 | self.base_lr = base_lr
40 | self.lr_step = lr_step
41 | self.iters_per_epoch = iters_per_epoch
42 | self.epoch = -1
43 | self.warmup_iters = warmup_epochs * iters_per_epoch
44 | self.total_iters = (num_epochs - warmup_epochs) * iters_per_epoch
45 |
46 | def __call__(self, optimizer, i, epoch, best_pred):
47 | T = epoch * self.iters_per_epoch + i
48 | # warm up lr schedule
49 | if self.warmup_iters > 0 and T < self.warmup_iters:
50 | lr = self.base_lr * 1.0 * T / self.warmup_iters
51 | elif self.mode == 'cos':
52 | T = T - self.warmup_iters
53 | lr = 0.5 * self.base_lr * (1 + math.cos(1.0 * T / self.total_iters * math.pi))
54 | elif self.mode == 'poly':
55 | T = T - self.warmup_iters
56 | lr = self.base_lr * pow((1 - 1.0 * T / self.total_iters), 0.9)
57 | elif self.mode == 'step':
58 | lr = self.base_lr * (0.1 ** (epoch // self.lr_step))
59 | else:
60 | raise NotImplementedError('unknown lr scheduler mode: {}'.format(self.mode))
61 | if epoch > self.epoch and (epoch == 0 or best_pred > 0.0):
62 | if not self.quiet:
63 | print('\n=>Epoch %i, learning rate = %.4f, \
64 | previous best = %.4f' % (epoch, lr, best_pred))
65 | self.epoch = epoch
66 | assert lr >= 0
67 | self._adjust_learning_rate(optimizer, lr)
68 |
69 | def _adjust_learning_rate(self, optimizer, lr):
70 | for i in range(len(optimizer.param_groups)):
71 | optimizer.param_groups[i]['lr'] = lr
72 |
73 | class LR_Scheduler_Head(LR_Scheduler):
74 | """Increase the LR of the additional head layers to 10 times the base LR"""
75 | def _adjust_learning_rate(self, optimizer, lr):
76 | if
len(optimizer.param_groups) == 1:
77 | optimizer.param_groups[0]['lr'] = lr
78 | else:
79 | # enlarge the lr at the head
80 | optimizer.param_groups[0]['lr'] = lr
81 | for i in range(1, len(optimizer.param_groups)):
82 | optimizer.param_groups[i]['lr'] = lr * 10
83 |
-------------------------------------------------------------------------------- /encoding/utils/misc.py: --------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## LICENSE file in the root directory of this source tree
7 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 | import warnings
9 |
10 | __all__ = ['AverageMeter', 'EncodingDeprecationWarning']
11 |
12 | class AverageMeter(object):
13 | """Computes and stores the running average of a value"""
14 | def __init__(self):
15 | self.reset()
16 |
17 | def reset(self):
18 | #self.val = 0
19 | self.sum = 0
20 | self.count = 0
21 |
22 | def update(self, val, n=1):
23 | #self.val = val
24 | self.sum += val * n
25 | self.count += n
26 |
27 | @property
28 | def avg(self):
29 | avg = 0 if self.count == 0 else self.sum / self.count
30 | return avg
31 |
32 | class EncodingDeprecationWarning(DeprecationWarning):
33 | pass
34 |
35 | warnings.simplefilter('once', EncodingDeprecationWarning)
36 |
-------------------------------------------------------------------------------- /encoding/utils/precise_bn.py: --------------------------------------------------------------------------------
1 | # Adapted from https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/precise_bn.py
2 | import itertools
3 | from typing import Any, Iterable, List, Tuple, Type
4 |
5 | import torch
6 | from torch import nn
7 | from ..nn import DistSyncBatchNorm, SyncBatchNorm
8 |
9 | BN_MODULE_TYPES: Tuple[Type[nn.Module], ...] = (
10 | torch.nn.BatchNorm1d,
11 | torch.nn.BatchNorm2d,
12 | torch.nn.BatchNorm3d,
13 | torch.nn.SyncBatchNorm,
14 | DistSyncBatchNorm,
15 | SyncBatchNorm,
16 | )
17 |
18 |
19 | @torch.no_grad()
20 | def update_bn_stats(
21 | model: nn.Module, data_loader: Iterable[Any], num_iters: int = 200 # pyre-ignore
22 | ) -> None:
23 | """
24 | Recompute and update the batch norm stats to make them more precise. During
25 | training both BN stats and the weight are changing after every iteration, so
26 | the running average can not precisely reflect the actual stats of the
27 | current model.
28 | In this function, the BN stats are recomputed with fixed weights, to make
29 | the running average more precise. Specifically, it computes the true average
30 | of per-batch mean/variance instead of the running average.
31 | Args:
32 | model (nn.Module): the model whose bn stats will be recomputed.
33 | Note that:
34 | 1. This function will not alter the training mode of the given model.
35 | Users are responsible for setting the layers that need
36 | precise-BN to training mode, prior to calling this function.
37 | 2. Be careful if your models contain other stateful layers in
38 | addition to BN, i.e. layers whose state can change in forward
39 | iterations. This function will alter their state. If you wish
40 | them unchanged, you need to either pass in a submodule without
41 | those layers, or backup the states.
42 | data_loader (iterator): an iterator. Produce data as inputs to the model.
43 | num_iters (int): number of iterations to compute the stats.
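Example (a minimal sketch; ``model`` and ``loader`` are assumed to exist):
    model.train()  # precise-BN layers must be in training mode, per note 1 above
    update_bn_stats(model, loader, num_iters=200)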
44 | """ 45 | bn_layers = get_bn_modules(model) 46 | 47 | if len(bn_layers) == 0: 48 | return 49 | 50 | # In order to make the running stats only reflect the current batch, the 51 | # momentum is disabled. 52 | # bn.running_mean = (1 - momentum) * bn.running_mean + momentum * batch_mean 53 | # Setting the momentum to 1.0 to compute the stats without momentum. 54 | momentum_actual = [bn.momentum for bn in bn_layers] # pyre-ignore 55 | for bn in bn_layers: 56 | bn.momentum = 1.0 57 | 58 | # Note that running_var actually means "running average of variance" 59 | running_mean = [ 60 | torch.zeros_like(bn.running_mean) for bn in bn_layers # pyre-ignore 61 | ] 62 | running_var = [torch.zeros_like(bn.running_var) for bn in bn_layers] # pyre-ignore 63 | 64 | ind = -1 65 | for ind, inputs in enumerate(itertools.islice(data_loader, num_iters)): 66 | inputs=inputs.cuda() 67 | with torch.no_grad(): # No need to backward 68 | model(inputs) 69 | 70 | for i, bn in enumerate(bn_layers): 71 | # Accumulates the bn stats. 72 | running_mean[i] += (bn.running_mean - running_mean[i]) / (ind + 1) 73 | running_var[i] += (bn.running_var - running_var[i]) / (ind + 1) 74 | # We compute the "average of variance" across iterations. 75 | assert ind == num_iters - 1, ( 76 | "update_bn_stats is meant to run for {} iterations, " 77 | "but the dataloader stops at {} iterations.".format(num_iters, ind) 78 | ) 79 | 80 | for i, bn in enumerate(bn_layers): 81 | # Sets the precise bn stats. 82 | bn.running_mean = running_mean[i] 83 | bn.running_var = running_var[i] 84 | bn.momentum = momentum_actual[i] 85 | 86 | 87 | def get_bn_modules(model: nn.Module) -> List[nn.Module]: 88 | """ 89 | Find all BatchNorm (BN) modules that are in training mode. See 90 | fvcore.precise_bn.BN_MODULE_TYPES for a list of all modules that are 91 | included in this search. 92 | Args: 93 | model (nn.Module): a model possibly containing BN modules. 94 | Returns: 95 | list[nn.Module]: all BN modules in the model. 96 | """ 97 | # Finds all the bn layers. 
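# Modules in eval mode are skipped, so callers can freeze parts of the model
# and still recompute precise stats for the BN layers left in training mode.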
98 | bn_layers = [
99 | m for m in model.modules() if m.training and isinstance(m, BN_MODULE_TYPES)
100 | ]
101 | return bn_layers
102 |
-------------------------------------------------------------------------------- /encoding/utils/presets.py: --------------------------------------------------------------------------------
1 | """Preset Transforms for Demos"""
2 | from PIL import Image
3 | import numpy as np
4 | import torch
5 | import torchvision.transforms as transform
6 |
7 | __all__ = ['load_image']
8 |
9 | input_transform = transform.Compose([
10 | transform.ToTensor(),
11 | transform.Normalize([.485, .456, .406], [.229, .224, .225])])
12 |
13 | def load_image(filename, size=None, scale=None, keep_asp=True, transform=input_transform):
14 | """Load the image for demos"""
15 | img = Image.open(filename).convert('RGB')
16 | if size is not None:
17 | if keep_asp:
18 | size2 = int(size * 1.0 / img.size[0] * img.size[1])
19 | img = img.resize((size, size2), Image.LANCZOS)
20 | else:
21 | img = img.resize((size, size), Image.LANCZOS)
22 | elif scale is not None:
23 | img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.LANCZOS)
24 |
25 | if transform:
26 | img = transform(img)
27 | return img
28 |
-------------------------------------------------------------------------------- /encoding/utils/train_helper.py: --------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## ECE Department, Rutgers University
4 | ## Email: zhang.hang@rutgers.edu
5 | ## Copyright (c) 2017
6 | ##
7 | ## This source code is licensed under the MIT-style license found in the
8 | ## LICENSE file in the root directory of this source tree
9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 |
11 | import numpy as np
12 | import torch
13 | import torch.nn as nn
14 |
15 | #from ..nn import SyncBatchNorm
16 | from torch.nn.modules.batchnorm import _BatchNorm
17 |
18 | __all__ = ['MixUpWrapper', 'get_selabel_vector']
19 |
20 | class MixUpWrapper(object):
21 | def __init__(self, alpha, num_classes, dataloader, device):
22 | self.alpha = alpha
23 | self.dataloader = dataloader
24 | self.num_classes = num_classes
25 | self.device = device
26 |
27 | def mixup_loader(self, loader):
28 | def mixup(alpha, num_classes, data, target):
29 | with torch.no_grad():
30 | bs = data.size(0)
31 | c = np.random.beta(alpha, alpha)
32 | perm = torch.randperm(bs).cuda()
33 |
34 | md = c * data + (1-c) * data[perm, :]
35 | mt = c * target + (1-c) * target[perm, :]
36 | return md, mt
37 |
38 | for input, target in loader:
39 | input, target = input.cuda(self.device), target.cuda(self.device)
40 | target = torch.nn.functional.one_hot(target, self.num_classes)
41 | i, t = mixup(self.alpha, self.num_classes, input, target)
42 | yield i, t
43 |
44 | def __len__(self):
45 | return len(self.dataloader)
46 |
47 | def __iter__(self):
48 | return self.mixup_loader(self.dataloader)
49 |
50 |
51 | def get_selabel_vector(target, nclass):
52 | r"""Get SE-Loss Label in a batch
53 | Args:
54 | target: label 3D tensor (BxHxW)
55 | nclass: number of categories (int)
56 | Output:
57 | 2D tensor (BxnClass)
58 | """
59 | batch = target.size(0)
60 | tvect = torch.zeros(batch, nclass)
61 | for i in range(batch):
62 | hist = torch.histc(target[i].data.float(),
63 | bins=nclass, min=0,
64 | max=nclass-1)
65 | vect = hist > 0
66 | tvect[i] = vect
67 | return tvect
68 |
-------------------------------------------------------------------------------- /experiments/recognition/README.md: --------------------------------------------------------------------------------
1 | - [Link to Docs](https://hangzhang.org/PyTorch-Encoding/model_zoo/imagenet.html)
2 |
-------------------------------------------------------------------------------- /experiments/recognition/resnet50_baseline.sh: --------------------------------------------------------------------------------
1 | # baseline
2 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_check --lr 0.025 --batch-size 64
3 |
4 | # rectify
5 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt --lr 0.1 --batch-size 256 --rectify
6 |
7 | # warmup
8 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt_warm --lr 0.1 --batch-size 256 --warmup-epochs 5 --rectify
9 |
10 | # no-bn-wd
11 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt_nobnwd_warm --lr 0.1 --batch-size 256 --no-bn-wd --warmup-epochs 5 --rectify
12 |
13 | # LS
14 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt_ls --lr 0.1 --batch-size 256 --label-smoothing 0.1 --rectify
15 |
16 | # Mixup + LS
17 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 200 --checkname resnet50_rt_ls_mixup --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --rectify
18 |
19 | # last-gamma
20 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 120 --checkname resnet50_rt_gamma --lr 0.1 --batch-size 256 --last-gamma --rectify
21 |
22 | # BoTs
23 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 200 --checkname resnet50_rt_bots --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --last-gamma --no-bn-wd --warmup-epochs 5 --rectify
24 |
25 | # resnet50d
26 | python train_dist.py --dataset imagenet --model resnet50d --lr-scheduler cos --epochs 200 --checkname resnet50d_rt_bots --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --last-gamma --no-bn-wd --warmup-epochs 5 --rectify
27 |
28 | # dropblock
29 | python train_dist.py --dataset imagenet --model resnet50 --lr-scheduler cos --epochs 200 --checkname resnet50_rt_dropblock --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --dropblock-prob 0.1 --rectify
30 |
31 | # resnest50
32 | python train_dist.py --dataset imagenet --model resnest50 --lr-scheduler cos --epochs 270 --checkname resnest50_rt_bots --lr 0.1 --batch-size 256 --label-smoothing 0.1 --mixup 0.2 --last-gamma --no-bn-wd --warmup-epochs 5 --dropblock-prob 0.1 --rectify
33 |
-------------------------------------------------------------------------------- /experiments/recognition/test_flops.py: --------------------------------------------------------------------------------
1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 | ## Created by: Hang Zhang
3 | ## Email: zhanghang0704@gmail.com
4 | ## Copyright (c) 2020
5 | ##
6 | ## This source code is licensed under the MIT-style license found in the
7 | ## LICENSE file in the root directory of this source tree
8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 |
10 | import argparse
11 | import torch
-------------------------------------------------------------------------------- /experiments/recognition/test_flops.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## This source code is licensed under the MIT-style license found in the 7 | ## LICENSE file in the root directory of this source tree 8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 | 10 | import argparse 11 | import torch 12 | 13 | from thop import profile, clever_format 14 | 15 | import encoding 16 | 17 | def get_args(): 18 | # data settings 19 | parser = argparse.ArgumentParser(description='Deep Encoding') 20 | parser.add_argument('--crop-size', type=int, default=224, 21 | help='crop image size') 22 | # model params 23 | parser.add_argument('--model', type=str, default='densenet', 24 | help='network model type (default: densenet)') 25 | parser.add_argument('--rectify', action='store_true', 26 | default=False, help='rectify convolution') 27 | parser.add_argument('--rectify-avg', action='store_true', 28 | default=False, help='use average pooling in rectified convolution') 29 | 30 | args = parser.parse_args() 31 | return args 32 | 33 | def main(): 34 | args = get_args() 35 | 36 | model_kwargs = {} 37 | if args.rectify: 38 | model_kwargs['rectified_conv'] = True 39 | model_kwargs['rectify_avg'] = args.rectify_avg 40 | 41 | model = encoding.models.get_model(args.model, **model_kwargs) 42 | print(model) 43 | 44 | dummy_images = torch.rand(1, 3, args.crop_size, args.crop_size) 45 | 46 | macs, params = profile(model, inputs=(dummy_images, )) 47 | macs, params = clever_format([macs, params], "%.3f") 48 | 49 | print(f"macs: {macs}, params: {params}") 50 | 51 | if __name__ == '__main__': 52 | main() 53 | 
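# Editor's note (added): example invocation, assuming the thop package is
# installed (pip install thop); resnest50 is one model name used elsewhere in
# this repo:
#
#     python test_flops.py --model resnest50 --crop-size 224 --rectify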
-------------------------------------------------------------------------------- /experiments/segmentation/demo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import encoding 3 | 4 | # Get the model 5 | model = encoding.models.get_model('fcn_resnet50s_ade', pretrained=True).cuda() 6 | model.eval() 7 | 8 | # Prepare the image 9 | url = 'https://github.com/zhanghang1989/image-data/blob/master/' + \ 10 | 'encoding/segmentation/ade20k/ADE_val_00001142.jpg?raw=true' 11 | filename = 'example.jpg' 12 | img = encoding.utils.load_image( 13 | encoding.utils.download(url, filename)).cuda().unsqueeze(0) 14 | 15 | # Make prediction 16 | output = model.evaluate(img) 17 | predict = torch.max(output, 1)[1].cpu().numpy() + 1 18 | 19 | # Get color pallete for visualization 20 | mask = encoding.utils.get_mask_pallete(predict, 'ade20k') 21 | mask.save('output.png') 22 | -------------------------------------------------------------------------------- /experiments/segmentation/test_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import encoding 3 | from option import Options 4 | 5 | if __name__ == "__main__": 6 | args = Options().parse() 7 | print('Creating the model:') 8 | model = encoding.models.get_segmentation_model(args.model, dataset=args.dataset, aux=args.aux, 9 | backbone=args.backbone, 10 | se_loss=args.se_loss, norm_layer=torch.nn.BatchNorm2d) 11 | print(model) 12 | model.cuda() 13 | model.eval() 14 | # random input just to check output shapes (torch.rand avoids the 15 | # uninitialized values a bare torch.Tensor(...) would contain) 16 | x = torch.rand(4, 3, 480, 480).cuda() 17 | with torch.no_grad(): 18 | out = model(x) 19 | for y in out: 20 | print(y.size()) 21 | 
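# Editor's note (added): a self-contained variant of the same shape check,
# without the Options parser; 'fcn', 'ade20k' and 'resnet50' are example
# values, assuming the get_segmentation_model signature used above:
#
#     import torch
#     import encoding
#
#     model = encoding.models.get_segmentation_model(
#         'fcn', dataset='ade20k', backbone='resnet50',
#         aux=False, se_loss=False, norm_layer=torch.nn.BatchNorm2d)
#     model.cuda().eval()
#     with torch.no_grad():
#         for y in model(torch.rand(2, 3, 480, 480).cuda()):
#             print(y.size())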
-------------------------------------------------------------------------------- /scripts/build_docker.sh: -------------------------------------------------------------------------------- 1 | docker build --network=host -t encoding . 2 | -------------------------------------------------------------------------------- /scripts/prepare_ade20k.py: -------------------------------------------------------------------------------- 1 | """Prepare ADE20K dataset""" 2 | import os 3 | import shutil 4 | import argparse 5 | import zipfile 6 | from encoding.utils import download, mkdir 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Initialize ADE20K dataset.', 13 | epilog='Example: python prepare_ade20k.py', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk') 16 | args = parser.parse_args() 17 | return args 18 | 19 | def download_ade(path, overwrite=False): 20 | _AUG_DOWNLOAD_URLS = [ 21 | ('http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip', '219e1696abb36c8ba3a3afe7fb2f4b4606a897c7'), 22 | ('http://data.csail.mit.edu/places/ADEchallenge/release_test.zip', 'e05747892219d10e9243933371a497e905a4860c'),] 23 | download_dir = os.path.join(path, 'downloads') 24 | mkdir(download_dir) 25 | for url, checksum in _AUG_DOWNLOAD_URLS: 26 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 27 | # extract 28 | with zipfile.ZipFile(filename, "r") as zip_ref: 29 | zip_ref.extractall(path=path) 30 | 31 | 32 | if __name__ == '__main__': 33 | args = parse_args() 34 | mkdir(os.path.expanduser('~/.encoding')) 35 | if args.download_dir is not None: 36 | if os.path.islink(_TARGET_DIR): 37 | os.remove(_TARGET_DIR) # replace a stale symlink; os.remove would fail on a real directory 38 | # make symlink 39 | os.symlink(args.download_dir, _TARGET_DIR) 40 | else: 41 | download_ade(_TARGET_DIR, overwrite=False) 42 | 
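# Editor's note (added): the prepare_* scripts below all share this
# download-verify-extract pattern; a minimal sketch using the same
# encoding.utils helpers (the wrapper name is hypothetical):
#
#     import os, zipfile
#     from encoding.utils import download, mkdir
#
#     def fetch_and_extract(url, sha1, root):
#         download_dir = os.path.join(root, 'downloads')
#         mkdir(download_dir)
#         # download() returns the local path and verifies it against sha1_hash
#         filename = download(url, path=download_dir, overwrite=False, sha1_hash=sha1)
#         with zipfile.ZipFile(filename) as zf:
#             zf.extractall(path=root)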
-------------------------------------------------------------------------------- /scripts/prepare_citys.py: -------------------------------------------------------------------------------- 1 | """Prepare Cityscapes dataset""" 2 | import os 3 | import argparse 4 | import zipfile 5 | from encoding.utils import check_sha1, mkdir 6 | 7 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='Initialize Cityscapes dataset.', 12 | epilog='Example: python prepare_citys.py', 13 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 14 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk') 15 | args = parser.parse_args() 16 | return args 17 | 18 | def download_city(path, overwrite=False): 19 | _CITY_DOWNLOAD_URLS = [ 20 | ('gtFine_trainvaltest.zip', '99f532cb1af174f5fcc4c5bc8feea8c66246ddbc'), 21 | ('leftImg8bit_trainvaltest.zip', '2c0b77ce9933cc635adda307fbba5566f5d9d404')] 22 | download_dir = os.path.join(path, 'downloads') 23 | mkdir(download_dir) 24 | for filename, checksum in _CITY_DOWNLOAD_URLS: 25 | # Cityscapes requires registration, so the zips must be placed in the 26 | # downloads folder manually before running this script. 27 | filename = os.path.join(download_dir, filename) 28 | if not os.path.isfile(filename) or not check_sha1(filename, checksum): 29 | raise UserWarning('File {} is missing or its content hash does not match. ' \ 30 | 'Please download it again from https://www.cityscapes-dataset.com/.'.format(filename)) 31 | # extract 32 | with zipfile.ZipFile(filename, "r") as zip_ref: 33 | zip_ref.extractall(path=path) 34 | print("Extracted", filename) 35 | 36 | if __name__ == '__main__': 37 | args = parse_args() 38 | mkdir(os.path.expanduser('~/.encoding')) 39 | if args.download_dir is not None: 40 | if os.path.islink(_TARGET_DIR): 41 | os.remove(_TARGET_DIR) # replace a stale symlink 42 | # make symlink 43 | os.symlink(args.download_dir, _TARGET_DIR) 44 | else: 45 | download_city(_TARGET_DIR, overwrite=False) 46 | 
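# Editor's note (added): assumed workflow for the script above, since
# Cityscapes is gated behind registration:
#   1. download gtFine_trainvaltest.zip and leftImg8bit_trainvaltest.zip
#      from https://www.cityscapes-dataset.com/downloads/
#   2. place both zips under ~/.encoding/data/downloads
#   3. python prepare_citys.py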
-------------------------------------------------------------------------------- /scripts/prepare_coco.py: -------------------------------------------------------------------------------- 1 | """Prepare MS COCO datasets""" 2 | import os 3 | import shutil 4 | import argparse 5 | import zipfile 6 | from encoding.utils import download, mkdir 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Initialize MS COCO dataset.', 13 | epilog='Example: python prepare_coco.py --download-dir ~/mscoco', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk') 16 | args = parser.parse_args() 17 | return args 18 | 19 | def download_coco(path, overwrite=False): 20 | _DOWNLOAD_URLS = [ 21 | ('http://images.cocodataset.org/zips/train2017.zip', 22 | '10ad623668ab00c62c096f0ed636d6aff41faca5'), 23 | ('http://images.cocodataset.org/zips/val2017.zip', 24 | '4950dc9d00dbe1c933ee0170f5797584351d2a41'), 25 | ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', 26 | '8551ee4bb5860311e79dace7e79cb91e432e78b3'), 27 | #('http://images.cocodataset.org/annotations/stuff_annotations_trainval2017.zip', 28 | # '46cdcf715b6b4f67e980b529534e79c2edffe084'), 29 | #('http://images.cocodataset.org/zips/test2017.zip', 30 | # '99813c02442f3c112d491ea6f30cecf421d0e6b3'), 31 | ('https://hangzh.s3.amazonaws.com/encoding/data/coco/train_ids.pth', 32 | '12cd266f97c8d9ea86e15a11f11bcb5faba700b6'), 33 | ('https://hangzh.s3.amazonaws.com/encoding/data/coco/val_ids.pth', 34 | '4ce037ac33cbf3712fd93280a1c5e92dae3136bb'), 35 | ] 36 | mkdir(path) 37 | for url, checksum in _DOWNLOAD_URLS: 38 | filename = download(url, path=path, overwrite=overwrite, sha1_hash=checksum) 39 | # extract zip archives; the .pth index files are moved into annotations/ 40 | if os.path.splitext(filename)[1] == '.zip': 41 | with zipfile.ZipFile(filename) as zf: 42 | zf.extractall(path=path) 43 | else: 44 | shutil.move(filename, os.path.join(path, 'annotations/' + os.path.basename(filename))) 45 | 46 | 47 | def install_coco_api(): 48 | repo_url = "https://github.com/cocodataset/cocoapi" 49 | os.system("git clone " + repo_url) 50 | os.system("cd cocoapi/PythonAPI/ && python setup.py install") 51 | shutil.rmtree('cocoapi') 52 | try: 53 | import pycocotools 54 | except Exception: 55 | print("Installing COCO API failed, please install it manually from %s" % repo_url) 56 | 57 | 58 | if __name__ == '__main__': 59 | args = parse_args() 60 | mkdir(os.path.expanduser('~/.encoding')) 61 | if args.download_dir is not None: 62 | if os.path.islink(_TARGET_DIR): 63 | os.remove(_TARGET_DIR) 64 | # make symlink 65 | os.symlink(args.download_dir, _TARGET_DIR) 66 | else: 67 | download_coco(_TARGET_DIR, overwrite=False) 68 | install_coco_api() 69 | 
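# Editor's note (added): if the source install above fails, pycocotools is
# also published on PyPI and is usually sufficient:
#
#     pip install pycocotools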
-------------------------------------------------------------------------------- /scripts/prepare_imagenet.py: -------------------------------------------------------------------------------- 1 | """Prepare the ImageNet dataset""" 2 | import os 3 | import argparse 4 | import tarfile 5 | import pickle 6 | import gzip 7 | import subprocess 8 | from tqdm import tqdm 9 | from encoding.utils import check_sha1, download, mkdir 10 | 11 | _TARGET_DIR = os.path.expanduser('~/.encoding/data/ILSVRC2012') 12 | _TRAIN_TAR = 'ILSVRC2012_img_train.tar' 13 | _TRAIN_TAR_SHA1 = '43eda4fe35c1705d6606a6a7a633bc965d194284' 14 | _VAL_TAR = 'ILSVRC2012_img_val.tar' 15 | _VAL_TAR_SHA1 = '5f3f73da3395154b60528b2b2a2caf2374f5f178' 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser( 19 | description='Setup the ImageNet dataset.', 20 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 21 | parser.add_argument('--download-dir', required=True, 22 | help="The directory that contains the downloaded tar files") 23 | parser.add_argument('--target-dir', default=_TARGET_DIR, 24 | help="The directory to store the extracted images") 25 | parser.add_argument('--checksum', action='store_true', 26 | help="Check integrity before extracting.") 27 | parser.add_argument('--with-rec', action='store_true', 28 | help="Also build image record files.") 29 | parser.add_argument('--num-thread', type=int, default=1, 30 | help="Number of threads to use when building the image record file.") 31 | args = parser.parse_args() 32 | return args 33 | 34 | def check_file(filename, checksum, sha1): 35 | if not os.path.exists(filename): 36 | raise ValueError('File not found: ' + filename) 37 | if checksum and not check_sha1(filename, sha1): 38 | raise ValueError('Corrupted file: ' + filename) 39 | 40 | def extract_train(tar_fname, target_dir, with_rec=False, num_thread=1): 41 | mkdir(target_dir) 42 | with tarfile.open(tar_fname) as tar: 43 | print("Extracting " + tar_fname + "...") 44 | # extract each class one-by-one 45 | pbar = tqdm(total=len(tar.getnames())) 46 | for class_tar in tar: 47 | pbar.set_description('Extract ' + class_tar.name) 48 | tar.extract(class_tar, target_dir) 49 | class_fname = os.path.join(target_dir, class_tar.name) 50 | class_dir = os.path.splitext(class_fname)[0] 51 | os.mkdir(class_dir) 52 | with tarfile.open(class_fname) as f: 53 | f.extractall(class_dir) 54 | os.remove(class_fname) 55 | pbar.update(1) 56 | pbar.close() 57 | 58 | def extract_val(tar_fname, target_dir, with_rec=False, num_thread=1): 59 | mkdir(target_dir) 60 | print('Extracting ' + tar_fname) 61 | with tarfile.open(tar_fname) as tar: 62 | tar.extractall(target_dir) 63 | # move the flat validation images into per-class subfolders 64 | subprocess.call(["wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash"], 65 | cwd=target_dir, shell=True) 66 | 67 | 68 | def main(): 69 | args = parse_args() 70 | 71 | target_dir = os.path.expanduser(args.target_dir) 72 | #if os.path.exists(target_dir): 73 | # raise ValueError('Target dir ['+target_dir+'] exists. Remove it first') 74 | 75 | download_dir = os.path.expanduser(args.download_dir) 76 | train_tar_fname = os.path.join(download_dir, _TRAIN_TAR) 77 | check_file(train_tar_fname, args.checksum, _TRAIN_TAR_SHA1) 78 | val_tar_fname = os.path.join(download_dir, _VAL_TAR) 79 | check_file(val_tar_fname, args.checksum, _VAL_TAR_SHA1) 80 | 81 | build_rec = args.with_rec 82 | if build_rec: 83 | os.makedirs(os.path.join(target_dir, 'rec')) 84 | extract_train(train_tar_fname, os.path.join(target_dir, 'train'), build_rec, args.num_thread) 85 | extract_val(val_tar_fname, os.path.join(target_dir, 'val'), build_rec, args.num_thread) 86 | 87 | if __name__ == '__main__': 88 | main() 89 | 
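# Editor's note (added): a quick post-extraction sanity check (hypothetical,
# not part of the script) -- ImageNet-1k should yield 1000 class folders per
# split once valprep.sh has sorted the validation images:
#
#     import os
#     root = os.path.expanduser('~/.encoding/data/ILSVRC2012')
#     for split in ('train', 'val'):
#         d = os.path.join(root, split)
#         print(split, sum(os.path.isdir(os.path.join(d, c)) for c in os.listdir(d)))  # expect 1000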
-------------------------------------------------------------------------------- /scripts/prepare_minc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import argparse 4 | import tarfile 5 | from encoding.utils import download, mkdir 6 | 7 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='Initialize MINC dataset.', 12 | epilog='Example: python prepare_minc.py', 13 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 14 | parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk') 15 | parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') 16 | parser.add_argument('--overwrite', action='store_true', 17 | help='overwrite downloaded files if set, in case they are corrupted') 18 | args = parser.parse_args() 19 | return args 20 | 21 | def download_minc(path, overwrite=False): 22 | _AUG_DOWNLOAD_URLS = [ 23 | ('http://opensurfaces.cs.cornell.edu/static/minc/minc-2500.tar.gz', 'bcccbb3b1ab396ef540f024a5ba23eff54f7fe31')] 24 | download_dir = os.path.join(path, 'downloads') 25 | mkdir(download_dir) 26 | for url, checksum in _AUG_DOWNLOAD_URLS: 27 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 28 | # extract 29 | with tarfile.open(filename) as tar: 30 | tar.extractall(path=path) 31 | 32 | if __name__ == '__main__': 33 | args = parse_args() 34 | mkdir(os.path.expanduser('~/.encoding')) 35 | if args.download_dir is not None: 36 | if os.path.islink(_TARGET_DIR): 37 | os.remove(_TARGET_DIR) 38 | os.symlink(args.download_dir, _TARGET_DIR) 39 | else: 40 | download_minc(_TARGET_DIR, overwrite=False) 41 | -------------------------------------------------------------------------------- /scripts/prepare_pascal.py: -------------------------------------------------------------------------------- 1 | """Prepare PASCAL VOC datasets""" 2 | import os 3 | import shutil 4 | import argparse 5 | import tarfile 6 | from encoding.utils import download, mkdir 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='Initialize PASCAL VOC dataset.', 14 | epilog='Example: python prepare_pascal.py', 15 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 16 | parser.add_argument('--download-dir', type=str, default=None, help='dataset directory on disk') 17 | parser.add_argument('--no-download', action='store_true', help='disable automatic download if set') 18 | parser.add_argument('--overwrite', action='store_true', help='overwrite downloaded files if set, in case they are corrupted') 19 | args = parser.parse_args() 20 | return args 21 | 22 | 23 | def download_voc(path, overwrite=False): 24 | _DOWNLOAD_URLS = [ 25 | ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', 26 | '4e443f8a2eca6b1dac8a6c57641b67dd40621a49')] 27 | download_dir = os.path.join(path, 'downloads') 28 | mkdir(download_dir) 29 | for url, checksum in _DOWNLOAD_URLS: 30 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 31 | # extract 32 | with tarfile.open(filename) as tar: 33 | tar.extractall(path=path) 34 | 35 | 36 | def download_aug(path, overwrite=False): 37 | _AUG_DOWNLOAD_URLS = [ 38 | ('http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz', '7129e0a480c2d6afb02b517bb18ac54283bfaa35')] 39 | download_dir = os.path.join(path, 'downloads') 40 | mkdir(download_dir) 41 | for url, checksum in _AUG_DOWNLOAD_URLS: 42 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 43 | # extract 44 | with tarfile.open(filename) as tar: 45 | tar.extractall(path=path) 46 | shutil.move(os.path.join(path, 'benchmark_RELEASE'), 47 | os.path.join(path, 'VOCaug')) 48 | filenames = ['VOCaug/dataset/train.txt', 'VOCaug/dataset/val.txt'] 49 | # generate trainval.txt by concatenating the train and val split lists 50 | with open(os.path.join(path, 'VOCaug/dataset/trainval.txt'), 'w') as outfile: 51 | for fname in filenames: 52 | fname = os.path.join(path, fname) 53 | with open(fname) as infile: 54 | for line in infile: 55 | outfile.write(line) 56 | 57 | 58 | if __name__ == '__main__': 59 | args = parse_args() 60 | mkdir(os.path.expanduser('~/.encoding')) 61 | if args.download_dir is not None: 62 | if os.path.islink(_TARGET_DIR): 63 | os.remove(_TARGET_DIR) 64 | os.symlink(args.download_dir, _TARGET_DIR) 65 | else: 66 | download_voc(_TARGET_DIR, overwrite=False) 67 | download_aug(_TARGET_DIR, overwrite=False) 68 | -------------------------------------------------------------------------------- /scripts/prepare_pcontext.py: -------------------------------------------------------------------------------- 1 | """Prepare PASCAL Context dataset""" 2 | import os 3 | import shutil 4 | import argparse 5 | import tarfile 6 | from encoding.utils import download, mkdir 7 | 8 | _TARGET_DIR = os.path.expanduser('~/.encoding/data') 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Initialize PASCAL Context dataset.', 13 | epilog='Example: python prepare_pcontext.py', 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 15 | parser.add_argument('--download-dir', default=None, help='dataset directory on disk') 16 | args = parser.parse_args() 17 | return args 18 | 19 | def download_pcontext(path, overwrite=False): 20 | _AUG_DOWNLOAD_URLS = [ 21 | ('http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar', 22 | 'bf9985e9f2b064752bf6bd654d89f017c76c395a'), 23 | ('https://codalabuser.blob.core.windows.net/public/trainval_merged.json', 24 | '169325d9f7e9047537fedca7b04de4dddf10b881'), 25 | ('https://hangzh.s3.amazonaws.com/encoding/data/pcontext/train.pth', 26 | '4bfb49e8c1cefe352df876c9b5434e655c9c1d07'), 27 | ('https://hangzh.s3.amazonaws.com/encoding/data/pcontext/val.pth', 28 | 'ebedc94247ec616c57b9a2df15091784826a7b0c'), 29 | ] 30 | download_dir = os.path.join(path, 'downloads') 31 | mkdir(download_dir) 32 | for url, checksum in _AUG_DOWNLOAD_URLS: 33 | filename = download(url, path=download_dir, overwrite=overwrite, sha1_hash=checksum) 34 | # extract the VOC tarball; the other files go to VOCdevkit/VOC2010/ 35 | if os.path.splitext(filename)[1] == '.tar': 36 | with tarfile.open(filename) as tar: 37 | tar.extractall(path=path) 38 | else: 39 | shutil.move(filename, os.path.join(path, 'VOCdevkit/VOC2010/' + os.path.basename(filename))) 40 | 41 | def install_pcontext_api(): 42 | repo_url = "https://github.com/zhanghang1989/detail-api" 43 | os.system("git clone " + repo_url) 44 | os.system("cd detail-api/PythonAPI/ && python setup.py install") 45 | shutil.rmtree('detail-api') 46 | try: 47 | import detail 48 | except Exception: 49 | print("Installing PASCAL Context API failed, please install it manually from %s" % repo_url) 50 | 51 | 52 | if __name__ == '__main__': 53 | args = parse_args() 54 | mkdir(os.path.expanduser('~/.encoding')) 55 | if args.download_dir is not None: 56 | if os.path.islink(_TARGET_DIR): 57 | os.remove(_TARGET_DIR) 58 | # make symlink 59 | os.symlink(args.download_dir, _TARGET_DIR) 60 | else: 61 | download_pcontext(_TARGET_DIR, overwrite=False) 62 | install_pcontext_api() 63 | 
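# Editor's note (added): once prepared, these datasets load through the same
# API the unit tests use (see test_dataset.py below); a minimal sketch,
# assuming the default transform is acceptable:
#
#     from encoding.datasets import get_segmentation_dataset
#     trainset = get_segmentation_dataset('pcontext', split='train', mode='train')
#     print(len(trainset), trainset.NUM_CLASS)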
-------------------------------------------------------------------------------- /scripts/run_docker.sh: -------------------------------------------------------------------------------- 1 | docker run --rm -it --ulimit memlock=-1 --ulimit stack=67108864 -v $COCO_DIR:/coco --ipc=host --gpus=all encoding 2 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import os 12 | import glob 13 | 14 | from setuptools import setup, find_packages 15 | 16 | import torch 17 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 18 | 19 | cwd = os.path.dirname(os.path.abspath(__file__)) 20 | 21 | version = '1.2.2' 22 | try: 23 | if not os.getenv('RELEASE'): 24 | from datetime import date 25 | today = date.today() 26 | day = today.strftime("b%Y%m%d") 27 | version += day 28 | except Exception: 29 | pass 30 | 31 | def create_version_file(): 32 | global version, cwd 33 | print('-- Building version ' + version) 34 | version_path = os.path.join(cwd, 'encoding', 'version.py') 35 | with open(version_path, 'w') as f: 36 | f.write('"""This is encoding version file."""\n') 37 | f.write("__version__ = '{}'\n".format(version)) 38 | 39 | requirements = [ 40 | 'numpy', 41 | 'tqdm', 42 | 'nose', 43 | 'portalocker', 44 | 'torch>=1.4.0', 45 | 'torchvision>=0.5.0', 46 | 'Pillow', 47 | 'scipy', 48 | 'requests', 49 | ] 50 | 51 | def get_extensions(): 52 | this_dir = os.path.dirname(os.path.abspath(__file__)) 53 | 54 | cpu_extensions_dir = os.path.join(this_dir, "encoding", "lib", "cpu") 55 | gpu_extensions_dir = os.path.join(this_dir, "encoding", "lib", "gpu") 56 | 57 | source_cpu = glob.glob(os.path.join(cpu_extensions_dir, "*.cpp")) 58 | source_cuda = glob.glob(os.path.join(gpu_extensions_dir, "*.cpp")) + \ 59 | glob.glob(os.path.join(gpu_extensions_dir, "*.cu")) 60 | 61 | print('c++: ', source_cpu) 62 | print('cuda: ', source_cuda) 63 | 64 | extra_compile_args = {"cxx": []} 65 | include_dirs = [cpu_extensions_dir] 66 | 67 | ext_modules = [ 68 | CppExtension( 69 | "encoding.cpu", 70 | source_cpu, 71 | include_dirs=include_dirs, 72 | extra_compile_args=extra_compile_args, 73 | ) 74 | ] 75 | 76 | if CUDA_HOME is not None: 77 | define_macros = [("WITH_CUDA", None)] 78 | include_dirs += [gpu_extensions_dir] 79 | extra_compile_args["nvcc"] = [ 80 | "-DCUDA_HAS_FP16=1", 81 | "-D__CUDA_NO_HALF_OPERATORS__", 82 | "-D__CUDA_NO_HALF_CONVERSIONS__", 83 | "-D__CUDA_NO_HALF2_OPERATORS__", 84 | ] 85 | 86 | ext_modules.extend([ 87 | CUDAExtension( 88 | "encoding.gpu", 89 | source_cuda, 90 | include_dirs=include_dirs, 91 | define_macros=define_macros, 92 | extra_compile_args=extra_compile_args, 93 | ) 94 | ]) 95 | 96 | return ext_modules 97 | 98 | if __name__ == '__main__': 99 | create_version_file() 100 | setup( 101 | name="torch-encoding", 102 | version=version, 103 | author="Hang Zhang", 104 | author_email="zhanghang0704@gmail.com", 105 | url="https://github.com/zhanghang1989/PyTorch-Encoding", 106 | description="PyTorch Encoding Package", 107 | long_description=open('README.md').read(), 108 | long_description_content_type='text/markdown', 109 | license='MIT', 110 | install_requires=requirements, 111 | packages=find_packages(exclude=["tests", "experiments"]), 112 | package_data={ 'encoding': [ 113 | 'LICENSE', 114 | 'lib/cpu/*.h', 115 | 'lib/cpu/*.cpp', 116 | 'lib/gpu/*.h', 117 | 'lib/gpu/*.cpp', 118 | 'lib/gpu/*.cu', 119 | ]}, 120 | ext_modules=get_extensions(), 121 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 122 | ) 123 | -------------------------------------------------------------------------------- /tests/unit_test/test_dataset.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | from tqdm import tqdm 12 | from torch.utils import data 13 | import torchvision.transforms as transform 14 | from encoding.datasets import get_segmentation_dataset 15 | 16 | def test_ade_dataset(): 17 | 18 | def test_dataset(dataset_name): 19 | input_transform = transform.Compose([ 20 | transform.ToTensor(), 21 | transform.Normalize([.485, .456, .406], [.229, .224, .225])]) 22 | trainset = get_segmentation_dataset(dataset_name, split='val', mode='train', 23 | transform=input_transform) 24 | trainloader = data.DataLoader(trainset, batch_size=16, 25 | drop_last=True, shuffle=True) 26 | tbar = tqdm(trainloader) 27 | max_label = -10 28 | for i, (image, target) in enumerate(tbar): 29 | tmax = target.max().item() 30 | tmin = target.min().item() 31 | assert(tmin >= -1) 32 | if tmax > max_label: 33 | max_label = tmax 34 | assert(max_label < trainset.NUM_CLASS) 35 | tbar.set_description("Batch %d, max label %d"%(i, max_label)) 36 | test_dataset('ade20k') 37 | 38 | if __name__ == "__main__": 39 | import nose 40 | nose.runmodule() 41 | 
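# Editor's note (added): each unit-test module in this folder is runnable on
# its own through its nose.runmodule() entry point, e.g.:
#
#     python tests/unit_test/test_dataset.py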
-------------------------------------------------------------------------------- /tests/unit_test/test_function.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import numpy as np 12 | import torch 13 | from torch.autograd import Variable, gradcheck 14 | import encoding 15 | 16 | EPS = 1e-3 17 | ATOL = 1e-3 18 | 19 | def _assert_tensor_close(a, b, atol=ATOL, rtol=EPS): 20 | npa, npb = a.cpu().numpy(), b.cpu().numpy() 21 | assert np.allclose(npa, npb, rtol=rtol, atol=atol), \ 22 | 'Tensor close check failed\n{}\n{}\nadiff={}, rdiff={}'.format( 23 | a, b, np.abs(npa - npb).max(), np.abs((npa - npb) / np.fmax(npa, 1e-5)).max()) 24 | 25 | def test_aggregate(): 26 | B,N,K,D = 2,3,4,5 27 | A = Variable(torch.cuda.DoubleTensor(B,N,K).uniform_(-0.5,0.5), 28 | requires_grad=True) 29 | X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), 30 | requires_grad=True) 31 | C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), 32 | requires_grad=True) 33 | input = (A, X, C) 34 | test = gradcheck(encoding.functions.aggregate, input, eps=EPS, atol=ATOL) 35 | print('Testing aggregate(): {}'.format(test)) 36 | 37 | def test_scaled_l2(): 38 | B,N,K,D = 2,3,4,5 39 | X = Variable(torch.cuda.DoubleTensor(B,N,D).uniform_(-0.5,0.5), 40 | requires_grad=True) 41 | C = Variable(torch.cuda.DoubleTensor(K,D).uniform_(-0.5,0.5), 42 | requires_grad=True) 43 | S = Variable(torch.cuda.DoubleTensor(K).uniform_(-0.5,0.5), 44 | requires_grad=True) 45 | input = (X, C, S) 46 | test = gradcheck(encoding.functions.scaled_l2, input, eps=EPS, atol=ATOL) 47 | print('Testing scaled_l2(): {}'.format(test)) 48 | 49 | 50 | def test_moments(): 51 | B,C,H = 2,3,4 52 | X = Variable(torch.cuda.DoubleTensor(B,C,H).uniform_(-0.5,0.5), 53 | requires_grad=True) 54 | input = (X,) 55 | test = gradcheck(encoding.functions.moments, input, eps=EPS, atol=ATOL) 56 | print('Testing moments(): {}'.format(test)) 57 | 58 | def test_non_max_suppression(): 59 | def _test_nms(cuda): 60 | # check a small test case 61 | boxes = torch.Tensor([ 62 | [[10.2, 23., 50., 20.], 63 | [11.3, 23., 52., 20.1], 64 | [23.2, 102.3, 23.3, 50.3], 65 | [101.2, 32.4, 70.6, 70.], 66 | [100.2, 30.9, 70.7, 69.]], 67 | [[200.3, 234., 530., 320.], 68 | [110.3, 223., 152., 420.1], 69 | [243.2, 240.3, 50.3, 30.3], 70 | [243.2, 236.4, 48.6, 30.], 71 | [100.2, 310.9, 170.7, 691.]]]) 72 | 73 | scores = torch.Tensor([ 74 | [0.9, 0.7, 0.11, 0.23, 0.8], 75 | [0.13, 0.89, 0.45, 0.23, 0.3]]) 76 | 77 | if cuda: 78 | boxes = boxes.cuda() 79 | scores = scores.cuda() 80 | 81 | expected_output = ( 82 | torch.ByteTensor( 83 | [[1, 1, 0, 0, 1], [1, 1, 1, 0, 1]]), 84 | torch.LongTensor( 85 | [[0, 4, 1, 3, 2], [1, 2, 4, 3, 0]]) 86 | ) 87 | 88 | mask, inds = encoding.functions.NonMaxSuppression(boxes, scores, 0.7) 89 | _assert_tensor_close(mask, expected_output[0]) 90 | _assert_tensor_close(inds, expected_output[1]) 91 | 92 | _test_nms(False) 93 | _test_nms(True) 94 | 95 | if __name__ == '__main__': 96 | import nose 97 | nose.runmodule() 98 | 
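# Editor's note (added): the gradcheck pattern above extends to any op in
# encoding.functions; a minimal sketch without the deprecated Variable wrapper:
#
#     import torch
#     from torch.autograd import gradcheck
#     import encoding
#
#     X = torch.rand(2, 3, 4, dtype=torch.float64, device='cuda', requires_grad=True)
#     print(gradcheck(encoding.functions.moments, (X,), eps=1e-3, atol=1e-3))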
-------------------------------------------------------------------------------- /tests/unit_test/test_model.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## Email: zhanghang0704@gmail.com 4 | ## Copyright (c) 2020 5 | ## 6 | ## This source code is licensed under the MIT-style license found in the 7 | ## LICENSE file in the root directory of this source tree 8 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 9 | 10 | import torch 11 | import encoding 12 | 13 | def test_model_inference(): 14 | x = torch.rand(1, 3, 224, 224) 15 | for model_name in encoding.models.pretrained_model_list(): 16 | print('Doing: ', model_name) 17 | if 'wideresnet' in model_name: continue # need multi-gpu 18 | model = encoding.models.get_model(model_name, pretrained=True) 19 | model.eval() 20 | y = model(x) 21 | 22 | if __name__ == "__main__": 23 | import nose 24 | nose.runmodule() 25 | -------------------------------------------------------------------------------- /tests/unit_test/test_module.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | import numpy as np 12 | import torch 13 | from torch.autograd import Variable, gradcheck 14 | import encoding 15 | 16 | EPS = 1e-3 17 | ATOL = 1e-3 18 | 19 | def _assert_tensor_close(a, b, atol=ATOL, rtol=EPS): 20 | npa, npb = a.cpu().numpy(), b.cpu().numpy() 21 | assert np.allclose(npa, npb, rtol=rtol, atol=atol), \ 22 | 'Tensor close check failed\n{}\n{}\nadiff={}, rdiff={}'.format( 23 | a, b, np.abs(npa - npb).max(), np.abs((npa - npb) / np.fmax(npa, 1e-5)).max()) 24 | 25 | def test_encoding(): 26 | B,C,H,W,K = 2,3,4,5,6 27 | X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), 28 | requires_grad=True) 29 | input = (X,) 30 | layer = encoding.nn.Encoding(C,K).double().cuda() 31 | test = gradcheck(layer, input, eps=EPS, atol=ATOL) 32 | print('Testing encoding(): {}'.format(test)) 33 | 34 | def test_all_reduce(): 35 | ngpu = torch.cuda.device_count() 36 | X = [torch.DoubleTensor(2,4,4).uniform_(-0.5,0.5).cuda(i) for i in range(ngpu)] 37 | for x in X: 38 | x.requires_grad = True 39 | Y = encoding.parallel.allreduce(1, *X) 40 | assert (len(X) == len(Y)) 41 | for i in range(1, ngpu): 42 | _assert_tensor_close(Y[i].data, Y[0].data) 43 | input = (1, *X) 44 | test = gradcheck(encoding.parallel.allreduce, input, eps=EPS, atol=ATOL) 45 | print('Testing allreduce(): {}'.format(test)) 46 | 47 | def testSyncBN(): 48 | def _check_batchnorm_result(bn1, bn2, input, is_train, cuda=False): 49 | def _find_bn(module): 50 | for m in module.modules(): 51 | if isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d, 52 | encoding.nn.SyncBatchNorm)): 53 | return m 54 | def _syncParameters(bn1, bn2): 55 | bn1.reset_parameters() 56 | bn2.reset_parameters() 57 | if bn1.affine and bn2.affine: 58 | bn2.weight.data.copy_(bn1.weight.data) 59 | bn2.bias.data.copy_(bn1.bias.data) 60 | bn2.running_mean.copy_(bn1.running_mean) 61 | 
bn2.running_var.copy_(bn1.running_var) 62 | 63 | bn1.train(mode=is_train) 64 | bn2.train(mode=is_train) 65 | 66 | if cuda: 67 | input = input.cuda() 68 | # using the same values for gamma and beta 69 | _syncParameters(_find_bn(bn1), _find_bn(bn2)) 70 | 71 | input1 = Variable(input.clone().detach(), requires_grad=True) 72 | input2 = Variable(input.clone().detach(), requires_grad=True) 73 | if is_train: 74 | bn1.train() 75 | bn2.train() 76 | output1 = bn1(input1) 77 | output2 = bn2(input2) 78 | else: 79 | bn1.eval() 80 | bn2.eval() 81 | with torch.no_grad(): 82 | output1 = bn1(input1) 83 | output2 = bn2(input2) 84 | # assert forwarding 85 | #_assert_tensor_close(input1.data, input2.data) 86 | _assert_tensor_close(output1.data, output2.data) 87 | if not is_train: 88 | return 89 | (output1 ** 2).sum().backward() 90 | (output2 ** 2).sum().backward() 91 | _assert_tensor_close(_find_bn(bn1).bias.grad.data, _find_bn(bn2).bias.grad.data) 92 | _assert_tensor_close(_find_bn(bn1).weight.grad.data, _find_bn(bn2).weight.grad.data) 93 | _assert_tensor_close(input1.grad.data, input2.grad.data) 94 | _assert_tensor_close(_find_bn(bn1).running_mean, _find_bn(bn2).running_mean) 95 | #_assert_tensor_close(_find_bn(bn1).running_var, _find_bn(bn2).running_var) 96 | 97 | bn = torch.nn.BatchNorm2d(10).cuda().double() 98 | sync_bn = encoding.nn.SyncBatchNorm(10, inplace=True, sync=True).cuda().double() 99 | sync_bn = torch.nn.DataParallel(sync_bn).cuda() 100 | # check with unsync version 101 | #_check_batchnorm_result(bn, sync_bn, torch.rand(2, 1, 2, 2).double(), True, cuda=True) 102 | for i in range(10): 103 | print(i) 104 | _check_batchnorm_result(bn, sync_bn, torch.rand(16, 10, 16, 16).double(), True, cuda=True) 105 | _check_batchnorm_result(bn, sync_bn, torch.rand(16, 10, 16, 16).double(), False, cuda=True) 106 | 107 | 108 | def test_Atten_Module(): 109 | B, C, H, W = 8, 24, 10, 10 110 | X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), 111 | requires_grad=True) 112 | layer1 = encoding.nn.ACFModule(4, 2, 24, 24, 24).double().cuda() 113 | Y = layer1(X) 114 | 115 | if __name__ == '__main__': 116 | import nose 117 | nose.runmodule() 118 | -------------------------------------------------------------------------------- /tests/unit_test/test_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from encoding.utils.metrics import * 4 | 5 | def test_segmentation_metrics(): 6 | # check torch evaluation metrics 7 | rows, cols = 640, 480 8 | nclass = 30 9 | # numpy data 10 | im_lab = np.random.randint(0, nclass, size=(rows, cols)) 11 | mask = np.random.random((nclass, rows, cols)) 12 | im_pred = mask.argmax(axis=0) 13 | # torch data 14 | tim_lab = torch.from_numpy(im_lab).unsqueeze(0).long() 15 | tim_pred = torch.from_numpy(mask).unsqueeze(0) 16 | # numpy prediction 17 | pixel_correct, pixel_labeled = pixel_accuracy(im_pred, im_lab) 18 | area_inter, area_union = intersection_and_union(im_pred, im_lab, nclass) 19 | pixAcc = 1.0 * pixel_correct / (np.spacing(1) + pixel_labeled) 20 | IoU = 1.0 * area_inter / (np.spacing(1) + area_union) 21 | mIoU = IoU.mean() 22 | print('numpy prediction is:', pixAcc, mIoU) 23 | # torch metric prediction 24 | pixel_correct, pixel_labeled = batch_pix_accuracy(tim_pred, tim_lab) 25 | area_inter, area_union = batch_intersection_union(tim_pred, tim_lab, nclass) 26 | batch_pixAcc = 1.0 * pixel_correct / (np.spacing(1) + pixel_labeled) 27 | IoU = 1.0 * area_inter / (np.spacing(1) + 
area_union) 28 | batch_mIoU = IoU.mean() 29 | print('torch prediction is:', batch_pixAcc, batch_mIoU) 30 | assert abs(batch_pixAcc - pixAcc) < 1e-3 31 | assert abs(batch_mIoU - mIoU) < 1e-3 32 | --------------------------------------------------------------------------------
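# Editor's note (added): a tiny worked example of the two metrics checked
# above, assuming the repo convention that label 0 is ignored (classes start at 1):
#   pred  = [1, 2, 2, 1]
#   label = [1, 2, 1, 1]
#   pixAcc = 3 correct / 4 labeled = 0.75
#   IoU(class 1) = 2/3, IoU(class 2) = 1/2, so mIoU = 7/12 ~= 0.583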