├── .gitignore ├── LICENSE ├── README.md ├── cls ├── configs │ ├── _base_ │ │ ├── models │ │ │ ├── rednet101.py │ │ │ ├── rednet152.py │ │ │ ├── rednet26.py │ │ │ ├── rednet38.py │ │ │ └── rednet50.py │ │ └── schedules │ │ │ └── imagenet_bs2048_coslr_130e.py │ └── rednet │ │ ├── rednet101_b32x64_warmup_coslr_imagenet.py │ │ ├── rednet152_b32x64_warmup_coslr_imagenet.py │ │ ├── rednet26_b32x64_warmup_coslr_imagenet.py │ │ ├── rednet38_b32x64_warmup_coslr_imagenet.py │ │ └── rednet50_b32x64_warmup_coslr_imagenet.py └── mmcls │ └── models │ ├── backbones │ ├── __init__.py │ └── rednet.py │ └── utils │ ├── involution_cuda.py │ └── involution_naive.py ├── det ├── configs │ ├── _base_ │ │ ├── models │ │ │ ├── faster_rcnn_red50_fpn.py │ │ │ ├── faster_rcnn_red50_neck_fpn.py │ │ │ ├── faster_rcnn_red50_neck_fpn_head.py │ │ │ ├── mask_rcnn_red50_fpn.py │ │ │ ├── mask_rcnn_red50_neck_fpn.py │ │ │ ├── mask_rcnn_red50_neck_fpn_head.py │ │ │ ├── retinanet_red50_fpn.py │ │ │ └── retinanet_red50_neck_fpn.py │ │ └── schedules │ │ │ └── schedule_1x_warmup.py │ └── involution │ │ ├── faster_rcnn_red50_fpn_1x_coco.py │ │ ├── faster_rcnn_red50_neck_fpn_1x_coco.py │ │ ├── faster_rcnn_red50_neck_fpn_head_1x_coco.py │ │ ├── mask_rcnn_red50_fpn_1x_coco.py │ │ ├── mask_rcnn_red50_neck_fpn_1x_coco.py │ │ ├── mask_rcnn_red50_neck_fpn_head_1x_coco.py │ │ ├── retinanet_red50_fpn_1x_coco.py │ │ └── retinanet_red50_neck_fpn_1x_coco.py └── mmdet │ ├── datasets │ └── utils.py │ └── models │ ├── backbones │ ├── __init__.py │ ├── base_backbone.py │ └── rednet.py │ ├── dense_heads │ ├── __init__.py │ └── rpn_head_involution.py │ ├── necks │ ├── __init__.py │ └── fpn_involution.py │ ├── roi_heads │ ├── __init__.py │ └── mask_heads │ │ ├── __init__.py │ │ └── fcn_mask_head_involution.py │ └── utils │ ├── involution_cuda.py │ └── involution_naive.py ├── fig ├── complexity.png ├── involution.png └── parameter.png └── seg ├── configs ├── _base_ │ └── models │ │ ├── fpn_red50.py │ │ ├── fpn_red50_neck.py │ │ └── upernet_red50.py └── involution │ ├── fpn_red50_512x1024_80k_cityscapes.py │ ├── fpn_red50_neck_512x1024_80k_cityscapes.py │ └── upernet_red50_512x1024_80k_cityscapes.py └── mmseg └── models ├── backbones ├── __init__.py ├── base_backbone.py └── rednet.py ├── necks ├── __init__.py └── fpn_involution.py └── utils ├── involution_cuda.py └── involution_naive.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Duo Li 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # involution 2 | 3 | Official implementation of a neural operator as described in [Involution: Inverting the Inherence of Convolution for Visual Recognition](https://arxiv.org/abs/2103.06255) (CVPR'21) 4 | 5 | By [Duo Li](https://duoli.org/), [Jie Hu](https://github.com/hujie-frank), [Changhu Wang](https://scholar.google.com/citations?user=DsVZkjAAAAAJ), [Xiangtai Li](https://github.com/lxtGH), [Qi She](https://scholar.google.com/citations?user=iHoGTt4AAAAJ), [Lei Zhu](https://github.com/zh460045050), [Tong Zhang](http://tongzhang-ml.org/), and [Qifeng Chen](https://cqf.io/) 6 | 7 |

8 | 
9 | **TL;DR.** `involution` is a general-purpose neural primitive that is versatile across a spectrum of deep learning models on different vision tasks. `involution` bridges `convolution` and `self-attention` in design, while being more efficient and effective than `convolution` and simpler than `self-attention` in form.
10 | 
11 |
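To make the design concrete, here is a minimal, self-contained PyTorch sketch of the operator, mirroring the unfold-based reference implementation shipped in `cls/mmcls/models/utils/involution_naive.py` (kernel size 7, 16 channels per group and a channel reduction ratio of 4 are the defaults used throughout this repo). The standalone `Involution` class below is illustrative only and not part of the released code.

```python
import torch
import torch.nn as nn


class Involution(nn.Module):
    """Standalone sketch of the involution operator (unfold-based)."""

    def __init__(self, channels, kernel_size=7, stride=1,
                 group_channels=16, reduction_ratio=4):
        super().__init__()
        self.channels = channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.group_channels = group_channels
        self.groups = channels // group_channels
        # kernel generation: aggregation weights are predicted from the input itself
        self.reduce = nn.Sequential(
            nn.Conv2d(channels, channels // reduction_ratio, 1),
            nn.BatchNorm2d(channels // reduction_ratio),
            nn.ReLU(inplace=True))
        self.span = nn.Conv2d(channels // reduction_ratio,
                              kernel_size ** 2 * self.groups, 1)
        self.down = nn.AvgPool2d(stride) if stride > 1 else nn.Identity()
        self.unfold = nn.Unfold(kernel_size, dilation=1,
                                padding=(kernel_size - 1) // 2, stride=stride)

    def forward(self, x):
        b = x.size(0)
        # per-pixel, per-group kernels: (B, groups, 1, K*K, H', W')
        weight = self.span(self.reduce(self.down(x)))
        h, w = weight.shape[-2:]
        weight = weight.view(b, self.groups, 1, self.kernel_size ** 2, h, w)
        # K*K neighbourhoods of the input: (B, groups, group_channels, K*K, H', W')
        patches = self.unfold(x).view(b, self.groups, self.group_channels,
                                      self.kernel_size ** 2, h, w)
        # multiply-accumulate over the K*K positions, shared within each group
        return (weight * patches).sum(dim=3).view(b, self.channels, h, w)


x = torch.randn(2, 64, 56, 56)
print(Involution(64)(x).shape)  # torch.Size([2, 64, 56, 56])
```

Unlike a convolution kernel, which is fixed after training, the aggregation weights here are generated per pixel from the input feature itself and shared across the channels within each group.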

12 | 
13 | If you find our work useful in your research, please cite:
14 | ```
15 | @InProceedings{Li_2021_CVPR,
16 | author = {Li, Duo and Hu, Jie and Wang, Changhu and Li, Xiangtai and She, Qi and Zhu, Lei and Zhang, Tong and Chen, Qifeng},
17 | title = {Involution: Inverting the Inherence of Convolution for Visual Recognition},
18 | booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
19 | month = {June},
20 | year = {2021}
21 | }
22 | ```
23 | 
24 | ## Getting Started
25 | 
26 | This repository is fully built upon the [OpenMMLab](https://openmmlab.com/) toolkits. For each individual task, the config and model files follow the same directory organization as [mmcls](https://github.com/open-mmlab/mmclassification), [mmdet](https://github.com/open-mmlab/mmdetection), and [mmseg](https://github.com/open-mmlab/mmsegmentation) respectively, so just copy-and-paste them to the corresponding locations to get started.
27 | 
28 | For example, to evaluate detectors:
29 | ```shell
30 | git clone https://github.com/open-mmlab/mmdetection # and install
31 | 
32 | # copy model files
33 | cp det/mmdet/models/backbones/* mmdetection/mmdet/models/backbones
34 | cp det/mmdet/models/necks/* mmdetection/mmdet/models/necks
35 | cp det/mmdet/models/dense_heads/* mmdetection/mmdet/models/dense_heads
36 | cp det/mmdet/models/roi_heads/* mmdetection/mmdet/models/roi_heads
37 | cp det/mmdet/models/roi_heads/mask_heads/* mmdetection/mmdet/models/roi_heads/mask_heads
38 | cp det/mmdet/models/utils/* mmdetection/mmdet/models/utils
39 | cp det/mmdet/datasets/* mmdetection/mmdet/datasets
40 | 
41 | # copy config files
42 | cp det/configs/_base_/models/* mmdetection/configs/_base_/models
43 | cp det/configs/_base_/schedules/* mmdetection/configs/_base_/schedules
44 | cp det/configs/involution mmdetection/configs -r
45 | 
46 | # evaluate checkpoints
47 | cd mmdetection
48 | bash tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}]
49 | ```
50 | 
51 | For more detailed guidance, please refer to the original [mmcls](https://github.com/open-mmlab/mmclassification), [mmdet](https://github.com/open-mmlab/mmdetection), and [mmseg](https://github.com/open-mmlab/mmsegmentation) tutorials.
52 | 
53 | Currently, we provide a memory-efficient implementation of the involution operator based on [CuPy](https://cupy.dev/). Please install this library in advance. A customized CUDA kernel would bring further acceleration on the hardware. Any contribution from the community in this regard is welcome!
54 | 
55 | ## Model Zoo
56 | 
57 | The parameters/FLOPs↓ and performance↑ compared to the convolution baselines are marked in the parentheses. Some of these checkpoints were obtained from our reimplementation runs, whose performance may differ slightly from the numbers reported in our paper. Models are trained with 64 GPUs on ImageNet, 8 GPUs on COCO, and 4 GPUs on Cityscapes.
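The released classification checkpoints below can also be used directly for single-image inference through the mmcls high-level API. The sketch below is a hedged example, not part of this repo: it assumes the `cls/` files have been copied into an [mmcls](https://github.com/open-mmlab/mmclassification) checkout, it is run from the mmcls root, and the checkpoint filename and demo image path are placeholders.

```python
# hypothetical inference with a downloaded RedNet-50 checkpoint (paths are placeholders)
from mmcls.apis import init_model, inference_model

model = init_model('configs/rednet/rednet50_b32x64_warmup_coslr_imagenet.py',
                   'rednet50_imagenet.pth', device='cuda:0')
result = inference_model(model, 'demo/demo.JPEG')
print(result)  # dict with predicted label, score and class name
```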
58 | 59 | ### Image Classification on ImageNet 60 | 61 | | Model | Params(M) | FLOPs(G) | Top-1 (%) | Top-5 (%) | Config | Download | 62 | |:---------------------:|:---------:|:--------:|:---------:|:---------:|:---------:|:--------:| 63 | | RedNet-26 | 9.23(32.8%↓) | 1.73(29.2%↓) | 75.96 | 93.19 | [config](https://github.com/d-li14/involution/blob/main/cls/configs/rednet/rednet26_b32x64_warmup_coslr_imagenet.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EWmTnvB1cqtIi-OI4HfxGBgBKzO0w_qc3CnErHhNfBitlg?e=XPws5X) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EVJ_eDMSsr1JqhInx67OCxcB-P54pj3o5mGO_rYVsRSk3A?e=70tJAc) | 64 | | RedNet-38 | 12.39(36.7%↓) | 2.22(31.3%↓) | 77.48 | 93.57 | [config](https://github.com/d-li14/involution/blob/main/cls/configs/rednet/rednet38_b32x64_warmup_coslr_imagenet.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ETZIquU7P3lDvru0OAPiTYIBAt-B__2LpP_NeB4sR0hJsg?e=b9Rbl0) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/Ed62YcJgC-NCp72NpEsMLGABkb7f-EkCQ1X-RyLmAMYoUQ?e=Hqetbj) | 65 | | RedNet-50 | 15.54(39.5%↓) | 2.71(34.1%↓) | 78.35 | 94.13 | [config](https://github.com/d-li14/involution/blob/main/cls/configs/rednet/rednet50_b32x64_warmup_coslr_imagenet.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EZjRG3qUMu5IuR7YH4Giyc8B6koPvu6s8rOlIG8-BuFevg?e=f4ce5G) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ETL5NxDwnQpCldbJb906aOABjjuhZSquxKzK5xYQm-6Bhw?e=lOzEEf) | 66 | | RedNet-101 | 25.65(42.6%↓) | 4.74(40.5%↓) | 78.92 | 94.35 | [config](https://github.com/d-li14/involution/blob/main/cls/configs/rednet/rednet101_b32x64_warmup_coslr_imagenet.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EXAuVXdXz1xAg5eG-dkvwTUBkds2IOK1kglHtkMeGz5z_A?e=vHvh5y) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EbbiBxdZoZJFmTPSg9hW3BIBLRmRpfPa70nu8pi_8ddOSw?e=CdAV86) | 67 | | RedNet-152 | 33.99(43.5%↓) | 6.79(41.4%↓) | 79.12 | 94.38 | [config](https://github.com/d-li14/involution/blob/main/cls/configs/rednet/rednet152_b32x64_warmup_coslr_imagenet.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ERxcS4wXUCtPl4uUnPoT9vcByzhLA0eHgDE-fw_EESfP0w?e=x0dZWB) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EYr2Yx-p4w1AuT-Q3E7M2m0BFhAGDoYvxps09vYy4Cnj3A?e=XGxzPF) | 68 | 69 | Before finetuning on the following downstream tasks, download the ImageNet pre-trained [RedNet-50 weights](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EaVInpb6TGJApN6QCAWwKJAB3cK9Iz55QfJgmhhaV7yuHw?e=yuWxyI) and set the `pretrained` argument in `det/configs/_base_/models/*.py` or `seg/configs/_base_/models/*.py` to your local path. 
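For reference, the relevant part of those configs looks like the abridged excerpt below (taken from `det/configs/_base_/models/faster_rcnn_red50_fpn.py`; the neck, heads and train/test settings are omitted here):

```python
model = dict(
    type='FasterRCNN',
    # set this to the local path of the downloaded ImageNet pre-trained checkpoint
    pretrained='/path/to/rednet50.pth',
    backbone=dict(
        type='RedNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'))
```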
70 | 71 | ### Object Detection and Instance Segmentation on COCO 72 | 73 | #### Faster R-CNN 74 | | Backbone | Neck | Head | Style | Lr schd | Params(M) | FLOPs(G) | box AP | Config | Download | 75 | | :-------------: | :---------: | :---------: | :-----: | :-----: |:---------:|:--------:| :----: | :------: | :--------: | 76 | | RedNet-50-FPN | convolution | convolution | pytorch | 1x | 31.6(23.9%↓) | 177.9(14.1%↓) | 39.5(1.8↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/faster_rcnn_red50_fpn_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ESOJAF74jK5HrevtBdMDku0Bgf71nC7F4UcMmGWER5z1_w?e=qGPdA5) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ESYSpzei_INMn1wu5qa0Su8B9YxXf_rOtib5xHjb1y2alA?e=Qn3lyd) | 77 | | RedNet-50-FPN | involution | convolution | pytorch | 1x | 29.5(28.9%↓) | 135.0(34.8%↓) | 40.2(2.5↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/faster_rcnn_red50_neck_fpn_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EV90stAJIXxEnDRe0QM0lvwB_jm9jwqwHoBOVVOqosPHJw?e=0QoikN) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/Ec8z-SZbJTxJrAJ3FLq0PSsB1Q7T1dXLvhfHmegQqH7rqA?e=5O9jDY) | 78 | | RedNet-50-FPN | involution | involution | pytorch | 1x | 29.0(30.1%↓) | 91.5(55.8%↓) | 39.2(1.5↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/faster_rcnn_red50_neck_fpn_head_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EeTwxsehR5VLhvf5TbTr8WwBmiNUwUeuXtbdOJlg0mFkmw?e=DL3gWX) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EUBsDdHQ10BKp8wW2aj2GHYBzhHtmW2BP65PIhn3KcSYqA?e=6dmNn7) | 79 | 80 | #### Mask R-CNN 81 | | Backbone | Neck | Head | Style | Lr schd | Params(M) | FLOPs(G) | box AP | mask AP | Config | Download | 82 | | :-------------: | :---------: | :---------: | :-----: | :-----: |:---------:|:--------:| :----: | :-----: | :------: | :--------: | 83 | | RedNet-50-FPN | convolution | convolution | pytorch | 1x | 34.2(22.6%↓) | 224.2(11.5%↓) | 39.9(1.5↑) | 35.7(0.6↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/mask_rcnn_red50_fpn_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EdheYm71X2pFu427_557zqcBmuKaLKEoU5R0Z2Kwo2alvg?e=qXShyW) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EQK-5qH_XxhHn4QnxmQbJ4cBL3sz9HqjS0EoybT2s1751g?e=4gpwK2) | 84 | | RedNet-50-FPN | involution | convolution | pytorch | 1x | 32.2(27.1%↓) | 181.3(28.5%↓) | 40.8(2.4↑) | 36.4(1.3↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/mask_rcnn_red50_neck_fpn_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EYYgUzXjJ3VBrscng-5QW_oB9wFK-dcqSDYB-LUXldFweg?e=idFEgd) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ETWdfYuhjY5AlGkUH11rLl4BLk9zsyKgwAbay47TYzIU-w?e=6ey6cD) | 85 | | RedNet-50-FPN | involution | involution | pytorch | 1x | 29.5(33.3%↓) | 104.6(58.7%↓) | 39.6(1.2↑) | 35.1(0.0↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/mask_rcnn_red50_neck_fpn_head_1x_coco.py) | 
[model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EZwtdWXX8sBLp7L__TrmkykBPEe7kJInbkbUblP3PxuURQ?e=09l25P) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/Ebevxbj_0OtNkb3uCdpM0aoBeMQUABiQ0bDfZ9P9Jw1AZA?e=ZUcbUo) | 86 | 87 | #### RetinaNet 88 | | Backbone | Neck | Style | Lr schd | Params(M) | FLOPs(G) | box AP | Config | Download | 89 | | :-------------: | :---------: | :-----: | :-----: |:---------:|:--------:| :----: | :------: | :--------: | 90 | | RedNet-50-FPN | convolution | pytorch | 1x | 27.8(26.3%↓) | 210.1(12.2%↓) | 38.2(1.6↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/retinanet_red50_fpn_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EfUY9orEyCVCsYMlcDhIZ2wBBDw7k1HqfTm9u11KfTopmA?e=4Jhu79) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EQQ_EVDmVg1FlfgpAu9NF5wB6xe6qnqaYWKJw9lL7kRxdw?e=fXxjPg) | 91 | | RedNet-50-FPN | involution | pytorch | 1x | 26.3(30.2%↓) | 199.9(16.5%↓) | 38.2(1.6↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/retinanet_red50_neck_fpn_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EedZ3bMWZkJIvKjyLkTZHksBc_8wdOMHhFZA7RDewjPO8g?e=jsSjYI) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ES7chxQh5-lGr5--GqroMScBKNTNACyvosdVuThPvkZGkg?e=CrlN9F) | 92 | 93 | 94 | ### Semantic Segmentation on Cityscapes 95 | 96 | | Method | Backbone | Neck | Crop Size | Lr schd | Params(M) | FLOPs(G) | mIoU | Config | download | 97 | |--------|----------|------|-----------|--------:|:---------:|:--------:|------:|:------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 98 | | FPN | RedNet-50 | convolution | 512x1024 | 80000 | 18.5(35.1%↓) | 293.9(19.0%↓) | 78.0(3.6↑) | [config](https://github.com/d-li14/involution/blob/main/seg/configs/involution/fpn_red50_512x1024_80k_cityscapes.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EYstjiI28SJPohJE54wapFUBW5Wc95Di2Rsh0vf6K79vPw?e=lOvbkZ) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EXdupIgFuAlFuH854wThyXcBQTyL7YhK3wPYcR98rw7PJg?e=MyXx2w) | 99 | | FPN | RedNet-50 | involution | 512x1024 | 80000 | 16.4(42.5%↓) | 205.2(43.4%↓) | 79.1(4.7↑) | [config](https://github.com/d-li14/involution/blob/main/seg/configs/involution/fpn_red50_neck_512x1024_80k_cityscapes.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EZzDyESh0ElFp2pIFL1xN70BAj1EyvhFyqi0g7Mp1OZxog?e=F7kZYH) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EXcP_3ujO_1Juj8ap7rqDJ8BWZDCyJL86BWjeZiJ_FfLOw?e=47lvtq) | 100 | | UPerNet| RedNet-50 | convolution | 512x1024 | 80000 | 56.4(15.1%↓) | 1825.6(3.6%↓) | 80.6(2.4↑) | [config](https://github.com/d-li14/involution/blob/main/seg/configs/involution/upernet_red50_512x1024_80k_cityscapes.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/Eb8-frsvSuNAm7qQ6-H2DtEBdACuf-mUOBhvE3YIOiobmA?e=Ibb2cN) | 
[log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EWhyFAZpxfRBoFi1myoT-RMB6-HeaP7NjSv88YQve4bZkg?e=wC8ccl) | 101 | -------------------------------------------------------------------------------- /cls/configs/_base_/models/rednet101.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='RedNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict( 16 | type='LabelSmoothLoss', 17 | loss_weight=1.0, 18 | label_smooth_val=0.1, 19 | num_classes=1000), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /cls/configs/_base_/models/rednet152.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='RedNet', 6 | depth=152, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict( 16 | type='LabelSmoothLoss', 17 | loss_weight=1.0, 18 | label_smooth_val=0.1, 19 | num_classes=1000), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /cls/configs/_base_/models/rednet26.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='RedNet', 6 | depth=26, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict( 16 | type='LabelSmoothLoss', 17 | loss_weight=1.0, 18 | label_smooth_val=0.1, 19 | num_classes=1000), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /cls/configs/_base_/models/rednet38.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='RedNet', 6 | depth=38, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict( 16 | type='LabelSmoothLoss', 17 | loss_weight=1.0, 18 | label_smooth_val=0.1, 19 | num_classes=1000), 20 | topk=(1, 5), 21 | )) 22 | -------------------------------------------------------------------------------- /cls/configs/_base_/models/rednet50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='ImageClassifier', 4 | backbone=dict( 5 | type='RedNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(3, ), 9 | style='pytorch'), 10 | neck=dict(type='GlobalAveragePooling'), 11 | head=dict( 12 | type='LinearClsHead', 13 | num_classes=1000, 14 | in_channels=2048, 15 | loss=dict( 16 | type='LabelSmoothLoss', 17 | loss_weight=1.0, 18 | label_smooth_val=0.1, 19 | num_classes=1000), 20 | topk=(1, 5), 21 | )) 22 | 
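A quick, optional sanity check for the classification model configs above — a sketch assuming they have been copied into an mmclassification checkout as described in the README and that a CUDA device is available (the registered `RedNet` backbone relies on the CuPy involution kernel):

```python
import torch
from mmcv import Config
from mmcls.models import build_classifier

# run from the mmclassification root after copying the files over
cfg = Config.fromfile('configs/_base_/models/rednet50.py')
model = build_classifier(cfg.model).cuda().eval()
with torch.no_grad():
    feat = model.extract_feat(torch.randn(1, 3, 224, 224).cuda())
print(feat.shape)  # expected: torch.Size([1, 2048]) after GlobalAveragePooling
```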
-------------------------------------------------------------------------------- /cls/configs/_base_/schedules/imagenet_bs2048_coslr_130e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict( 3 | type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True) 4 | optimizer_config = dict(grad_clip=None) 5 | # learning policy 6 | lr_config = dict( 7 | policy='CosineAnnealing', 8 | min_lr=0, 9 | warmup='linear', 10 | warmup_iters=3130, 11 | warmup_ratio=0.25) 12 | runner = dict(type='EpochBasedRunner', max_epochs=130) 13 | -------------------------------------------------------------------------------- /cls/configs/rednet/rednet101_b32x64_warmup_coslr_imagenet.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/rednet101.py', '../_base_/datasets/imagenet_bs32.py', 3 | '../_base_/schedules/imagenet_bs2048_coslr_130e.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /cls/configs/rednet/rednet152_b32x64_warmup_coslr_imagenet.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/rednet152.py', '../_base_/datasets/imagenet_bs32.py', 3 | '../_base_/schedules/imagenet_bs2048_coslr_130e.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /cls/configs/rednet/rednet26_b32x64_warmup_coslr_imagenet.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/rednet26.py', '../_base_/datasets/imagenet_bs32.py', 3 | '../_base_/schedules/imagenet_bs2048_coslr_130e.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /cls/configs/rednet/rednet38_b32x64_warmup_coslr_imagenet.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/rednet38.py', '../_base_/datasets/imagenet_bs32.py', 3 | '../_base_/schedules/imagenet_bs2048_coslr_130e.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /cls/configs/rednet/rednet50_b32x64_warmup_coslr_imagenet.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/rednet50.py', '../_base_/datasets/imagenet_bs32.py', 3 | '../_base_/schedules/imagenet_bs2048_coslr_130e.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /cls/mmcls/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .alexnet import AlexNet 2 | from .lenet import LeNet5 3 | from .mobilenet_v2 import MobileNetV2 4 | from .mobilenet_v3 import MobileNetv3 5 | from .regnet import RegNet 6 | from .resnest import ResNeSt 7 | from .resnet import ResNet, ResNetV1d 8 | from .resnet_cifar import ResNet_CIFAR 9 | from .resnext import ResNeXt 10 | from .seresnet import SEResNet 11 | from .seresnext import SEResNeXt 12 | from .shufflenet_v1 import ShuffleNetV1 13 | from .shufflenet_v2 import ShuffleNetV2 14 | from .vgg import VGG 15 | from .rednet import RedNet 16 | 17 | __all__ = [ 18 | 'LeNet5', 'AlexNet', 'VGG', 'RegNet', 'ResNet', 'ResNeXt', 'ResNetV1d', 19 | 'ResNeSt', 
'ResNet_CIFAR', 'SEResNet', 'SEResNeXt', 'ShuffleNetV1', 20 | 'ShuffleNetV2', 'MobileNetV2', 'MobileNetv3', 21 | 'RedNet' 22 | ] 23 | -------------------------------------------------------------------------------- /cls/mmcls/models/backbones/rednet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.utils.checkpoint as cp 3 | from mmcv.cnn import (ConvModule, build_conv_layer, build_norm_layer, 4 | constant_init, kaiming_init) 5 | from mmcv.utils.parrots_wrapper import _BatchNorm 6 | 7 | from ..builder import BACKBONES 8 | from .base_backbone import BaseBackbone 9 | from ..utils.involution_cuda import involution 10 | 11 | 12 | class Bottleneck(nn.Module): 13 | """Bottleneck block for ResNet. 14 | 15 | Args: 16 | in_channels (int): Input channels of this block. 17 | out_channels (int): Output channels of this block. 18 | expansion (int): The ratio of ``out_channels/mid_channels`` where 19 | ``mid_channels`` is the input/output channels of conv2. Default: 4. 20 | stride (int): stride of the block. Default: 1 21 | dilation (int): dilation of convolution. Default: 1 22 | downsample (nn.Module): downsample operation on identity branch. 23 | Default: None. 24 | style (str): ``"pytorch"`` or ``"caffe"``. If set to "pytorch", the 25 | stride-two layer is the 3x3 conv layer, otherwise the stride-two 26 | layer is the first 1x1 conv layer. Default: "pytorch". 27 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some 28 | memory while slowing down the training speed. 29 | conv_cfg (dict): dictionary to construct and config conv layer. 30 | Default: None 31 | norm_cfg (dict): dictionary to construct and config norm layer. 32 | Default: dict(type='BN') 33 | """ 34 | 35 | def __init__(self, 36 | in_channels, 37 | out_channels, 38 | expansion=4, 39 | stride=1, 40 | dilation=1, 41 | downsample=None, 42 | style='pytorch', 43 | with_cp=False, 44 | conv_cfg=None, 45 | norm_cfg=dict(type='BN')): 46 | super(Bottleneck, self).__init__() 47 | assert style in ['pytorch', 'caffe'] 48 | 49 | self.in_channels = in_channels 50 | self.out_channels = out_channels 51 | self.expansion = expansion 52 | assert out_channels % expansion == 0 53 | self.mid_channels = out_channels // expansion 54 | self.stride = stride 55 | self.dilation = dilation 56 | self.style = style 57 | self.with_cp = with_cp 58 | self.conv_cfg = conv_cfg 59 | self.norm_cfg = norm_cfg 60 | 61 | if self.style == 'pytorch': 62 | self.conv1_stride = 1 63 | self.conv2_stride = stride 64 | else: 65 | self.conv1_stride = stride 66 | self.conv2_stride = 1 67 | 68 | self.norm1_name, norm1 = build_norm_layer( 69 | norm_cfg, self.mid_channels, postfix=1) 70 | self.norm2_name, norm2 = build_norm_layer( 71 | norm_cfg, self.mid_channels, postfix=2) 72 | self.norm3_name, norm3 = build_norm_layer( 73 | norm_cfg, out_channels, postfix=3) 74 | 75 | self.conv1 = build_conv_layer( 76 | conv_cfg, 77 | in_channels, 78 | self.mid_channels, 79 | kernel_size=1, 80 | stride=self.conv1_stride, 81 | bias=False) 82 | self.add_module(self.norm1_name, norm1) 83 | self.conv2 = involution(self.mid_channels, 7, self.conv2_stride) 84 | 85 | self.add_module(self.norm2_name, norm2) 86 | self.conv3 = build_conv_layer( 87 | conv_cfg, 88 | self.mid_channels, 89 | out_channels, 90 | kernel_size=1, 91 | bias=False) 92 | self.add_module(self.norm3_name, norm3) 93 | 94 | self.relu = nn.ReLU(inplace=True) 95 | self.downsample = downsample 96 | 97 | @property 98 | def norm1(self): 99 | return 
getattr(self, self.norm1_name) 100 | 101 | @property 102 | def norm2(self): 103 | return getattr(self, self.norm2_name) 104 | 105 | @property 106 | def norm3(self): 107 | return getattr(self, self.norm3_name) 108 | 109 | def forward(self, x): 110 | 111 | def _inner_forward(x): 112 | identity = x 113 | 114 | out = self.conv1(x) 115 | out = self.norm1(out) 116 | out = self.relu(out) 117 | 118 | out = self.conv2(out) 119 | out = self.norm2(out) 120 | out = self.relu(out) 121 | 122 | out = self.conv3(out) 123 | out = self.norm3(out) 124 | 125 | if self.downsample is not None: 126 | identity = self.downsample(x) 127 | 128 | out += identity 129 | 130 | return out 131 | 132 | if self.with_cp and x.requires_grad: 133 | out = cp.checkpoint(_inner_forward, x) 134 | else: 135 | out = _inner_forward(x) 136 | 137 | out = self.relu(out) 138 | 139 | return out 140 | 141 | 142 | def get_expansion(block, expansion=None): 143 | """Get the expansion of a residual block. 144 | 145 | The block expansion will be obtained by the following order: 146 | 147 | 1. If ``expansion`` is given, just return it. 148 | 2. If ``block`` has the attribute ``expansion``, then return 149 | ``block.expansion``. 150 | 3. Return the default value according the the block type: 151 | 1 for ``BasicBlock`` and 4 for ``Bottleneck``. 152 | 153 | Args: 154 | block (class): The block class. 155 | expansion (int | None): The given expansion ratio. 156 | 157 | Returns: 158 | int: The expansion of the block. 159 | """ 160 | if isinstance(expansion, int): 161 | assert expansion > 0 162 | elif expansion is None: 163 | if hasattr(block, 'expansion'): 164 | expansion = block.expansion 165 | elif issubclass(block, Bottleneck): 166 | expansion = 4 167 | else: 168 | raise TypeError(f'expansion is not specified for {block.__name__}') 169 | else: 170 | raise TypeError('expansion must be an integer or None') 171 | 172 | return expansion 173 | 174 | 175 | class ResLayer(nn.Sequential): 176 | """ResLayer to build ResNet style backbone. 177 | 178 | Args: 179 | block (nn.Module): Residual block used to build ResLayer. 180 | num_blocks (int): Number of blocks. 181 | in_channels (int): Input channels of this block. 182 | out_channels (int): Output channels of this block. 183 | expansion (int, optional): The expansion for BasicBlock/Bottleneck. 184 | If not specified, it will firstly be obtained via 185 | ``block.expansion``. If the block has no attribute "expansion", 186 | the following default values will be used: 1 for BasicBlock and 187 | 4 for Bottleneck. Default: None. 188 | stride (int): stride of the first block. Default: 1. 189 | avg_down (bool): Use AvgPool instead of stride conv when 190 | downsampling in the bottleneck. Default: False 191 | conv_cfg (dict): dictionary to construct and config conv layer. 192 | Default: None 193 | norm_cfg (dict): dictionary to construct and config norm layer. 
194 | Default: dict(type='BN') 195 | """ 196 | 197 | def __init__(self, 198 | block, 199 | num_blocks, 200 | in_channels, 201 | out_channels, 202 | expansion=None, 203 | stride=1, 204 | avg_down=False, 205 | conv_cfg=None, 206 | norm_cfg=dict(type='BN'), 207 | **kwargs): 208 | self.block = block 209 | self.expansion = get_expansion(block, expansion) 210 | 211 | downsample = None 212 | if stride != 1 or in_channels != out_channels: 213 | downsample = [] 214 | conv_stride = stride 215 | if avg_down and stride != 1: 216 | conv_stride = 1 217 | downsample.append( 218 | nn.AvgPool2d( 219 | kernel_size=stride, 220 | stride=stride, 221 | ceil_mode=True, 222 | count_include_pad=False)) 223 | downsample.extend([ 224 | build_conv_layer( 225 | conv_cfg, 226 | in_channels, 227 | out_channels, 228 | kernel_size=1, 229 | stride=conv_stride, 230 | bias=False), 231 | build_norm_layer(norm_cfg, out_channels)[1] 232 | ]) 233 | downsample = nn.Sequential(*downsample) 234 | 235 | layers = [] 236 | layers.append( 237 | block( 238 | in_channels=in_channels, 239 | out_channels=out_channels, 240 | expansion=self.expansion, 241 | stride=stride, 242 | downsample=downsample, 243 | conv_cfg=conv_cfg, 244 | norm_cfg=norm_cfg, 245 | **kwargs)) 246 | in_channels = out_channels 247 | for i in range(1, num_blocks): 248 | layers.append( 249 | block( 250 | in_channels=in_channels, 251 | out_channels=out_channels, 252 | expansion=self.expansion, 253 | stride=1, 254 | conv_cfg=conv_cfg, 255 | norm_cfg=norm_cfg, 256 | **kwargs)) 257 | super(ResLayer, self).__init__(*layers) 258 | 259 | 260 | @BACKBONES.register_module() 261 | class RedNet(BaseBackbone): 262 | """ResNet backbone. 263 | 264 | Please refer to the `paper `_ for 265 | details. 266 | 267 | Args: 268 | depth (int): Network depth, from {18, 34, 50, 101, 152}. 269 | in_channels (int): Number of input image channels. Default: 3. 270 | stem_channels (int): Output channels of the stem layer. Default: 64. 271 | base_channels (int): Middle channels of the first stage. Default: 64. 272 | num_stages (int): Stages of the network. Default: 4. 273 | strides (Sequence[int]): Strides of the first block of each stage. 274 | Default: ``(1, 2, 2, 2)``. 275 | dilations (Sequence[int]): Dilation of each stage. 276 | Default: ``(1, 1, 1, 1)``. 277 | out_indices (Sequence[int]): Output from which stages. If only one 278 | stage is specified, a single tensor (feature map) is returned, 279 | otherwise multiple stages are specified, a tuple of tensors will 280 | be returned. Default: ``(3, )``. 281 | style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two 282 | layer is the 3x3 conv layer, otherwise the stride-two layer is 283 | the first 1x1 conv layer. 284 | deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv. 285 | Default: False. 286 | avg_down (bool): Use AvgPool instead of stride conv when 287 | downsampling in the bottleneck. Default: False. 288 | frozen_stages (int): Stages to be frozen (stop grad and set eval mode). 289 | -1 means not freezing any parameters. Default: -1. 290 | conv_cfg (dict | None): The config dict for conv layers. Default: None. 291 | norm_cfg (dict): The config dict for norm layers. 292 | norm_eval (bool): Whether to set norm layers to eval mode, namely, 293 | freeze running stats (mean and var). Note: Effect on Batch Norm 294 | and its variants only. Default: False. 295 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some 296 | memory while slowing down the training speed. Default: False. 
297 | zero_init_residual (bool): Whether to use zero init for last norm layer 298 | in resblocks to let them behave as identity. Default: True. 299 | 300 | Example: 301 | >>> from mmcls.models import ResNet 302 | >>> import torch 303 | >>> self = ResNet(depth=18) 304 | >>> self.eval() 305 | >>> inputs = torch.rand(1, 3, 32, 32) 306 | >>> level_outputs = self.forward(inputs) 307 | >>> for level_out in level_outputs: 308 | ... print(tuple(level_out.shape)) 309 | (1, 64, 8, 8) 310 | (1, 128, 4, 4) 311 | (1, 256, 2, 2) 312 | (1, 512, 1, 1) 313 | """ 314 | 315 | arch_settings = { 316 | 26: (Bottleneck, (1, 2, 4, 1)), 317 | 38: (Bottleneck, (2, 3, 5, 2)), 318 | 50: (Bottleneck, (3, 4, 6, 3)), 319 | 101: (Bottleneck, (3, 4, 23, 3)), 320 | 152: (Bottleneck, (3, 8, 36, 3)) 321 | } 322 | 323 | def __init__(self, 324 | depth, 325 | in_channels=3, 326 | stem_channels=64, 327 | base_channels=64, 328 | expansion=None, 329 | num_stages=4, 330 | strides=(1, 2, 2, 2), 331 | dilations=(1, 1, 1, 1), 332 | out_indices=(3, ), 333 | style='pytorch', 334 | avg_down=False, 335 | frozen_stages=-1, 336 | conv_cfg=None, 337 | norm_cfg=dict(type='BN', requires_grad=True), 338 | norm_eval=False, 339 | with_cp=False, 340 | zero_init_residual=True): 341 | super(RedNet, self).__init__() 342 | if depth not in self.arch_settings: 343 | raise KeyError(f'invalid depth {depth} for resnet') 344 | self.depth = depth 345 | self.stem_channels = stem_channels 346 | self.base_channels = base_channels 347 | self.num_stages = num_stages 348 | assert num_stages >= 1 and num_stages <= 4 349 | self.strides = strides 350 | self.dilations = dilations 351 | assert len(strides) == len(dilations) == num_stages 352 | self.out_indices = out_indices 353 | assert max(out_indices) < num_stages 354 | self.style = style 355 | self.avg_down = avg_down 356 | self.frozen_stages = frozen_stages 357 | self.conv_cfg = conv_cfg 358 | self.norm_cfg = norm_cfg 359 | self.with_cp = with_cp 360 | self.norm_eval = norm_eval 361 | self.zero_init_residual = zero_init_residual 362 | self.block, stage_blocks = self.arch_settings[depth] 363 | self.stage_blocks = stage_blocks[:num_stages] 364 | self.expansion = get_expansion(self.block, expansion) 365 | 366 | self._make_stem_layer(in_channels, stem_channels) 367 | 368 | self.res_layers = [] 369 | _in_channels = stem_channels 370 | _out_channels = base_channels * self.expansion 371 | for i, num_blocks in enumerate(self.stage_blocks): 372 | stride = strides[i] 373 | dilation = dilations[i] 374 | res_layer = self.make_res_layer( 375 | block=self.block, 376 | num_blocks=num_blocks, 377 | in_channels=_in_channels, 378 | out_channels=_out_channels, 379 | expansion=self.expansion, 380 | stride=stride, 381 | dilation=dilation, 382 | style=self.style, 383 | avg_down=self.avg_down, 384 | with_cp=with_cp, 385 | conv_cfg=conv_cfg, 386 | norm_cfg=norm_cfg) 387 | _in_channels = _out_channels 388 | _out_channels *= 2 389 | layer_name = f'layer{i + 1}' 390 | self.add_module(layer_name, res_layer) 391 | self.res_layers.append(layer_name) 392 | 393 | self._freeze_stages() 394 | 395 | self.feat_dim = res_layer[-1].out_channels 396 | 397 | def make_res_layer(self, **kwargs): 398 | return ResLayer(**kwargs) 399 | 400 | @property 401 | def norm1(self): 402 | return getattr(self, self.norm1_name) 403 | 404 | def _make_stem_layer(self, in_channels, stem_channels): 405 | self.stem = nn.Sequential( 406 | ConvModule( 407 | in_channels, 408 | stem_channels // 2, 409 | kernel_size=3, 410 | stride=2, 411 | padding=1, 412 | 
conv_cfg=self.conv_cfg, 413 | norm_cfg=self.norm_cfg, 414 | inplace=True), 415 | involution(stem_channels // 2, 3, 1), 416 | nn.BatchNorm2d(stem_channels // 2), 417 | nn.ReLU(inplace=True), 418 | ConvModule( 419 | stem_channels // 2, 420 | stem_channels, 421 | kernel_size=3, 422 | stride=1, 423 | padding=1, 424 | conv_cfg=self.conv_cfg, 425 | norm_cfg=self.norm_cfg, 426 | inplace=True)) 427 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 428 | 429 | def _freeze_stages(self): 430 | if self.frozen_stages >= 0: 431 | self.stem.eval() 432 | for param in self.stem.parameters(): 433 | param.requires_grad = False 434 | 435 | for i in range(1, self.frozen_stages + 1): 436 | m = getattr(self, f'layer{i}') 437 | m.eval() 438 | for param in m.parameters(): 439 | param.requires_grad = False 440 | 441 | def init_weights(self, pretrained=None): 442 | super(RedNet, self).init_weights(pretrained) 443 | if pretrained is None: 444 | for m in self.modules(): 445 | if isinstance(m, nn.Conv2d): 446 | kaiming_init(m) 447 | elif isinstance(m, (_BatchNorm, nn.GroupNorm)): 448 | constant_init(m, 1) 449 | 450 | if self.zero_init_residual: 451 | for m in self.modules(): 452 | if isinstance(m, Bottleneck): 453 | constant_init(m.norm3, 0) 454 | 455 | def forward(self, x): 456 | x = self.stem(x) 457 | x = self.maxpool(x) 458 | outs = [] 459 | for i, layer_name in enumerate(self.res_layers): 460 | res_layer = getattr(self, layer_name) 461 | x = res_layer(x) 462 | if i in self.out_indices: 463 | outs.append(x) 464 | if len(outs) == 1: 465 | return outs[0] 466 | else: 467 | return tuple(outs) 468 | 469 | def train(self, mode=True): 470 | super(RedNet, self).train(mode) 471 | self._freeze_stages() 472 | if mode and self.norm_eval: 473 | for m in self.modules(): 474 | # trick: eval have effect on BatchNorm only 475 | if isinstance(m, _BatchNorm): 476 | m.eval() 477 | 478 | -------------------------------------------------------------------------------- /cls/mmcls/models/utils/involution_cuda.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | import torch 3 | from torch.nn.modules.utils import _pair 4 | import torch.nn.functional as F 5 | import torch.nn as nn 6 | from mmcv.cnn import ConvModule 7 | 8 | 9 | from collections import namedtuple 10 | import cupy 11 | from string import Template 12 | 13 | 14 | Stream = namedtuple('Stream', ['ptr']) 15 | 16 | 17 | def Dtype(t): 18 | if isinstance(t, torch.cuda.FloatTensor): 19 | return 'float' 20 | elif isinstance(t, torch.cuda.DoubleTensor): 21 | return 'double' 22 | 23 | 24 | @cupy._util.memoize(for_each_device=True) 25 | def load_kernel(kernel_name, code, **kwargs): 26 | code = Template(code).substitute(**kwargs) 27 | kernel_code = cupy.cuda.compile_with_cache(code) 28 | return kernel_code.get_function(kernel_name) 29 | 30 | 31 | CUDA_NUM_THREADS = 1024 32 | 33 | kernel_loop = ''' 34 | #define CUDA_KERNEL_LOOP(i, n) \ 35 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 36 | i < (n); \ 37 | i += blockDim.x * gridDim.x) 38 | ''' 39 | 40 | 41 | def GET_BLOCKS(N): 42 | return (N + CUDA_NUM_THREADS - 1) // CUDA_NUM_THREADS 43 | 44 | 45 | _involution_kernel = kernel_loop + ''' 46 | extern "C" 47 | __global__ void involution_forward_kernel( 48 | const ${Dtype}* bottom_data, const ${Dtype}* weight_data, ${Dtype}* top_data) { 49 | CUDA_KERNEL_LOOP(index, ${nthreads}) { 50 | const int n = index / ${channels} / ${top_height} / ${top_width}; 51 | const int c = (index / ${top_height} / 
${top_width}) % ${channels}; 52 | const int h = (index / ${top_width}) % ${top_height}; 53 | const int w = index % ${top_width}; 54 | const int g = c / (${channels} / ${groups}); 55 | ${Dtype} value = 0; 56 | #pragma unroll 57 | for (int kh = 0; kh < ${kernel_h}; ++kh) { 58 | #pragma unroll 59 | for (int kw = 0; kw < ${kernel_w}; ++kw) { 60 | const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h}; 61 | const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w}; 62 | if ((h_in >= 0) && (h_in < ${bottom_height}) 63 | && (w_in >= 0) && (w_in < ${bottom_width})) { 64 | const int offset = ((n * ${channels} + c) * ${bottom_height} + h_in) 65 | * ${bottom_width} + w_in; 66 | const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h) 67 | * ${top_width} + w; 68 | value += weight_data[offset_weight] * bottom_data[offset]; 69 | } 70 | } 71 | } 72 | top_data[index] = value; 73 | } 74 | } 75 | ''' 76 | 77 | 78 | _involution_kernel_backward_grad_input = kernel_loop + ''' 79 | extern "C" 80 | __global__ void involution_backward_grad_input_kernel( 81 | const ${Dtype}* const top_diff, const ${Dtype}* const weight_data, ${Dtype}* const bottom_diff) { 82 | CUDA_KERNEL_LOOP(index, ${nthreads}) { 83 | const int n = index / ${channels} / ${bottom_height} / ${bottom_width}; 84 | const int c = (index / ${bottom_height} / ${bottom_width}) % ${channels}; 85 | const int h = (index / ${bottom_width}) % ${bottom_height}; 86 | const int w = index % ${bottom_width}; 87 | const int g = c / (${channels} / ${groups}); 88 | ${Dtype} value = 0; 89 | #pragma unroll 90 | for (int kh = 0; kh < ${kernel_h}; ++kh) { 91 | #pragma unroll 92 | for (int kw = 0; kw < ${kernel_w}; ++kw) { 93 | const int h_out_s = h + ${pad_h} - kh * ${dilation_h}; 94 | const int w_out_s = w + ${pad_w} - kw * ${dilation_w}; 95 | if (((h_out_s % ${stride_h}) == 0) && ((w_out_s % ${stride_w}) == 0)) { 96 | const int h_out = h_out_s / ${stride_h}; 97 | const int w_out = w_out_s / ${stride_w}; 98 | if ((h_out >= 0) && (h_out < ${top_height}) 99 | && (w_out >= 0) && (w_out < ${top_width})) { 100 | const int offset = ((n * ${channels} + c) * ${top_height} + h_out) 101 | * ${top_width} + w_out; 102 | const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h_out) 103 | * ${top_width} + w_out; 104 | value += weight_data[offset_weight] * top_diff[offset]; 105 | } 106 | } 107 | } 108 | } 109 | bottom_diff[index] = value; 110 | } 111 | } 112 | ''' 113 | 114 | 115 | _involution_kernel_backward_grad_weight = kernel_loop + ''' 116 | extern "C" 117 | __global__ void involution_backward_grad_weight_kernel( 118 | const ${Dtype}* const top_diff, const ${Dtype}* const bottom_data, ${Dtype}* const buffer_data) { 119 | CUDA_KERNEL_LOOP(index, ${nthreads}) { 120 | const int h = (index / ${top_width}) % ${top_height}; 121 | const int w = index % ${top_width}; 122 | const int kh = (index / ${kernel_w} / ${top_height} / ${top_width}) 123 | % ${kernel_h}; 124 | const int kw = (index / ${top_height} / ${top_width}) % ${kernel_w}; 125 | const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h}; 126 | const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w}; 127 | if ((h_in >= 0) && (h_in < ${bottom_height}) 128 | && (w_in >= 0) && (w_in < ${bottom_width})) { 129 | const int g = (index / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${groups}; 130 | const int n = (index / ${groups} / ${kernel_h} / ${kernel_w} / ${top_height} / 
${top_width}) % ${num}; 131 | ${Dtype} value = 0; 132 | #pragma unroll 133 | for (int c = g * (${channels} / ${groups}); c < (g + 1) * (${channels} / ${groups}); ++c) { 134 | const int top_offset = ((n * ${channels} + c) * ${top_height} + h) 135 | * ${top_width} + w; 136 | const int bottom_offset = ((n * ${channels} + c) * ${bottom_height} + h_in) 137 | * ${bottom_width} + w_in; 138 | value += top_diff[top_offset] * bottom_data[bottom_offset]; 139 | } 140 | buffer_data[index] = value; 141 | } else { 142 | buffer_data[index] = 0; 143 | } 144 | } 145 | } 146 | ''' 147 | 148 | 149 | class _involution(Function): 150 | @staticmethod 151 | def forward(ctx, input, weight, stride, padding, dilation): 152 | assert input.dim() == 4 and input.is_cuda 153 | assert weight.dim() == 6 and weight.is_cuda 154 | batch_size, channels, height, width = input.size() 155 | kernel_h, kernel_w = weight.size()[2:4] 156 | output_h = int((height + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) / stride[0] + 1) 157 | output_w = int((width + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) / stride[1] + 1) 158 | 159 | output = input.new(batch_size, channels, output_h, output_w) 160 | n = output.numel() 161 | 162 | with torch.cuda.device_of(input): 163 | f = load_kernel('involution_forward_kernel', _involution_kernel, Dtype=Dtype(input), nthreads=n, 164 | num=batch_size, channels=channels, groups=weight.size()[1], 165 | bottom_height=height, bottom_width=width, 166 | top_height=output_h, top_width=output_w, 167 | kernel_h=kernel_h, kernel_w=kernel_w, 168 | stride_h=stride[0], stride_w=stride[1], 169 | dilation_h=dilation[0], dilation_w=dilation[1], 170 | pad_h=padding[0], pad_w=padding[1]) 171 | f(block=(CUDA_NUM_THREADS,1,1), 172 | grid=(GET_BLOCKS(n),1,1), 173 | args=[input.data_ptr(), weight.data_ptr(), output.data_ptr()], 174 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream)) 175 | 176 | ctx.save_for_backward(input, weight) 177 | ctx.stride, ctx.padding, ctx.dilation = stride, padding, dilation 178 | return output 179 | 180 | @staticmethod 181 | def backward(ctx, grad_output): 182 | assert grad_output.is_cuda and grad_output.is_contiguous() 183 | input, weight = ctx.saved_tensors 184 | stride, padding, dilation = ctx.stride, ctx.padding, ctx.dilation 185 | 186 | batch_size, channels, height, width = input.size() 187 | kernel_h, kernel_w = weight.size()[2:4] 188 | output_h, output_w = grad_output.size()[2:] 189 | 190 | grad_input, grad_weight = None, None 191 | 192 | opt = dict(Dtype=Dtype(grad_output), 193 | num=batch_size, channels=channels, groups=weight.size()[1], 194 | bottom_height=height, bottom_width=width, 195 | top_height=output_h, top_width=output_w, 196 | kernel_h=kernel_h, kernel_w=kernel_w, 197 | stride_h=stride[0], stride_w=stride[1], 198 | dilation_h=dilation[0], dilation_w=dilation[1], 199 | pad_h=padding[0], pad_w=padding[1]) 200 | 201 | with torch.cuda.device_of(input): 202 | if ctx.needs_input_grad[0]: 203 | grad_input = input.new(input.size()) 204 | 205 | n = grad_input.numel() 206 | opt['nthreads'] = n 207 | 208 | f = load_kernel('involution_backward_grad_input_kernel', 209 | _involution_kernel_backward_grad_input, **opt) 210 | f(block=(CUDA_NUM_THREADS,1,1), 211 | grid=(GET_BLOCKS(n),1,1), 212 | args=[grad_output.data_ptr(), weight.data_ptr(), grad_input.data_ptr()], 213 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream)) 214 | 215 | if ctx.needs_input_grad[1]: 216 | grad_weight = weight.new(weight.size()) 217 | 218 | n = grad_weight.numel() 219 | opt['nthreads'] 
= n 220 | 221 | f = load_kernel('involution_backward_grad_weight_kernel', 222 | _involution_kernel_backward_grad_weight, **opt) 223 | f(block=(CUDA_NUM_THREADS,1,1), 224 | grid=(GET_BLOCKS(n),1,1), 225 | args=[grad_output.data_ptr(), input.data_ptr(), grad_weight.data_ptr()], 226 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream)) 227 | 228 | return grad_input, grad_weight, None, None, None 229 | 230 | 231 | def _involution_cuda(input, weight, bias=None, stride=1, padding=0, dilation=1): 232 | """ involution kernel 233 | """ 234 | assert input.size(0) == weight.size(0) 235 | assert input.size(-2)//stride == weight.size(-2) 236 | assert input.size(-1)//stride == weight.size(-1) 237 | if input.is_cuda: 238 | out = _involution.apply(input, weight, _pair(stride), _pair(padding), _pair(dilation)) 239 | if bias is not None: 240 | out += bias.view(1,-1,1,1) 241 | else: 242 | raise NotImplementedError 243 | return out 244 | 245 | 246 | class involution(nn.Module): 247 | 248 | def __init__(self, 249 | channels, 250 | kernel_size, 251 | stride): 252 | super(involution, self).__init__() 253 | self.kernel_size = kernel_size 254 | self.stride = stride 255 | self.channels = channels 256 | reduction_ratio = 4 257 | self.group_channels = 16 258 | self.groups = self.channels // self.group_channels 259 | self.conv1 = ConvModule( 260 | in_channels=channels, 261 | out_channels=channels // reduction_ratio, 262 | kernel_size=1, 263 | conv_cfg=None, 264 | norm_cfg=dict(type='BN'), 265 | act_cfg=dict(type='ReLU')) 266 | self.conv2 = ConvModule( 267 | in_channels=channels // reduction_ratio, 268 | out_channels=kernel_size**2 * self.groups, 269 | kernel_size=1, 270 | stride=1, 271 | conv_cfg=None, 272 | norm_cfg=None, 273 | act_cfg=None) 274 | if stride > 1: 275 | self.avgpool = nn.AvgPool2d(stride, stride) 276 | 277 | def forward(self, x): 278 | weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x))) 279 | b, c, h, w = weight.shape 280 | weight = weight.view(b, self.groups, self.kernel_size, self.kernel_size, h, w) 281 | out = _involution_cuda(x, weight, stride=self.stride, padding=(self.kernel_size-1)//2) 282 | return out 283 | -------------------------------------------------------------------------------- /cls/mmcls/models/utils/involution_naive.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import ConvModule 3 | 4 | 5 | class involution(nn.Module): 6 | 7 | def __init__(self, 8 | channels, 9 | kernel_size, 10 | stride): 11 | super(involution, self).__init__() 12 | self.kernel_size = kernel_size 13 | self.stride = stride 14 | self.channels = channels 15 | reduction_ratio = 4 16 | self.group_channels = 16 17 | self.groups = self.channels // self.group_channels 18 | self.conv1 = ConvModule( 19 | in_channels=channels, 20 | out_channels=channels // reduction_ratio, 21 | kernel_size=1, 22 | conv_cfg=None, 23 | norm_cfg=dict(type='BN'), 24 | act_cfg=dict(type='ReLU')) 25 | self.conv2 = ConvModule( 26 | in_channels=channels // reduction_ratio, 27 | out_channels=kernel_size**2 * self.groups, 28 | kernel_size=1, 29 | stride=1, 30 | conv_cfg=None, 31 | norm_cfg=None, 32 | act_cfg=None) 33 | if stride > 1: 34 | self.avgpool = nn.AvgPool2d(stride, stride) 35 | self.unfold = nn.Unfold(kernel_size, 1, (kernel_size-1)//2, stride) 36 | 37 | def forward(self, x): 38 | weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x))) 39 | b, c, h, w = weight.shape 40 | weight = weight.view(b, self.groups, 
self.kernel_size**2, h, w).unsqueeze(2) 41 | out = self.unfold(x).view(b, self.groups, self.group_channels, self.kernel_size**2, h, w) 42 | out = (weight * out).sum(dim=3).view(b, self.channels, h, w) 43 | return out 44 | -------------------------------------------------------------------------------- /det/configs/_base_/models/faster_rcnn_red50_fpn.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FasterRCNN', 3 | #pretrained='torchvision://resnet50', 4 | pretrained='/path/to/rednet50.pth', 5 | backbone=dict( 6 | type='RedNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_across_levels=False, 77 | nms_pre=2000, 78 | nms_post=1000, 79 | max_num=1000, 80 | nms_thr=0.7, 81 | min_bbox_size=0), 82 | rcnn=dict( 83 | assigner=dict( 84 | type='MaxIoUAssigner', 85 | pos_iou_thr=0.5, 86 | neg_iou_thr=0.5, 87 | min_pos_iou=0.5, 88 | match_low_quality=False, 89 | ignore_iof_thr=-1), 90 | sampler=dict( 91 | type='RandomSampler', 92 | num=512, 93 | pos_fraction=0.25, 94 | neg_pos_ub=-1, 95 | add_gt_as_proposals=True), 96 | pos_weight=-1, 97 | debug=False)), 98 | test_cfg=dict( 99 | rpn=dict( 100 | nms_across_levels=False, 101 | nms_pre=1000, 102 | nms_post=1000, 103 | max_num=1000, 104 | nms_thr=0.7, 105 | min_bbox_size=0), 106 | rcnn=dict( 107 | score_thr=0.05, 108 | nms=dict(type='nms', iou_threshold=0.5), 109 | max_per_img=100) 110 | # soft-nms is also supported for rcnn testing 111 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 112 | )) 113 | -------------------------------------------------------------------------------- 
/det/configs/_base_/models/faster_rcnn_red50_neck_fpn.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FasterRCNN', 3 | #pretrained='torchvision://resnet50', 4 | pretrained='/path/to/rednet50.pth', 5 | backbone=dict( 6 | type='RedNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN_involution', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_across_levels=False, 77 | nms_pre=2000, 78 | nms_post=1000, 79 | max_num=1000, 80 | nms_thr=0.7, 81 | min_bbox_size=0), 82 | rcnn=dict( 83 | assigner=dict( 84 | type='MaxIoUAssigner', 85 | pos_iou_thr=0.5, 86 | neg_iou_thr=0.5, 87 | min_pos_iou=0.5, 88 | match_low_quality=False, 89 | ignore_iof_thr=-1), 90 | sampler=dict( 91 | type='RandomSampler', 92 | num=512, 93 | pos_fraction=0.25, 94 | neg_pos_ub=-1, 95 | add_gt_as_proposals=True), 96 | pos_weight=-1, 97 | debug=False)), 98 | test_cfg=dict( 99 | rpn=dict( 100 | nms_across_levels=False, 101 | nms_pre=1000, 102 | nms_post=1000, 103 | max_num=1000, 104 | nms_thr=0.7, 105 | min_bbox_size=0), 106 | rcnn=dict( 107 | score_thr=0.05, 108 | nms=dict(type='nms', iou_threshold=0.5), 109 | max_per_img=100) 110 | # soft-nms is also supported for rcnn testing 111 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 112 | )) 113 | -------------------------------------------------------------------------------- /det/configs/_base_/models/faster_rcnn_red50_neck_fpn_head.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FasterRCNN', 3 | #pretrained='torchvision://resnet50', 4 | pretrained='/path/to/rednet50.pth', 5 | backbone=dict( 6 | 
type='RedNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN_involution', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead_involution', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_across_levels=False, 77 | nms_pre=2000, 78 | nms_post=1000, 79 | max_num=1000, 80 | nms_thr=0.7, 81 | min_bbox_size=0), 82 | rcnn=dict( 83 | assigner=dict( 84 | type='MaxIoUAssigner', 85 | pos_iou_thr=0.5, 86 | neg_iou_thr=0.5, 87 | min_pos_iou=0.5, 88 | match_low_quality=False, 89 | ignore_iof_thr=-1), 90 | sampler=dict( 91 | type='RandomSampler', 92 | num=512, 93 | pos_fraction=0.25, 94 | neg_pos_ub=-1, 95 | add_gt_as_proposals=True), 96 | pos_weight=-1, 97 | debug=False)), 98 | test_cfg=dict( 99 | rpn=dict( 100 | nms_across_levels=False, 101 | nms_pre=1000, 102 | nms_post=1000, 103 | max_num=1000, 104 | nms_thr=0.7, 105 | min_bbox_size=0), 106 | rcnn=dict( 107 | score_thr=0.05, 108 | nms=dict(type='nms', iou_threshold=0.5), 109 | max_per_img=100) 110 | # soft-nms is also supported for rcnn testing 111 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 112 | )) 113 | -------------------------------------------------------------------------------- /det/configs/_base_/models/mask_rcnn_red50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | #pretrained='torchvision://resnet50', 5 | pretrained='/path/to/rednet50.pth', 6 | backbone=dict( 7 | type='RedNet', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | norm_eval=True, 14 | style='pytorch'), 15 | neck=dict( 16 | type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | 
out_channels=256, 19 | num_outs=5), 20 | rpn_head=dict( 21 | type='RPNHead', 22 | in_channels=256, 23 | feat_channels=256, 24 | anchor_generator=dict( 25 | type='AnchorGenerator', 26 | scales=[8], 27 | ratios=[0.5, 1.0, 2.0], 28 | strides=[4, 8, 16, 32, 64]), 29 | bbox_coder=dict( 30 | type='DeltaXYWHBBoxCoder', 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0]), 33 | loss_cls=dict( 34 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 35 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 36 | roi_head=dict( 37 | type='StandardRoIHead', 38 | bbox_roi_extractor=dict( 39 | type='SingleRoIExtractor', 40 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 41 | out_channels=256, 42 | featmap_strides=[4, 8, 16, 32]), 43 | bbox_head=dict( 44 | type='Shared2FCBBoxHead', 45 | in_channels=256, 46 | fc_out_channels=1024, 47 | roi_feat_size=7, 48 | num_classes=80, 49 | bbox_coder=dict( 50 | type='DeltaXYWHBBoxCoder', 51 | target_means=[0., 0., 0., 0.], 52 | target_stds=[0.1, 0.1, 0.2, 0.2]), 53 | reg_class_agnostic=False, 54 | loss_cls=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 56 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 57 | mask_roi_extractor=dict( 58 | type='SingleRoIExtractor', 59 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 60 | out_channels=256, 61 | featmap_strides=[4, 8, 16, 32]), 62 | mask_head=dict( 63 | type='FCNMaskHead', 64 | num_convs=4, 65 | in_channels=256, 66 | conv_out_channels=256, 67 | num_classes=80, 68 | loss_mask=dict( 69 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 70 | # model training and testing settings 71 | train_cfg=dict( 72 | rpn=dict( 73 | assigner=dict( 74 | type='MaxIoUAssigner', 75 | pos_iou_thr=0.7, 76 | neg_iou_thr=0.3, 77 | min_pos_iou=0.3, 78 | match_low_quality=True, 79 | ignore_iof_thr=-1), 80 | sampler=dict( 81 | type='RandomSampler', 82 | num=256, 83 | pos_fraction=0.5, 84 | neg_pos_ub=-1, 85 | add_gt_as_proposals=False), 86 | allowed_border=-1, 87 | pos_weight=-1, 88 | debug=False), 89 | rpn_proposal=dict( 90 | nms_across_levels=False, 91 | nms_pre=2000, 92 | nms_post=1000, 93 | max_num=1000, 94 | nms_thr=0.7, 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | assigner=dict( 98 | type='MaxIoUAssigner', 99 | pos_iou_thr=0.5, 100 | neg_iou_thr=0.5, 101 | min_pos_iou=0.5, 102 | match_low_quality=True, 103 | ignore_iof_thr=-1), 104 | sampler=dict( 105 | type='RandomSampler', 106 | num=512, 107 | pos_fraction=0.25, 108 | neg_pos_ub=-1, 109 | add_gt_as_proposals=True), 110 | mask_size=28, 111 | pos_weight=-1, 112 | debug=False)), 113 | test_cfg=dict( 114 | rpn=dict( 115 | nms_across_levels=False, 116 | nms_pre=1000, 117 | nms_post=1000, 118 | max_num=1000, 119 | nms_thr=0.7, 120 | min_bbox_size=0), 121 | rcnn=dict( 122 | score_thr=0.05, 123 | nms=dict(type='nms', iou_threshold=0.5), 124 | max_per_img=100, 125 | mask_thr_binary=0.5))) 126 | -------------------------------------------------------------------------------- /det/configs/_base_/models/mask_rcnn_red50_neck_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | #pretrained='torchvision://resnet50', 5 | pretrained='/path/to/rednet50.pth', 6 | backbone=dict( 7 | type='RedNet', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | norm_eval=True, 14 | style='pytorch'), 15 | neck=dict( 16 | 
type='FPN_involution', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | num_outs=5), 20 | rpn_head=dict( 21 | type='RPNHead', 22 | in_channels=256, 23 | feat_channels=256, 24 | anchor_generator=dict( 25 | type='AnchorGenerator', 26 | scales=[8], 27 | ratios=[0.5, 1.0, 2.0], 28 | strides=[4, 8, 16, 32, 64]), 29 | bbox_coder=dict( 30 | type='DeltaXYWHBBoxCoder', 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0]), 33 | loss_cls=dict( 34 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 35 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 36 | roi_head=dict( 37 | type='StandardRoIHead', 38 | bbox_roi_extractor=dict( 39 | type='SingleRoIExtractor', 40 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 41 | out_channels=256, 42 | featmap_strides=[4, 8, 16, 32]), 43 | bbox_head=dict( 44 | type='Shared2FCBBoxHead', 45 | in_channels=256, 46 | fc_out_channels=1024, 47 | roi_feat_size=7, 48 | num_classes=80, 49 | bbox_coder=dict( 50 | type='DeltaXYWHBBoxCoder', 51 | target_means=[0., 0., 0., 0.], 52 | target_stds=[0.1, 0.1, 0.2, 0.2]), 53 | reg_class_agnostic=False, 54 | loss_cls=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 56 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 57 | mask_roi_extractor=dict( 58 | type='SingleRoIExtractor', 59 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 60 | out_channels=256, 61 | featmap_strides=[4, 8, 16, 32]), 62 | mask_head=dict( 63 | type='FCNMaskHead', 64 | num_convs=4, 65 | in_channels=256, 66 | conv_out_channels=256, 67 | num_classes=80, 68 | loss_mask=dict( 69 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 70 | # model training and testing settings 71 | train_cfg=dict( 72 | rpn=dict( 73 | assigner=dict( 74 | type='MaxIoUAssigner', 75 | pos_iou_thr=0.7, 76 | neg_iou_thr=0.3, 77 | min_pos_iou=0.3, 78 | match_low_quality=True, 79 | ignore_iof_thr=-1), 80 | sampler=dict( 81 | type='RandomSampler', 82 | num=256, 83 | pos_fraction=0.5, 84 | neg_pos_ub=-1, 85 | add_gt_as_proposals=False), 86 | allowed_border=-1, 87 | pos_weight=-1, 88 | debug=False), 89 | rpn_proposal=dict( 90 | nms_across_levels=False, 91 | nms_pre=2000, 92 | nms_post=1000, 93 | max_num=1000, 94 | nms_thr=0.7, 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | assigner=dict( 98 | type='MaxIoUAssigner', 99 | pos_iou_thr=0.5, 100 | neg_iou_thr=0.5, 101 | min_pos_iou=0.5, 102 | match_low_quality=True, 103 | ignore_iof_thr=-1), 104 | sampler=dict( 105 | type='RandomSampler', 106 | num=512, 107 | pos_fraction=0.25, 108 | neg_pos_ub=-1, 109 | add_gt_as_proposals=True), 110 | mask_size=28, 111 | pos_weight=-1, 112 | debug=False)), 113 | test_cfg=dict( 114 | rpn=dict( 115 | nms_across_levels=False, 116 | nms_pre=1000, 117 | nms_post=1000, 118 | max_num=1000, 119 | nms_thr=0.7, 120 | min_bbox_size=0), 121 | rcnn=dict( 122 | score_thr=0.05, 123 | nms=dict(type='nms', iou_threshold=0.5), 124 | max_per_img=100, 125 | mask_thr_binary=0.5))) 126 | -------------------------------------------------------------------------------- /det/configs/_base_/models/mask_rcnn_red50_neck_fpn_head.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | #pretrained='torchvision://resnet50', 5 | pretrained='/path/to/rednet50.pth', 6 | backbone=dict( 7 | type='RedNet', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | 
norm_eval=True, 14 | style='pytorch'), 15 | neck=dict( 16 | type='FPN_involution', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | num_outs=5), 20 | rpn_head=dict( 21 | type='RPNHead_involution', 22 | in_channels=256, 23 | feat_channels=256, 24 | anchor_generator=dict( 25 | type='AnchorGenerator', 26 | scales=[8], 27 | ratios=[0.5, 1.0, 2.0], 28 | strides=[4, 8, 16, 32, 64]), 29 | bbox_coder=dict( 30 | type='DeltaXYWHBBoxCoder', 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0]), 33 | loss_cls=dict( 34 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 35 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 36 | roi_head=dict( 37 | type='StandardRoIHead', 38 | bbox_roi_extractor=dict( 39 | type='SingleRoIExtractor', 40 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 41 | out_channels=256, 42 | featmap_strides=[4, 8, 16, 32]), 43 | bbox_head=dict( 44 | type='Shared2FCBBoxHead', 45 | in_channels=256, 46 | fc_out_channels=1024, 47 | roi_feat_size=7, 48 | num_classes=80, 49 | bbox_coder=dict( 50 | type='DeltaXYWHBBoxCoder', 51 | target_means=[0., 0., 0., 0.], 52 | target_stds=[0.1, 0.1, 0.2, 0.2]), 53 | reg_class_agnostic=False, 54 | loss_cls=dict( 55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 56 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 57 | mask_roi_extractor=dict( 58 | type='SingleRoIExtractor', 59 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 60 | out_channels=256, 61 | featmap_strides=[4, 8, 16, 32]), 62 | mask_head=dict( 63 | type='FCNMaskHead_involution', 64 | num_convs=4, 65 | in_channels=256, 66 | conv_out_channels=256, 67 | num_classes=80, 68 | loss_mask=dict( 69 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 70 | # model training and testing settings 71 | train_cfg=dict( 72 | rpn=dict( 73 | assigner=dict( 74 | type='MaxIoUAssigner', 75 | pos_iou_thr=0.7, 76 | neg_iou_thr=0.3, 77 | min_pos_iou=0.3, 78 | match_low_quality=True, 79 | ignore_iof_thr=-1), 80 | sampler=dict( 81 | type='RandomSampler', 82 | num=256, 83 | pos_fraction=0.5, 84 | neg_pos_ub=-1, 85 | add_gt_as_proposals=False), 86 | allowed_border=-1, 87 | pos_weight=-1, 88 | debug=False), 89 | rpn_proposal=dict( 90 | nms_across_levels=False, 91 | nms_pre=2000, 92 | nms_post=1000, 93 | max_num=1000, 94 | nms_thr=0.7, 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | assigner=dict( 98 | type='MaxIoUAssigner', 99 | pos_iou_thr=0.5, 100 | neg_iou_thr=0.5, 101 | min_pos_iou=0.5, 102 | match_low_quality=True, 103 | ignore_iof_thr=-1), 104 | sampler=dict( 105 | type='RandomSampler', 106 | num=512, 107 | pos_fraction=0.25, 108 | neg_pos_ub=-1, 109 | add_gt_as_proposals=True), 110 | mask_size=28, 111 | pos_weight=-1, 112 | debug=False)), 113 | test_cfg=dict( 114 | rpn=dict( 115 | nms_across_levels=False, 116 | nms_pre=1000, 117 | nms_post=1000, 118 | max_num=1000, 119 | nms_thr=0.7, 120 | min_bbox_size=0), 121 | rcnn=dict( 122 | score_thr=0.05, 123 | nms=dict(type='nms', iou_threshold=0.5), 124 | max_per_img=100, 125 | mask_thr_binary=0.5))) 126 | -------------------------------------------------------------------------------- /det/configs/_base_/models/retinanet_red50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | #pretrained='torchvision://resnet50', 5 | pretrained='/path/to/rednet50.pth', 6 | backbone=dict( 7 | type='RedNet', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | 
frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | norm_eval=True, 14 | style='pytorch'), 15 | neck=dict( 16 | type='FPN', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_input', 21 | num_outs=5), 22 | bbox_head=dict( 23 | type='RetinaHead', 24 | num_classes=80, 25 | in_channels=256, 26 | stacked_convs=4, 27 | feat_channels=256, 28 | anchor_generator=dict( 29 | type='AnchorGenerator', 30 | octave_base_scale=4, 31 | scales_per_octave=3, 32 | ratios=[0.5, 1.0, 2.0], 33 | strides=[8, 16, 32, 64, 128]), 34 | bbox_coder=dict( 35 | type='DeltaXYWHBBoxCoder', 36 | target_means=[.0, .0, .0, .0], 37 | target_stds=[1.0, 1.0, 1.0, 1.0]), 38 | loss_cls=dict( 39 | type='FocalLoss', 40 | use_sigmoid=True, 41 | gamma=2.0, 42 | alpha=0.25, 43 | loss_weight=1.0), 44 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 45 | # training and testing settings 46 | train_cfg=dict( 47 | assigner=dict( 48 | type='MaxIoUAssigner', 49 | pos_iou_thr=0.5, 50 | neg_iou_thr=0.4, 51 | min_pos_iou=0, 52 | ignore_iof_thr=-1), 53 | allowed_border=-1, 54 | pos_weight=-1, 55 | debug=False), 56 | test_cfg=dict( 57 | nms_pre=1000, 58 | min_bbox_size=0, 59 | score_thr=0.05, 60 | nms=dict(type='nms', iou_threshold=0.5), 61 | max_per_img=100)) 62 | -------------------------------------------------------------------------------- /det/configs/_base_/models/retinanet_red50_neck_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | #pretrained='torchvision://resnet50', 5 | pretrained='/path/to/rednet50.pth', 6 | backbone=dict( 7 | type='RedNet', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | norm_eval=True, 14 | style='pytorch'), 15 | neck=dict( 16 | type='FPN_involution', 17 | in_channels=[256, 512, 1024, 2048], 18 | out_channels=256, 19 | start_level=1, 20 | add_extra_convs='on_input', 21 | num_outs=5), 22 | bbox_head=dict( 23 | type='RetinaHead', 24 | num_classes=80, 25 | in_channels=256, 26 | stacked_convs=4, 27 | feat_channels=256, 28 | anchor_generator=dict( 29 | type='AnchorGenerator', 30 | octave_base_scale=4, 31 | scales_per_octave=3, 32 | ratios=[0.5, 1.0, 2.0], 33 | strides=[8, 16, 32, 64, 128]), 34 | bbox_coder=dict( 35 | type='DeltaXYWHBBoxCoder', 36 | target_means=[.0, .0, .0, .0], 37 | target_stds=[1.0, 1.0, 1.0, 1.0]), 38 | loss_cls=dict( 39 | type='FocalLoss', 40 | use_sigmoid=True, 41 | gamma=2.0, 42 | alpha=0.25, 43 | loss_weight=1.0), 44 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 45 | # training and testing settings 46 | train_cfg=dict( 47 | assigner=dict( 48 | type='MaxIoUAssigner', 49 | pos_iou_thr=0.5, 50 | neg_iou_thr=0.4, 51 | min_pos_iou=0, 52 | ignore_iof_thr=-1), 53 | allowed_border=-1, 54 | pos_weight=-1, 55 | debug=False), 56 | test_cfg=dict( 57 | nms_pre=1000, 58 | min_bbox_size=0, 59 | score_thr=0.05, 60 | nms=dict(type='nms', iou_threshold=0.5), 61 | max_per_img=100)) 62 | -------------------------------------------------------------------------------- /det/configs/_base_/schedules/schedule_1x_warmup.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=2000, 9 
| warmup_ratio=0.001, 10 | step=[8, 11]) 11 | total_epochs = 12 12 | -------------------------------------------------------------------------------- /det/configs/involution/faster_rcnn_red50_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_red50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /det/configs/involution/faster_rcnn_red50_neck_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_red50_neck_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x_warmup.py', '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /det/configs/involution/faster_rcnn_red50_neck_fpn_head_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/faster_rcnn_red50_neck_fpn_head.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x_warmup.py', '../_base_/default_runtime.py' 5 | ] 6 | optimizer_config = dict(grad_clip=dict(_delete_=True, max_norm=5, norm_type=2)) 7 | -------------------------------------------------------------------------------- /det/configs/involution/mask_rcnn_red50_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask_rcnn_red50_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /det/configs/involution/mask_rcnn_red50_neck_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask_rcnn_red50_neck_fpn.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_1x_warmup.py', '../_base_/default_runtime.py' 5 | ] 6 | -------------------------------------------------------------------------------- /det/configs/involution/mask_rcnn_red50_neck_fpn_head_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/mask_rcnn_red50_neck_fpn_head.py', 3 | '../_base_/datasets/coco_instance.py', 4 | '../_base_/schedules/schedule_1x_warmup.py', '../_base_/default_runtime.py' 5 | ] 6 | optimizer_config = dict(grad_clip=dict(_delete_=True, max_norm=5, norm_type=2)) 7 | -------------------------------------------------------------------------------- /det/configs/involution/retinanet_red50_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/retinanet_red50_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x_warmup.py', '../_base_/default_runtime.py' 5 | ] 6 | # optimizer 7 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 8 | -------------------------------------------------------------------------------- /det/configs/involution/retinanet_red50_neck_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 |
'../_base_/models/retinanet_red50_neck_fpn.py', 3 | '../_base_/datasets/coco_detection.py', 4 | '../_base_/schedules/schedule_1x_warmup.py', '../_base_/default_runtime.py' 5 | ] 6 | # optimizer 7 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 8 | optimizer_config = dict(grad_clip=dict(_delete_=True, max_norm=40, norm_type=2)) 9 | -------------------------------------------------------------------------------- /det/mmdet/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import warnings 3 | 4 | from mmcv.cnn import VGG 5 | from mmcv.runner.hooks import HOOKS, Hook 6 | 7 | from mmdet.datasets.builder import PIPELINES 8 | from mmdet.datasets.pipelines import LoadAnnotations, LoadImageFromFile 9 | from mmdet.models.dense_heads import GARPNHead, RPNHead, RPNHead_involution 10 | from mmdet.models.roi_heads.mask_heads import FusedSemanticHead 11 | 12 | 13 | def replace_ImageToTensor(pipelines): 14 | """Replace the ImageToTensor transform in a data pipeline to 15 | DefaultFormatBundle, which is normally useful in batch inference. 16 | 17 | Args: 18 | pipelines (list[dict]): Data pipeline configs. 19 | 20 | Returns: 21 | list: The new pipeline list with all ImageToTensor replaced by 22 | DefaultFormatBundle. 23 | 24 | Examples: 25 | >>> pipelines = [ 26 | ... dict(type='LoadImageFromFile'), 27 | ... dict( 28 | ... type='MultiScaleFlipAug', 29 | ... img_scale=(1333, 800), 30 | ... flip=False, 31 | ... transforms=[ 32 | ... dict(type='Resize', keep_ratio=True), 33 | ... dict(type='RandomFlip'), 34 | ... dict(type='Normalize', mean=[0, 0, 0], std=[1, 1, 1]), 35 | ... dict(type='Pad', size_divisor=32), 36 | ... dict(type='ImageToTensor', keys=['img']), 37 | ... dict(type='Collect', keys=['img']), 38 | ... ]) 39 | ... ] 40 | >>> expected_pipelines = [ 41 | ... dict(type='LoadImageFromFile'), 42 | ... dict( 43 | ... type='MultiScaleFlipAug', 44 | ... img_scale=(1333, 800), 45 | ... flip=False, 46 | ... transforms=[ 47 | ... dict(type='Resize', keep_ratio=True), 48 | ... dict(type='RandomFlip'), 49 | ... dict(type='Normalize', mean=[0, 0, 0], std=[1, 1, 1]), 50 | ... dict(type='Pad', size_divisor=32), 51 | ... dict(type='DefaultFormatBundle'), 52 | ... dict(type='Collect', keys=['img']), 53 | ... ]) 54 | ... ] 55 | >>> assert expected_pipelines == replace_ImageToTensor(pipelines) 56 | """ 57 | pipelines = copy.deepcopy(pipelines) 58 | for i, pipeline in enumerate(pipelines): 59 | if pipeline['type'] == 'MultiScaleFlipAug': 60 | assert 'transforms' in pipeline 61 | pipeline['transforms'] = replace_ImageToTensor( 62 | pipeline['transforms']) 63 | elif pipeline['type'] == 'ImageToTensor': 64 | warnings.warn( 65 | '"ImageToTensor" pipeline is replaced by ' 66 | '"DefaultFormatBundle" for batch inference. It is ' 67 | 'recommended to manually replace it in the test ' 68 | 'data pipeline in your config file.', UserWarning) 69 | pipelines[i] = {'type': 'DefaultFormatBundle'} 70 | return pipelines 71 | 72 | 73 | def get_loading_pipeline(pipeline): 74 | """Only keep loading image and annotations related configuration. 75 | 76 | Args: 77 | pipeline (list[dict]): Data pipeline configs. 78 | 79 | Returns: 80 | list[dict]: The new pipeline list with only keep 81 | loading image and annotations related configuration. 82 | 83 | Examples: 84 | >>> pipelines = [ 85 | ... dict(type='LoadImageFromFile'), 86 | ... dict(type='LoadAnnotations', with_bbox=True), 87 | ... 
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 88 | ... dict(type='RandomFlip', flip_ratio=0.5), 89 | ... dict(type='Normalize', **img_norm_cfg), 90 | ... dict(type='Pad', size_divisor=32), 91 | ... dict(type='DefaultFormatBundle'), 92 | ... dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) 93 | ... ] 94 | >>> expected_pipelines = [ 95 | ... dict(type='LoadImageFromFile'), 96 | ... dict(type='LoadAnnotations', with_bbox=True) 97 | ... ] 98 | >>> assert expected_pipelines ==\ 99 | ... get_loading_pipeline(pipelines) 100 | """ 101 | loading_pipeline_cfg = [] 102 | for cfg in pipeline: 103 | obj_cls = PIPELINES.get(cfg['type']) 104 | # TODO:use more elegant way to distinguish loading modules 105 | if obj_cls is not None and obj_cls in (LoadImageFromFile, 106 | LoadAnnotations): 107 | loading_pipeline_cfg.append(cfg) 108 | assert len(loading_pipeline_cfg) == 2, \ 109 | 'The data pipeline in your config file must include ' \ 110 | 'loading image and annotations related pipeline.' 111 | return loading_pipeline_cfg 112 | 113 | 114 | @HOOKS.register_module() 115 | class NumClassCheckHook(Hook): 116 | 117 | def _check_head(self, runner): 118 | """Check whether the `num_classes` in head matches the length of 119 | `CLASSSES` in `dataset`. 120 | 121 | Args: 122 | runner (obj:`EpochBasedRunner`): Epoch based Runner. 123 | """ 124 | model = runner.model 125 | dataset = runner.data_loader.dataset 126 | if dataset.CLASSES is None: 127 | runner.logger.warning( 128 | f'Please set `CLASSES` ' 129 | f'in the {dataset.__class__.__name__} and' 130 | f'check if it is consistent with the `num_classes` ' 131 | f'of head') 132 | else: 133 | for name, module in model.named_modules(): 134 | if hasattr(module, 'num_classes') and not isinstance( 135 | module, (RPNHead, RPNHead_involution, VGG, FusedSemanticHead, GARPNHead)): 136 | assert module.num_classes == len(dataset.CLASSES), \ 137 | (f'The `num_classes` ({module.num_classes}) in ' 138 | f'{module.__class__.__name__} of ' 139 | f'{model.__class__.__name__} does not matches ' 140 | f'the length of `CLASSES` ' 141 | f'{len(dataset.CLASSES)}) in ' 142 | f'{dataset.__class__.__name__}') 143 | 144 | def before_train_epoch(self, runner): 145 | """Check whether the training dataset is compatible with head. 146 | 147 | Args: 148 | runner (obj:`EpochBasedRunner`): Epoch based Runner. 149 | """ 150 | self._check_head(runner) 151 | 152 | def before_val_epoch(self, runner): 153 | """Check whether the dataset in val epoch is compatible with head. 154 | 155 | Args: 156 | runner (obj:`EpochBasedRunner`): Epoch based Runner. 
157 | """ 158 | self._check_head(runner) 159 | -------------------------------------------------------------------------------- /det/mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .darknet import Darknet 2 | from .detectors_resnet import DetectoRS_ResNet 3 | from .detectors_resnext import DetectoRS_ResNeXt 4 | from .hourglass import HourglassNet 5 | from .hrnet import HRNet 6 | from .regnet import RegNet 7 | from .res2net import Res2Net 8 | from .resnest import ResNeSt 9 | from .resnet import ResNet, ResNetV1d 10 | from .resnext import ResNeXt 11 | from .ssd_vgg import SSDVGG 12 | from .trident_resnet import TridentResNet 13 | from .rednet import RedNet 14 | 15 | __all__ = [ 16 | 'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net', 17 | 'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'Darknet', 18 | 'ResNeSt', 'TridentResNet', 19 | 'RedNet' 20 | ] 21 | -------------------------------------------------------------------------------- /det/mmdet/models/backbones/base_backbone.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABCMeta, abstractmethod 3 | 4 | import torch.nn as nn 5 | from mmcv.runner import load_checkpoint 6 | 7 | 8 | class BaseBackbone(nn.Module, metaclass=ABCMeta): 9 | """Base backbone. 10 | 11 | This class defines the basic functions of a backbone. 12 | Any backbone that inherits this class should at least 13 | define its own `forward` function. 14 | 15 | """ 16 | 17 | def __init__(self): 18 | super(BaseBackbone, self).__init__() 19 | 20 | def init_weights(self, pretrained=None): 21 | """Init backbone weights 22 | 23 | Args: 24 | pretrained (str | None): If pretrained is a string, then it 25 | initializes backbone weights by loading the pretrained 26 | checkpoint. If pretrained is None, then it follows default 27 | initializer or customized initializer in subclasses. 28 | """ 29 | if isinstance(pretrained, str): 30 | logger = logging.getLogger() 31 | load_checkpoint(self, pretrained, strict=False, logger=logger) 32 | elif pretrained is None: 33 | # use default initializer or customized initializer in subclasses 34 | pass 35 | else: 36 | raise TypeError('pretrained must be a str or None.' 37 | f' But received {type(pretrained)}.') 38 | 39 | @abstractmethod 40 | def forward(self, x): 41 | """Forward computation 42 | 43 | Args: 44 | x (tensor | tuple[tensor]): x could be a Torch.tensor or a tuple of 45 | Torch.tensor, containing input data for forward computation. 46 | """ 47 | pass 48 | 49 | def train(self, mode=True): 50 | """Set module status before forward computation 51 | 52 | Args: 53 | mode (bool): Whether it is train_mode or test_mode 54 | """ 55 | super(BaseBackbone, self).train(mode) 56 | -------------------------------------------------------------------------------- /det/mmdet/models/backbones/rednet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.utils.checkpoint as cp 3 | from mmcv.cnn import (ConvModule, build_conv_layer, build_norm_layer, 4 | constant_init, kaiming_init) 5 | from mmcv.utils.parrots_wrapper import _BatchNorm 6 | 7 | from ..builder import BACKBONES 8 | from .base_backbone import BaseBackbone 9 | from ..utils.involution_cuda import involution 10 | 11 | 12 | class Bottleneck(nn.Module): 13 | """Bottleneck block for ResNet. 14 | 15 | Args: 16 | in_channels (int): Input channels of this block. 
17 | out_channels (int): Output channels of this block. 18 | expansion (int): The ratio of ``out_channels/mid_channels`` where 19 | ``mid_channels`` is the input/output channels of conv2. Default: 4. 20 | stride (int): stride of the block. Default: 1 21 | dilation (int): dilation of convolution. Default: 1 22 | downsample (nn.Module): downsample operation on identity branch. 23 | Default: None. 24 | style (str): ``"pytorch"`` or ``"caffe"``. If set to "pytorch", the 25 | stride-two layer is the 3x3 conv layer, otherwise the stride-two 26 | layer is the first 1x1 conv layer. Default: "pytorch". 27 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some 28 | memory while slowing down the training speed. 29 | conv_cfg (dict): dictionary to construct and config conv layer. 30 | Default: None 31 | norm_cfg (dict): dictionary to construct and config norm layer. 32 | Default: dict(type='BN') 33 | """ 34 | 35 | def __init__(self, 36 | in_channels, 37 | out_channels, 38 | expansion=4, 39 | stride=1, 40 | dilation=1, 41 | downsample=None, 42 | style='pytorch', 43 | with_cp=False, 44 | conv_cfg=None, 45 | norm_cfg=dict(type='BN')): 46 | super(Bottleneck, self).__init__() 47 | assert style in ['pytorch', 'caffe'] 48 | 49 | self.in_channels = in_channels 50 | self.out_channels = out_channels 51 | self.expansion = expansion 52 | assert out_channels % expansion == 0 53 | self.mid_channels = out_channels // expansion 54 | self.stride = stride 55 | self.dilation = dilation 56 | self.style = style 57 | self.with_cp = with_cp 58 | self.conv_cfg = conv_cfg 59 | self.norm_cfg = norm_cfg 60 | 61 | if self.style == 'pytorch': 62 | self.conv1_stride = 1 63 | self.conv2_stride = stride 64 | else: 65 | self.conv1_stride = stride 66 | self.conv2_stride = 1 67 | 68 | self.norm1_name, norm1 = build_norm_layer( 69 | norm_cfg, self.mid_channels, postfix=1) 70 | self.norm2_name, norm2 = build_norm_layer( 71 | norm_cfg, self.mid_channels, postfix=2) 72 | self.norm3_name, norm3 = build_norm_layer( 73 | norm_cfg, out_channels, postfix=3) 74 | 75 | self.conv1 = build_conv_layer( 76 | conv_cfg, 77 | in_channels, 78 | self.mid_channels, 79 | kernel_size=1, 80 | stride=self.conv1_stride, 81 | bias=False) 82 | self.add_module(self.norm1_name, norm1) 83 | self.conv2 = involution(self.mid_channels, 7, self.conv2_stride) 84 | 85 | self.add_module(self.norm2_name, norm2) 86 | self.conv3 = build_conv_layer( 87 | conv_cfg, 88 | self.mid_channels, 89 | out_channels, 90 | kernel_size=1, 91 | bias=False) 92 | self.add_module(self.norm3_name, norm3) 93 | 94 | self.relu = nn.ReLU(inplace=True) 95 | self.downsample = downsample 96 | 97 | @property 98 | def norm1(self): 99 | return getattr(self, self.norm1_name) 100 | 101 | @property 102 | def norm2(self): 103 | return getattr(self, self.norm2_name) 104 | 105 | @property 106 | def norm3(self): 107 | return getattr(self, self.norm3_name) 108 | 109 | def forward(self, x): 110 | 111 | def _inner_forward(x): 112 | identity = x 113 | 114 | out = self.conv1(x) 115 | out = self.norm1(out) 116 | out = self.relu(out) 117 | 118 | out = self.conv2(out) 119 | out = self.norm2(out) 120 | out = self.relu(out) 121 | 122 | out = self.conv3(out) 123 | out = self.norm3(out) 124 | 125 | if self.downsample is not None: 126 | identity = self.downsample(x) 127 | 128 | out += identity 129 | 130 | return out 131 | 132 | if self.with_cp and x.requires_grad: 133 | out = cp.checkpoint(_inner_forward, x) 134 | else: 135 | out = _inner_forward(x) 136 | 137 | out = self.relu(out) 138 | 139 | return out 
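# Editorial sketch (not part of the original rednet.py): a small helper showing how the
# Bottleneck above is typically exercised.  It is never called here; running it assumes a
# CUDA device (conv2 is the cupy-based involution from ..utils.involution_cuda) and that
# this module is imported normally as part of the mmdet package.
def _bottleneck_shape_check_example():
    import torch
    # in_channels == out_channels and stride == 1, so no downsample branch is needed
    block = Bottleneck(in_channels=256, out_channels=256, expansion=4).cuda().eval()
    with torch.no_grad():
        out = block(torch.rand(1, 256, 56, 56).cuda())
    # conv1 reduces to mid_channels=64, the 7x7 involution keeps H x W, conv3 expands back,
    # and the residual add gives (1, 256, 56, 56)
    return tuple(out.shape)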
140 | 141 | 142 | def get_expansion(block, expansion=None): 143 | """Get the expansion of a residual block. 144 | 145 | The block expansion will be obtained by the following order: 146 | 147 | 1. If ``expansion`` is given, just return it. 148 | 2. If ``block`` has the attribute ``expansion``, then return 149 | ``block.expansion``. 150 | 3. Return the default value according the the block type: 151 | 1 for ``BasicBlock`` and 4 for ``Bottleneck``. 152 | 153 | Args: 154 | block (class): The block class. 155 | expansion (int | None): The given expansion ratio. 156 | 157 | Returns: 158 | int: The expansion of the block. 159 | """ 160 | if isinstance(expansion, int): 161 | assert expansion > 0 162 | elif expansion is None: 163 | if hasattr(block, 'expansion'): 164 | expansion = block.expansion 165 | elif issubclass(block, Bottleneck): 166 | expansion = 4 167 | else: 168 | raise TypeError(f'expansion is not specified for {block.__name__}') 169 | else: 170 | raise TypeError('expansion must be an integer or None') 171 | 172 | return expansion 173 | 174 | 175 | class ResLayer(nn.Sequential): 176 | """ResLayer to build ResNet style backbone. 177 | 178 | Args: 179 | block (nn.Module): Residual block used to build ResLayer. 180 | num_blocks (int): Number of blocks. 181 | in_channels (int): Input channels of this block. 182 | out_channels (int): Output channels of this block. 183 | expansion (int, optional): The expansion for BasicBlock/Bottleneck. 184 | If not specified, it will firstly be obtained via 185 | ``block.expansion``. If the block has no attribute "expansion", 186 | the following default values will be used: 1 for BasicBlock and 187 | 4 for Bottleneck. Default: None. 188 | stride (int): stride of the first block. Default: 1. 189 | avg_down (bool): Use AvgPool instead of stride conv when 190 | downsampling in the bottleneck. Default: False 191 | conv_cfg (dict): dictionary to construct and config conv layer. 192 | Default: None 193 | norm_cfg (dict): dictionary to construct and config norm layer. 
194 | Default: dict(type='BN') 195 | """ 196 | 197 | def __init__(self, 198 | block, 199 | num_blocks, 200 | in_channels, 201 | out_channels, 202 | expansion=None, 203 | stride=1, 204 | avg_down=False, 205 | conv_cfg=None, 206 | norm_cfg=dict(type='BN'), 207 | **kwargs): 208 | self.block = block 209 | self.expansion = get_expansion(block, expansion) 210 | 211 | downsample = None 212 | if stride != 1 or in_channels != out_channels: 213 | downsample = [] 214 | conv_stride = stride 215 | if avg_down and stride != 1: 216 | conv_stride = 1 217 | downsample.append( 218 | nn.AvgPool2d( 219 | kernel_size=stride, 220 | stride=stride, 221 | ceil_mode=True, 222 | count_include_pad=False)) 223 | downsample.extend([ 224 | build_conv_layer( 225 | conv_cfg, 226 | in_channels, 227 | out_channels, 228 | kernel_size=1, 229 | stride=conv_stride, 230 | bias=False), 231 | build_norm_layer(norm_cfg, out_channels)[1] 232 | ]) 233 | downsample = nn.Sequential(*downsample) 234 | 235 | layers = [] 236 | layers.append( 237 | block( 238 | in_channels=in_channels, 239 | out_channels=out_channels, 240 | expansion=self.expansion, 241 | stride=stride, 242 | downsample=downsample, 243 | conv_cfg=conv_cfg, 244 | norm_cfg=norm_cfg, 245 | **kwargs)) 246 | in_channels = out_channels 247 | for i in range(1, num_blocks): 248 | layers.append( 249 | block( 250 | in_channels=in_channels, 251 | out_channels=out_channels, 252 | expansion=self.expansion, 253 | stride=1, 254 | conv_cfg=conv_cfg, 255 | norm_cfg=norm_cfg, 256 | **kwargs)) 257 | super(ResLayer, self).__init__(*layers) 258 | 259 | 260 | @BACKBONES.register_module() 261 | class RedNet(BaseBackbone): 262 | """ResNet backbone. 263 | 264 | Please refer to the `paper `_ for 265 | details. 266 | 267 | Args: 268 | depth (int): Network depth, from {18, 34, 50, 101, 152}. 269 | in_channels (int): Number of input image channels. Default: 3. 270 | stem_channels (int): Output channels of the stem layer. Default: 64. 271 | base_channels (int): Middle channels of the first stage. Default: 64. 272 | num_stages (int): Stages of the network. Default: 4. 273 | strides (Sequence[int]): Strides of the first block of each stage. 274 | Default: ``(1, 2, 2, 2)``. 275 | dilations (Sequence[int]): Dilation of each stage. 276 | Default: ``(1, 1, 1, 1)``. 277 | out_indices (Sequence[int]): Output from which stages. If only one 278 | stage is specified, a single tensor (feature map) is returned, 279 | otherwise multiple stages are specified, a tuple of tensors will 280 | be returned. Default: ``(3, )``. 281 | style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two 282 | layer is the 3x3 conv layer, otherwise the stride-two layer is 283 | the first 1x1 conv layer. 284 | deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv. 285 | Default: False. 286 | avg_down (bool): Use AvgPool instead of stride conv when 287 | downsampling in the bottleneck. Default: False. 288 | frozen_stages (int): Stages to be frozen (stop grad and set eval mode). 289 | -1 means not freezing any parameters. Default: -1. 290 | conv_cfg (dict | None): The config dict for conv layers. Default: None. 291 | norm_cfg (dict): The config dict for norm layers. 292 | norm_eval (bool): Whether to set norm layers to eval mode, namely, 293 | freeze running stats (mean and var). Note: Effect on Batch Norm 294 | and its variants only. Default: False. 295 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some 296 | memory while slowing down the training speed. Default: False. 
297 | zero_init_residual (bool): Whether to use zero init for last norm layer 298 | in resblocks to let them behave as identity. Default: True. 299 | 300 | Example: 301 | >>> from mmcls.models import ResNet 302 | >>> import torch 303 | >>> self = ResNet(depth=18) 304 | >>> self.eval() 305 | >>> inputs = torch.rand(1, 3, 32, 32) 306 | >>> level_outputs = self.forward(inputs) 307 | >>> for level_out in level_outputs: 308 | ... print(tuple(level_out.shape)) 309 | (1, 64, 8, 8) 310 | (1, 128, 4, 4) 311 | (1, 256, 2, 2) 312 | (1, 512, 1, 1) 313 | """ 314 | 315 | arch_settings = { 316 | 26: (Bottleneck, (1, 2, 4, 1)), 317 | 38: (Bottleneck, (2, 3, 5, 2)), 318 | 50: (Bottleneck, (3, 4, 6, 3)), 319 | 101: (Bottleneck, (3, 4, 23, 3)), 320 | 152: (Bottleneck, (3, 8, 36, 3)) 321 | } 322 | 323 | def __init__(self, 324 | depth, 325 | in_channels=3, 326 | stem_channels=64, 327 | base_channels=64, 328 | expansion=None, 329 | num_stages=4, 330 | strides=(1, 2, 2, 2), 331 | dilations=(1, 1, 1, 1), 332 | out_indices=(3, ), 333 | style='pytorch', 334 | avg_down=False, 335 | frozen_stages=-1, 336 | conv_cfg=None, 337 | norm_cfg=dict(type='BN', requires_grad=True), 338 | norm_eval=False, 339 | with_cp=False, 340 | zero_init_residual=True): 341 | super(RedNet, self).__init__() 342 | if depth not in self.arch_settings: 343 | raise KeyError(f'invalid depth {depth} for resnet') 344 | self.depth = depth 345 | self.stem_channels = stem_channels 346 | self.base_channels = base_channels 347 | self.num_stages = num_stages 348 | assert num_stages >= 1 and num_stages <= 4 349 | self.strides = strides 350 | self.dilations = dilations 351 | assert len(strides) == len(dilations) == num_stages 352 | self.out_indices = out_indices 353 | assert max(out_indices) < num_stages 354 | self.style = style 355 | self.avg_down = avg_down 356 | self.frozen_stages = frozen_stages 357 | self.conv_cfg = conv_cfg 358 | self.norm_cfg = norm_cfg 359 | self.with_cp = with_cp 360 | self.norm_eval = norm_eval 361 | self.zero_init_residual = zero_init_residual 362 | self.block, stage_blocks = self.arch_settings[depth] 363 | self.stage_blocks = stage_blocks[:num_stages] 364 | self.expansion = get_expansion(self.block, expansion) 365 | 366 | self._make_stem_layer(in_channels, stem_channels) 367 | 368 | self.res_layers = [] 369 | _in_channels = stem_channels 370 | _out_channels = base_channels * self.expansion 371 | for i, num_blocks in enumerate(self.stage_blocks): 372 | stride = strides[i] 373 | dilation = dilations[i] 374 | res_layer = self.make_res_layer( 375 | block=self.block, 376 | num_blocks=num_blocks, 377 | in_channels=_in_channels, 378 | out_channels=_out_channels, 379 | expansion=self.expansion, 380 | stride=stride, 381 | dilation=dilation, 382 | style=self.style, 383 | avg_down=self.avg_down, 384 | with_cp=with_cp, 385 | conv_cfg=conv_cfg, 386 | norm_cfg=norm_cfg) 387 | _in_channels = _out_channels 388 | _out_channels *= 2 389 | layer_name = f'layer{i + 1}' 390 | self.add_module(layer_name, res_layer) 391 | self.res_layers.append(layer_name) 392 | 393 | self._freeze_stages() 394 | 395 | self.feat_dim = res_layer[-1].out_channels 396 | 397 | def make_res_layer(self, **kwargs): 398 | return ResLayer(**kwargs) 399 | 400 | @property 401 | def norm1(self): 402 | return getattr(self, self.norm1_name) 403 | 404 | def _make_stem_layer(self, in_channels, stem_channels): 405 | self.stem = nn.Sequential( 406 | ConvModule( 407 | in_channels, 408 | stem_channels // 2, 409 | kernel_size=3, 410 | stride=2, 411 | padding=1, 412 | 
conv_cfg=self.conv_cfg, 413 | norm_cfg=self.norm_cfg, 414 | inplace=True), 415 | involution(stem_channels // 2, 7, 1), 416 | nn.BatchNorm2d(stem_channels // 2), 417 | nn.ReLU(inplace=True), 418 | ConvModule( 419 | stem_channels // 2, 420 | stem_channels, 421 | kernel_size=3, 422 | stride=1, 423 | padding=1, 424 | conv_cfg=self.conv_cfg, 425 | norm_cfg=self.norm_cfg, 426 | inplace=True)) 427 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 428 | 429 | def _freeze_stages(self): 430 | if self.frozen_stages >= 0: 431 | self.stem.eval() 432 | for param in self.stem.parameters(): 433 | param.requires_grad = False 434 | 435 | for i in range(1, self.frozen_stages + 1): 436 | m = getattr(self, f'layer{i}') 437 | m.eval() 438 | for param in m.parameters(): 439 | param.requires_grad = False 440 | 441 | def init_weights(self, pretrained=None): 442 | super(RedNet, self).init_weights(pretrained) 443 | if pretrained is None: 444 | for m in self.modules(): 445 | if isinstance(m, nn.Conv2d): 446 | kaiming_init(m) 447 | elif isinstance(m, (_BatchNorm, nn.GroupNorm)): 448 | constant_init(m, 1) 449 | 450 | if self.zero_init_residual: 451 | for m in self.modules(): 452 | if isinstance(m, Bottleneck): 453 | constant_init(m.norm3, 0) 454 | 455 | def forward(self, x): 456 | x = self.stem(x) 457 | x = self.maxpool(x) 458 | outs = [] 459 | for i, layer_name in enumerate(self.res_layers): 460 | res_layer = getattr(self, layer_name) 461 | x = res_layer(x) 462 | if i in self.out_indices: 463 | outs.append(x) 464 | if len(outs) == 1: 465 | return outs[0] 466 | else: 467 | return tuple(outs) 468 | 469 | def train(self, mode=True): 470 | super(RedNet, self).train(mode) 471 | self._freeze_stages() 472 | if mode and self.norm_eval: 473 | for m in self.modules(): 474 | # trick: eval have effect on BatchNorm only 475 | if isinstance(m, _BatchNorm): 476 | m.eval() 477 | 478 | -------------------------------------------------------------------------------- /det/mmdet/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_free_head import AnchorFreeHead 2 | from .anchor_head import AnchorHead 3 | from .atss_head import ATSSHead 4 | from .cascade_rpn_head import CascadeRPNHead, StageCascadeRPNHead 5 | from .centripetal_head import CentripetalHead 6 | from .corner_head import CornerHead 7 | from .embedding_rpn_head import EmbeddingRPNHead 8 | from .fcos_head import FCOSHead 9 | from .fovea_head import FoveaHead 10 | from .free_anchor_retina_head import FreeAnchorRetinaHead 11 | from .fsaf_head import FSAFHead 12 | from .ga_retina_head import GARetinaHead 13 | from .ga_rpn_head import GARPNHead 14 | from .gfl_head import GFLHead 15 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead 16 | from .nasfcos_head import NASFCOSHead 17 | from .paa_head import PAAHead 18 | from .pisa_retinanet_head import PISARetinaHead 19 | from .pisa_ssd_head import PISASSDHead 20 | from .reppoints_head import RepPointsHead 21 | from .retina_head import RetinaHead 22 | from .retina_sepbn_head import RetinaSepBNHead 23 | from .rpn_head import RPNHead 24 | from .rpn_head_involution import RPNHead_involution 25 | from .sabl_retina_head import SABLRetinaHead 26 | from .ssd_head import SSDHead 27 | from .transformer_head import TransformerHead 28 | from .vfnet_head import VFNetHead 29 | from .yolact_head import YOLACTHead, YOLACTProtonet, YOLACTSegmHead 30 | from .yolo_head import YOLOV3Head 31 | 32 | __all__ = [ 33 | 'AnchorFreeHead', 'AnchorHead', 
'GuidedAnchorHead', 'FeatureAdaption', 34 | 'RPNHead', 'GARPNHead', 'RetinaHead', 'RetinaSepBNHead', 'GARetinaHead', 35 | 'SSDHead', 'FCOSHead', 'RepPointsHead', 'FoveaHead', 36 | 'FreeAnchorRetinaHead', 'ATSSHead', 'FSAFHead', 'NASFCOSHead', 37 | 'PISARetinaHead', 'PISASSDHead', 'GFLHead', 'CornerHead', 'YOLACTHead', 38 | 'YOLACTSegmHead', 'YOLACTProtonet', 'YOLOV3Head', 'PAAHead', 39 | 'SABLRetinaHead', 'CentripetalHead', 'VFNetHead', 'TransformerHead', 40 | 'StageCascadeRPNHead', 'CascadeRPNHead', 'EmbeddingRPNHead', 41 | 'RPNHead_involution' 42 | ] 43 | -------------------------------------------------------------------------------- /det/mmdet/models/dense_heads/rpn_head_involution.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import warnings 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from mmcv import ConfigDict 8 | from mmcv.cnn import normal_init 9 | from mmcv.ops import batched_nms 10 | 11 | from ..builder import HEADS 12 | from .anchor_head import AnchorHead 13 | from .rpn_test_mixin import RPNTestMixin 14 | from ..utils.involution_cuda import involution 15 | 16 | 17 | @HEADS.register_module() 18 | class RPNHead_involution(RPNTestMixin, AnchorHead): 19 | """RPN head. 20 | 21 | Args: 22 | in_channels (int): Number of channels in the input feature map. 23 | """ # noqa: W605 24 | 25 | def __init__(self, in_channels, **kwargs): 26 | super(RPNHead_involution, self).__init__(1, in_channels, **kwargs) 27 | 28 | def _init_layers(self): 29 | """Initialize layers of the head.""" 30 | self.rpn_conv = involution(self.in_channels, 7, 1) 31 | self.rpn_cls = nn.Conv2d(self.feat_channels, 32 | self.num_anchors * self.cls_out_channels, 1) 33 | self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1) 34 | 35 | def init_weights(self): 36 | """Initialize weights of the head.""" 37 | normal_init(self.rpn_conv, std=0.01) 38 | normal_init(self.rpn_cls, std=0.01) 39 | normal_init(self.rpn_reg, std=0.01) 40 | 41 | def forward_single(self, x): 42 | """Forward feature map of a single scale level.""" 43 | x = self.rpn_conv(x) 44 | x = F.relu(x, inplace=True) 45 | rpn_cls_score = self.rpn_cls(x) 46 | rpn_bbox_pred = self.rpn_reg(x) 47 | return rpn_cls_score, rpn_bbox_pred 48 | 49 | def loss(self, 50 | cls_scores, 51 | bbox_preds, 52 | gt_bboxes, 53 | img_metas, 54 | gt_bboxes_ignore=None): 55 | """Compute losses of the head. 56 | 57 | Args: 58 | cls_scores (list[Tensor]): Box scores for each scale level 59 | Has shape (N, num_anchors * num_classes, H, W) 60 | bbox_preds (list[Tensor]): Box energies / deltas for each scale 61 | level with shape (N, num_anchors * 4, H, W) 62 | gt_bboxes (list[Tensor]): Ground truth bboxes for each image with 63 | shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format. 64 | img_metas (list[dict]): Meta information of each image, e.g., 65 | image size, scaling factor, etc. 66 | gt_bboxes_ignore (None | list[Tensor]): specify which bounding 67 | boxes can be ignored when computing the loss. 68 | 69 | Returns: 70 | dict[str, Tensor]: A dictionary of loss components. 
71 | """ 72 | losses = super(RPNHead_involution, self).loss( 73 | cls_scores, 74 | bbox_preds, 75 | gt_bboxes, 76 | None, 77 | img_metas, 78 | gt_bboxes_ignore=gt_bboxes_ignore) 79 | return dict( 80 | loss_rpn_cls=losses['loss_cls'], loss_rpn_bbox=losses['loss_bbox']) 81 | 82 | def _get_bboxes(self, 83 | cls_scores, 84 | bbox_preds, 85 | mlvl_anchors, 86 | img_shapes, 87 | scale_factors, 88 | cfg, 89 | rescale=False): 90 | """Transform outputs for a single batch item into bbox predictions. 91 | 92 | Args: 93 | cls_scores (list[Tensor]): Box scores for each scale level 94 | Has shape (N, num_anchors * num_classes, H, W). 95 | bbox_preds (list[Tensor]): Box energies / deltas for each scale 96 | level with shape (N, num_anchors * 4, H, W). 97 | mlvl_anchors (list[Tensor]): Box reference for each scale level 98 | with shape (num_total_anchors, 4). 99 | img_shapes (list[tuple[int]]): Shape of the input image, 100 | (height, width, 3). 101 | scale_factors (list[ndarray]): Scale factor of the image arange as 102 | (w_scale, h_scale, w_scale, h_scale). 103 | cfg (mmcv.Config): Test / postprocessing configuration, 104 | if None, test_cfg would be used. 105 | rescale (bool): If True, return boxes in original image space. 106 | 107 | Returns: 108 | list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple. 109 | The first item is an (n, 5) tensor, where the first 4 columns 110 | are bounding box positions (tl_x, tl_y, br_x, br_y) and the 111 | 5-th column is a score between 0 and 1. The second item is a 112 | (n,) tensor where each item is the predicted class labelof the 113 | corresponding box. 114 | """ 115 | cfg = self.test_cfg if cfg is None else cfg 116 | cfg = copy.deepcopy(cfg) 117 | # bboxes from different level should be independent during NMS, 118 | # level_ids are used as labels for batched NMS to separate them 119 | level_ids = [] 120 | mlvl_scores = [] 121 | mlvl_bbox_preds = [] 122 | mlvl_valid_anchors = [] 123 | batch_size = cls_scores[0].shape[0] 124 | nms_pre_tensor = torch.tensor( 125 | cfg.nms_pre, device=cls_scores[0].device, dtype=torch.long) 126 | for idx in range(len(cls_scores)): 127 | rpn_cls_score = cls_scores[idx] 128 | rpn_bbox_pred = bbox_preds[idx] 129 | assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:] 130 | rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1) 131 | if self.use_sigmoid_cls: 132 | rpn_cls_score = rpn_cls_score.reshape(batch_size, -1) 133 | scores = rpn_cls_score.sigmoid() 134 | else: 135 | rpn_cls_score = rpn_cls_score.reshape(batch_size, -1, 2) 136 | # We set FG labels to [0, num_class-1] and BG label to 137 | # num_class in RPN head since mmdet v2.5, which is unified to 138 | # be consistent with other head since mmdet v2.0. In mmdet v2.0 139 | # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head. 
140 | scores = rpn_cls_score.softmax(-1)[..., 0] 141 | rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).reshape( 142 | batch_size, -1, 4) 143 | anchors = mlvl_anchors[idx] 144 | anchors = anchors.expand_as(rpn_bbox_pred) 145 | if nms_pre_tensor > 0: 146 | # sort is faster than topk 147 | # _, topk_inds = scores.topk(cfg.nms_pre) 148 | # keep topk op for dynamic k in onnx model 149 | if torch.onnx.is_in_onnx_export(): 150 | # sort op will be converted to TopK in onnx 151 | # and k<=3480 in TensorRT 152 | scores_shape = torch._shape_as_tensor(scores) 153 | nms_pre = torch.where(scores_shape[1] < nms_pre_tensor, 154 | scores_shape[1], nms_pre_tensor) 155 | _, topk_inds = scores.topk(nms_pre) 156 | batch_inds = torch.arange(batch_size).view( 157 | -1, 1).expand_as(topk_inds) 158 | scores = scores[batch_inds, topk_inds] 159 | rpn_bbox_pred = rpn_bbox_pred[batch_inds, topk_inds, :] 160 | anchors = anchors[batch_inds, topk_inds, :] 161 | 162 | elif scores.shape[-1] > cfg.nms_pre: 163 | ranked_scores, rank_inds = scores.sort(descending=True) 164 | topk_inds = rank_inds[:, :cfg.nms_pre] 165 | scores = ranked_scores[:, :cfg.nms_pre] 166 | batch_inds = torch.arange(batch_size).view( 167 | -1, 1).expand_as(topk_inds) 168 | rpn_bbox_pred = rpn_bbox_pred[batch_inds, topk_inds, :] 169 | anchors = anchors[batch_inds, topk_inds, :] 170 | 171 | mlvl_scores.append(scores) 172 | mlvl_bbox_preds.append(rpn_bbox_pred) 173 | mlvl_valid_anchors.append(anchors) 174 | level_ids.append( 175 | scores.new_full(( 176 | batch_size, 177 | scores.size(1), 178 | ), 179 | idx, 180 | dtype=torch.long)) 181 | 182 | batch_mlvl_scores = torch.cat(mlvl_scores, dim=1) 183 | batch_mlvl_anchors = torch.cat(mlvl_valid_anchors, dim=1) 184 | batch_mlvl_rpn_bbox_pred = torch.cat(mlvl_bbox_preds, dim=1) 185 | batch_mlvl_proposals = self.bbox_coder.decode( 186 | batch_mlvl_anchors, batch_mlvl_rpn_bbox_pred, max_shape=img_shapes) 187 | batch_mlvl_ids = torch.cat(level_ids, dim=1) 188 | 189 | # deprecate arguments warning 190 | if 'nms' not in cfg or 'max_num' in cfg or 'nms_thr' in cfg: 191 | warnings.warn( 192 | 'In rpn_proposal or test_cfg, ' 193 | 'nms_thr has been moved to a dict named nms as ' 194 | 'iou_threshold, max_num has been renamed as max_per_img, ' 195 | 'name of original arguments and the way to specify ' 196 | 'iou_threshold of NMS will be deprecated.') 197 | if 'nms' not in cfg: 198 | cfg.nms = ConfigDict(dict(type='nms', iou_threshold=cfg.nms_thr)) 199 | if 'max_num' in cfg: 200 | if 'max_per_img' in cfg: 201 | assert cfg.max_num == cfg.max_per_img, f'You ' \ 202 | f'set max_num and ' \ 203 | f'max_per_img at the same time, but get {cfg.max_num} ' \ 204 | f'and {cfg.max_per_img} respectively' \ 205 | 'Please delete max_num which will be deprecated.' 206 | else: 207 | cfg.max_per_img = cfg.max_num 208 | if 'nms_thr' in cfg: 209 | assert cfg.nms.iou_threshold == cfg.nms_thr, f'You set' \ 210 | f' iou_threshold in nms and ' \ 211 | f'nms_thr at the same time, but get' \ 212 | f' {cfg.nms.iou_threshold} and {cfg.nms_thr}' \ 213 | f' respectively. Please delete the nms_thr ' \ 214 | f'which will be deprecated.' 
215 | 216 | result_list = [] 217 | for (mlvl_proposals, mlvl_scores, 218 | mlvl_ids) in zip(batch_mlvl_proposals, batch_mlvl_scores, 219 | batch_mlvl_ids): 220 | # Skip nonzero op while exporting to ONNX 221 | if cfg.min_bbox_size > 0 and (not torch.onnx.is_in_onnx_export()): 222 | w = mlvl_proposals[:, 2] - mlvl_proposals[:, 0] 223 | h = mlvl_proposals[:, 3] - mlvl_proposals[:, 1] 224 | valid_ind = torch.nonzero( 225 | (w >= cfg.min_bbox_size) 226 | & (h >= cfg.min_bbox_size), 227 | as_tuple=False).squeeze() 228 | if valid_ind.sum().item() != len(mlvl_proposals): 229 | mlvl_proposals = mlvl_proposals[valid_ind, :] 230 | mlvl_scores = mlvl_scores[valid_ind] 231 | mlvl_ids = mlvl_ids[valid_ind] 232 | 233 | dets, keep = batched_nms(mlvl_proposals, mlvl_scores, mlvl_ids, 234 | cfg.nms) 235 | result_list.append(dets[:cfg.max_per_img]) 236 | return result_list 237 | -------------------------------------------------------------------------------- /det/mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .bfp import BFP 2 | from .channel_mapper import ChannelMapper 3 | from .fpn import FPN 4 | from .fpn_involution import FPN_involution 5 | from .fpn_carafe import FPN_CARAFE 6 | from .hrfpn import HRFPN 7 | from .nas_fpn import NASFPN 8 | from .nasfcos_fpn import NASFCOS_FPN 9 | from .pafpn import PAFPN 10 | from .rfp import RFP 11 | from .yolo_neck import YOLOV3Neck 12 | 13 | __all__ = [ 14 | 'FPN', 'BFP', 'ChannelMapper', 'HRFPN', 'NASFPN', 'FPN_CARAFE', 'PAFPN', 15 | 'NASFCOS_FPN', 'RFP', 'YOLOV3Neck', 16 | 'FPN_involution' 17 | ] 18 | -------------------------------------------------------------------------------- /det/mmdet/models/necks/fpn_involution.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import ConvModule, xavier_init 4 | from mmcv.runner import auto_fp16 5 | 6 | from ..builder import NECKS 7 | from ..utils.involution_cuda import involution 8 | 9 | 10 | @NECKS.register_module() 11 | class FPN_involution(nn.Module): 12 | r"""Feature Pyramid Network. 13 | 14 | This is an implementation of paper `Feature Pyramid Networks for Object 15 | Detection `_. 16 | 17 | Args: 18 | in_channels (List[int]): Number of input channels per scale. 19 | out_channels (int): Number of output channels (used at each scale) 20 | num_outs (int): Number of output scales. 21 | start_level (int): Index of the start input backbone level used to 22 | build the feature pyramid. Default: 0. 23 | end_level (int): Index of the end input backbone level (exclusive) to 24 | build the feature pyramid. Default: -1, which means the last level. 25 | add_extra_convs (bool | str): If bool, it decides whether to add conv 26 | layers on top of the original feature maps. Default to False. 27 | If True, its actual mode is specified by `extra_convs_on_inputs`. 28 | If str, it specifies the source feature map of the extra convs. 29 | Only the following options are allowed 30 | 31 | - 'on_input': Last feat map of neck inputs (i.e. backbone feature). 32 | - 'on_lateral': Last feature map after lateral convs. 33 | - 'on_output': The last output feature map after fpn convs. 34 | extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs 35 | on the original feature from the backbone. If True, 36 | it is equivalent to `add_extra_convs='on_input'`. If False, it is 37 | equivalent to set `add_extra_convs='on_output'`. Default to True. 
38 | relu_before_extra_convs (bool): Whether to apply relu before the extra 39 | conv. Default: False. 40 | no_norm_on_lateral (bool): Whether to apply norm on lateral. 41 | Default: False. 42 | conv_cfg (dict): Config dict for convolution layer. Default: None. 43 | norm_cfg (dict): Config dict for normalization layer. Default: None. 44 | act_cfg (str): Config dict for activation layer in ConvModule. 45 | Default: None. 46 | upsample_cfg (dict): Config dict for interpolate layer. 47 | Default: `dict(mode='nearest')` 48 | 49 | Example: 50 | >>> import torch 51 | >>> in_channels = [2, 3, 5, 7] 52 | >>> scales = [340, 170, 84, 43] 53 | >>> inputs = [torch.rand(1, c, s, s) 54 | ... for c, s in zip(in_channels, scales)] 55 | >>> self = FPN(in_channels, 11, len(in_channels)).eval() 56 | >>> outputs = self.forward(inputs) 57 | >>> for i in range(len(outputs)): 58 | ... print(f'outputs[{i}].shape = {outputs[i].shape}') 59 | outputs[0].shape = torch.Size([1, 11, 340, 340]) 60 | outputs[1].shape = torch.Size([1, 11, 170, 170]) 61 | outputs[2].shape = torch.Size([1, 11, 84, 84]) 62 | outputs[3].shape = torch.Size([1, 11, 43, 43]) 63 | """ 64 | 65 | def __init__(self, 66 | in_channels, 67 | out_channels, 68 | num_outs, 69 | start_level=0, 70 | end_level=-1, 71 | add_extra_convs=False, 72 | extra_convs_on_inputs=True, 73 | relu_before_extra_convs=False, 74 | no_norm_on_lateral=False, 75 | conv_cfg=None, 76 | norm_cfg=None, 77 | act_cfg=None, 78 | upsample_cfg=dict(mode='nearest')): 79 | super(FPN_involution, self).__init__() 80 | assert isinstance(in_channels, list) 81 | self.in_channels = in_channels 82 | self.out_channels = out_channels 83 | self.num_ins = len(in_channels) 84 | self.num_outs = num_outs 85 | self.relu_before_extra_convs = relu_before_extra_convs 86 | self.no_norm_on_lateral = no_norm_on_lateral 87 | self.fp16_enabled = False 88 | self.upsample_cfg = upsample_cfg.copy() 89 | 90 | if end_level == -1: 91 | self.backbone_end_level = self.num_ins 92 | assert num_outs >= self.num_ins - start_level 93 | else: 94 | # if end_level < inputs, no extra level is allowed 95 | self.backbone_end_level = end_level 96 | assert end_level <= len(in_channels) 97 | assert num_outs == end_level - start_level 98 | self.start_level = start_level 99 | self.end_level = end_level 100 | self.add_extra_convs = add_extra_convs 101 | assert isinstance(add_extra_convs, (str, bool)) 102 | if isinstance(add_extra_convs, str): 103 | # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' 104 | assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') 105 | elif add_extra_convs: # True 106 | if extra_convs_on_inputs: 107 | # For compatibility with previous release 108 | # TODO: deprecate `extra_convs_on_inputs` 109 | self.add_extra_convs = 'on_input' 110 | else: 111 | self.add_extra_convs = 'on_output' 112 | 113 | self.lateral_convs = nn.ModuleList() 114 | self.fpn_convs = nn.ModuleList() 115 | 116 | for i in range(self.start_level, self.backbone_end_level): 117 | l_conv = ConvModule( 118 | in_channels[i], 119 | out_channels, 120 | 1, 121 | conv_cfg=conv_cfg, 122 | norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, 123 | act_cfg=act_cfg, 124 | inplace=False) 125 | fpn_conv = involution(out_channels, 7, 1) 126 | 127 | self.lateral_convs.append(l_conv) 128 | self.fpn_convs.append(fpn_conv) 129 | 130 | # add extra conv layers (e.g., RetinaNet) 131 | extra_levels = num_outs - self.backbone_end_level + self.start_level 132 | if self.add_extra_convs and extra_levels >= 1: 133 | for i in 
range(extra_levels): 134 | if i == 0 and self.add_extra_convs == 'on_input': 135 | in_channels = self.in_channels[self.backbone_end_level - 1] 136 | else: 137 | in_channels = out_channels 138 | extra_fpn_conv = ConvModule( 139 | in_channels, 140 | out_channels, 141 | 3, 142 | stride=2, 143 | padding=1, 144 | conv_cfg=conv_cfg, 145 | norm_cfg=norm_cfg, 146 | act_cfg=act_cfg, 147 | inplace=False) 148 | self.fpn_convs.append(extra_fpn_conv) 149 | 150 | # default init_weights for conv(msra) and norm in ConvModule 151 | def init_weights(self): 152 | """Initialize the weights of FPN module.""" 153 | for m in self.modules(): 154 | if isinstance(m, nn.Conv2d): 155 | xavier_init(m, distribution='uniform') 156 | 157 | @auto_fp16() 158 | def forward(self, inputs): 159 | """Forward function.""" 160 | assert len(inputs) == len(self.in_channels) 161 | 162 | # build laterals 163 | laterals = [ 164 | lateral_conv(inputs[i + self.start_level]) 165 | for i, lateral_conv in enumerate(self.lateral_convs) 166 | ] 167 | 168 | # build top-down path 169 | used_backbone_levels = len(laterals) 170 | for i in range(used_backbone_levels - 1, 0, -1): 171 | # In some cases, fixing `scale factor` (e.g. 2) is preferred, but 172 | # it cannot co-exist with `size` in `F.interpolate`. 173 | if 'scale_factor' in self.upsample_cfg: 174 | laterals[i - 1] += F.interpolate(laterals[i], 175 | **self.upsample_cfg) 176 | else: 177 | prev_shape = laterals[i - 1].shape[2:] 178 | laterals[i - 1] += F.interpolate( 179 | laterals[i], size=prev_shape, **self.upsample_cfg) 180 | 181 | # build outputs 182 | # part 1: from original levels 183 | outs = [ 184 | self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) 185 | ] 186 | # part 2: add extra levels 187 | if self.num_outs > len(outs): 188 | # use max pool to get more levels on top of outputs 189 | # (e.g., Faster R-CNN, Mask R-CNN) 190 | if not self.add_extra_convs: 191 | for i in range(self.num_outs - used_backbone_levels): 192 | outs.append(F.max_pool2d(outs[-1], 1, stride=2)) 193 | # add conv layers on top of original feature maps (RetinaNet) 194 | else: 195 | if self.add_extra_convs == 'on_input': 196 | extra_source = inputs[self.backbone_end_level - 1] 197 | elif self.add_extra_convs == 'on_lateral': 198 | extra_source = laterals[-1] 199 | elif self.add_extra_convs == 'on_output': 200 | extra_source = outs[-1] 201 | else: 202 | raise NotImplementedError 203 | outs.append(self.fpn_convs[used_backbone_levels](extra_source)) 204 | for i in range(used_backbone_levels + 1, self.num_outs): 205 | if self.relu_before_extra_convs: 206 | outs.append(self.fpn_convs[i](F.relu(outs[-1]))) 207 | else: 208 | outs.append(self.fpn_convs[i](outs[-1])) 209 | return tuple(outs) 210 | -------------------------------------------------------------------------------- /det/mmdet/models/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_roi_head import BaseRoIHead 2 | from .bbox_heads import (BBoxHead, ConvFCBBoxHead, DoubleConvFCBBoxHead, 3 | SCNetBBoxHead, Shared2FCBBoxHead, 4 | Shared4Conv1FCBBoxHead) 5 | from .cascade_roi_head import CascadeRoIHead 6 | from .double_roi_head import DoubleHeadRoIHead 7 | from .dynamic_roi_head import DynamicRoIHead 8 | from .grid_roi_head import GridRoIHead 9 | from .htc_roi_head import HybridTaskCascadeRoIHead 10 | from .mask_heads import (CoarseMaskHead, FCNMaskHead, FeatureRelayHead, 11 | FusedSemanticHead, GlobalContextHead, GridHead, 12 | HTCMaskHead, MaskIoUHead, MaskPointHead, 13 | 
SCNetMaskHead, SCNetSemanticHead, FCNMaskHead_involution) 14 | from .mask_scoring_roi_head import MaskScoringRoIHead 15 | from .pisa_roi_head import PISARoIHead 16 | from .point_rend_roi_head import PointRendRoIHead 17 | from .roi_extractors import SingleRoIExtractor 18 | from .scnet_roi_head import SCNetRoIHead 19 | from .shared_heads import ResLayer 20 | from .sparse_roi_head import SparseRoIHead 21 | from .standard_roi_head import StandardRoIHead 22 | from .trident_roi_head import TridentRoIHead 23 | 24 | __all__ = [ 25 | 'BaseRoIHead', 'CascadeRoIHead', 'DoubleHeadRoIHead', 'MaskScoringRoIHead', 26 | 'HybridTaskCascadeRoIHead', 'GridRoIHead', 'ResLayer', 'BBoxHead', 27 | 'ConvFCBBoxHead', 'Shared2FCBBoxHead', 'StandardRoIHead', 28 | 'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'FCNMaskHead', 29 | 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 'MaskIoUHead', 30 | 'SingleRoIExtractor', 'PISARoIHead', 'PointRendRoIHead', 'MaskPointHead', 31 | 'CoarseMaskHead', 'DynamicRoIHead', 'SparseRoIHead', 'TridentRoIHead', 32 | 'SCNetRoIHead', 'SCNetMaskHead', 'SCNetSemanticHead', 'SCNetBBoxHead', 33 | 'FeatureRelayHead', 'GlobalContextHead', 34 | 'FCNMaskHead_involution' 35 | ] 36 | -------------------------------------------------------------------------------- /det/mmdet/models/roi_heads/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .coarse_mask_head import CoarseMaskHead 2 | from .fcn_mask_head import FCNMaskHead 3 | from .fcn_mask_head_involution import FCNMaskHead_involution 4 | from .feature_relay_head import FeatureRelayHead 5 | from .fused_semantic_head import FusedSemanticHead 6 | from .global_context_head import GlobalContextHead 7 | from .grid_head import GridHead 8 | from .htc_mask_head import HTCMaskHead 9 | from .mask_point_head import MaskPointHead 10 | from .maskiou_head import MaskIoUHead 11 | from .scnet_mask_head import SCNetMaskHead 12 | from .scnet_semantic_head import SCNetSemanticHead 13 | 14 | __all__ = [ 15 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 16 | 'MaskIoUHead', 'CoarseMaskHead', 'MaskPointHead', 'SCNetMaskHead', 17 | 'SCNetSemanticHead', 'GlobalContextHead', 'FeatureRelayHead', 18 | 'FCNMaskHead_involution' 19 | ] 20 | -------------------------------------------------------------------------------- /det/mmdet/models/roi_heads/mask_heads/fcn_mask_head_involution.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from mmcv.cnn import Conv2d, ConvModule, build_upsample_layer 6 | from mmcv.ops.carafe import CARAFEPack 7 | from mmcv.runner import auto_fp16, force_fp32 8 | from torch.nn.modules.utils import _pair 9 | 10 | from mmdet.core import mask_target 11 | from mmdet.models.builder import HEADS, build_loss 12 | from mmdet.models.utils.involution_cuda import involution 13 | 14 | BYTES_PER_FLOAT = 4 15 | # TODO: This memory limit may be too much or too little. It would be better to 16 | # determine it based on available resources. 
17 | GPU_MEM_LIMIT = 1024**3 # 1 GB memory limit 18 | 19 | 20 | @HEADS.register_module() 21 | class FCNMaskHead_involution(nn.Module): 22 | 23 | def __init__(self, 24 | num_convs=4, 25 | roi_feat_size=14, 26 | in_channels=256, 27 | conv_kernel_size=3, 28 | conv_out_channels=256, 29 | num_classes=80, 30 | class_agnostic=False, 31 | upsample_cfg=dict(type='deconv', scale_factor=2), 32 | conv_cfg=None, 33 | norm_cfg=None, 34 | loss_mask=dict( 35 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)): 36 | super(FCNMaskHead_involution, self).__init__() 37 | self.upsample_cfg = upsample_cfg.copy() 38 | if self.upsample_cfg['type'] not in [ 39 | None, 'deconv', 'nearest', 'bilinear', 'carafe' 40 | ]: 41 | raise ValueError( 42 | f'Invalid upsample method {self.upsample_cfg["type"]}, ' 43 | 'accepted methods are "deconv", "nearest", "bilinear", ' 44 | '"carafe"') 45 | self.num_convs = num_convs 46 | # WARN: roi_feat_size is reserved and not used 47 | self.roi_feat_size = _pair(roi_feat_size) 48 | self.in_channels = in_channels 49 | self.conv_kernel_size = conv_kernel_size 50 | self.conv_out_channels = conv_out_channels 51 | self.upsample_method = self.upsample_cfg.get('type') 52 | self.scale_factor = self.upsample_cfg.pop('scale_factor', None) 53 | self.num_classes = num_classes 54 | self.class_agnostic = class_agnostic 55 | self.conv_cfg = conv_cfg 56 | self.norm_cfg = norm_cfg 57 | self.fp16_enabled = False 58 | self.loss_mask = build_loss(loss_mask) 59 | 60 | self.convs = nn.ModuleList() 61 | for i in range(self.num_convs): 62 | in_channels = ( 63 | self.in_channels if i == 0 else self.conv_out_channels) 64 | padding = (self.conv_kernel_size - 1) // 2 65 | self.convs.append(nn.Sequential( 66 | involution(in_channels, 7, 1), 67 | nn.ReLU(inplace=True))) 68 | # ConvModule( 69 | # in_channels, 70 | # self.conv_out_channels, 71 | # self.conv_kernel_size, 72 | # padding=padding, 73 | # conv_cfg=conv_cfg, 74 | # norm_cfg=norm_cfg)) 75 | upsample_in_channels = ( 76 | self.conv_out_channels if self.num_convs > 0 else in_channels) 77 | upsample_cfg_ = self.upsample_cfg.copy() 78 | if self.upsample_method is None: 79 | self.upsample = None 80 | elif self.upsample_method == 'deconv': 81 | upsample_cfg_.update( 82 | in_channels=upsample_in_channels, 83 | out_channels=self.conv_out_channels, 84 | kernel_size=self.scale_factor, 85 | stride=self.scale_factor) 86 | self.upsample = build_upsample_layer(upsample_cfg_) 87 | elif self.upsample_method == 'carafe': 88 | upsample_cfg_.update( 89 | channels=upsample_in_channels, scale_factor=self.scale_factor) 90 | self.upsample = build_upsample_layer(upsample_cfg_) 91 | else: 92 | # suppress warnings 93 | align_corners = (None 94 | if self.upsample_method == 'nearest' else False) 95 | upsample_cfg_.update( 96 | scale_factor=self.scale_factor, 97 | mode=self.upsample_method, 98 | align_corners=align_corners) 99 | self.upsample = build_upsample_layer(upsample_cfg_) 100 | 101 | out_channels = 1 if self.class_agnostic else self.num_classes 102 | logits_in_channel = ( 103 | self.conv_out_channels 104 | if self.upsample_method == 'deconv' else upsample_in_channels) 105 | self.conv_logits = Conv2d(logits_in_channel, out_channels, 1) 106 | self.relu = nn.ReLU(inplace=True) 107 | self.debug_imgs = None 108 | 109 | def init_weights(self): 110 | for m in [self.upsample, self.conv_logits]: 111 | if m is None: 112 | continue 113 | elif isinstance(m, CARAFEPack): 114 | m.init_weights() 115 | else: 116 | nn.init.kaiming_normal_( 117 | m.weight, mode='fan_out', 
nonlinearity='relu') 118 | nn.init.constant_(m.bias, 0) 119 | 120 | @auto_fp16() 121 | def forward(self, x): 122 | for conv in self.convs: 123 | x = conv(x) 124 | if self.upsample is not None: 125 | x = self.upsample(x) 126 | if self.upsample_method == 'deconv': 127 | x = self.relu(x) 128 | mask_pred = self.conv_logits(x) 129 | return mask_pred 130 | 131 | def get_targets(self, sampling_results, gt_masks, rcnn_train_cfg): 132 | pos_proposals = [res.pos_bboxes for res in sampling_results] 133 | pos_assigned_gt_inds = [ 134 | res.pos_assigned_gt_inds for res in sampling_results 135 | ] 136 | mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds, 137 | gt_masks, rcnn_train_cfg) 138 | return mask_targets 139 | 140 | @force_fp32(apply_to=('mask_pred', )) 141 | def loss(self, mask_pred, mask_targets, labels): 142 | """ 143 | Example: 144 | >>> from mmdet.models.roi_heads.mask_heads.fcn_mask_head import * # NOQA 145 | >>> N = 7 # N = number of extracted ROIs 146 | >>> C, H, W = 11, 32, 32 147 | >>> # Create example instance of FCN Mask Head. 148 | >>> # There are lots of variations depending on the configuration 149 | >>> self = FCNMaskHead(num_classes=C, num_convs=1) 150 | >>> inputs = torch.rand(N, self.in_channels, H, W) 151 | >>> mask_pred = self.forward(inputs) 152 | >>> sf = self.scale_factor 153 | >>> labels = torch.randint(0, C, size=(N,)) 154 | >>> # With the default properties the mask targets should indicate 155 | >>> # a (potentially soft) single-class label 156 | >>> mask_targets = torch.rand(N, H * sf, W * sf) 157 | >>> loss = self.loss(mask_pred, mask_targets, labels) 158 | >>> print('loss = {!r}'.format(loss)) 159 | """ 160 | loss = dict() 161 | if mask_pred.size(0) == 0: 162 | loss_mask = mask_pred.sum() 163 | else: 164 | if self.class_agnostic: 165 | loss_mask = self.loss_mask(mask_pred, mask_targets, 166 | torch.zeros_like(labels)) 167 | else: 168 | loss_mask = self.loss_mask(mask_pred, mask_targets, labels) 169 | loss['loss_mask'] = loss_mask 170 | return loss 171 | 172 | def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, 173 | ori_shape, scale_factor, rescale): 174 | """Get segmentation masks from mask_pred and bboxes. 175 | 176 | Args: 177 | mask_pred (Tensor or ndarray): shape (n, #class, h, w). 178 | For single-scale testing, mask_pred is the direct output of 179 | model, whose type is Tensor, while for multi-scale testing, 180 | it will be converted to numpy array outside of this method. 181 | det_bboxes (Tensor): shape (n, 4/5) 182 | det_labels (Tensor): shape (n, ) 183 | rcnn_test_cfg (dict): rcnn testing config 184 | ori_shape (Tuple): original image height and width, shape (2,) 185 | scale_factor(float | Tensor): If ``rescale is True``, box 186 | coordinates are divided by this scale factor to fit 187 | ``ori_shape``. 188 | rescale (bool): If True, the resulting masks will be rescaled to 189 | ``ori_shape``. 190 | 191 | Returns: 192 | list[list]: encoded masks. The c-th item in the outer list 193 | corresponds to the c-th class. Given the c-th outer list, the 194 | i-th item in that inner list is the mask for the i-th box with 195 | class label c. 196 | 197 | Example: 198 | >>> import mmcv 199 | >>> from mmdet.models.roi_heads.mask_heads.fcn_mask_head import * # NOQA 200 | >>> N = 7 # N = number of extracted ROIs 201 | >>> C, H, W = 11, 32, 32 202 | >>> # Create example instance of FCN Mask Head. 
203 | >>> self = FCNMaskHead(num_classes=C, num_convs=0) 204 | >>> inputs = torch.rand(N, self.in_channels, H, W) 205 | >>> mask_pred = self.forward(inputs) 206 | >>> # Each input is associated with some bounding box 207 | >>> det_bboxes = torch.Tensor([[1, 1, 42, 42 ]] * N) 208 | >>> det_labels = torch.randint(0, C, size=(N,)) 209 | >>> rcnn_test_cfg = mmcv.Config({'mask_thr_binary': 0, }) 210 | >>> ori_shape = (H * 4, W * 4) 211 | >>> scale_factor = torch.FloatTensor((1, 1)) 212 | >>> rescale = False 213 | >>> # Encoded masks are a list for each category. 214 | >>> encoded_masks = self.get_seg_masks( 215 | >>> mask_pred, det_bboxes, det_labels, rcnn_test_cfg, ori_shape, 216 | >>> scale_factor, rescale 217 | >>> ) 218 | >>> assert len(encoded_masks) == C 219 | >>> assert sum(list(map(len, encoded_masks))) == N 220 | """ 221 | if isinstance(mask_pred, torch.Tensor): 222 | mask_pred = mask_pred.sigmoid() 223 | else: 224 | mask_pred = det_bboxes.new_tensor(mask_pred) 225 | 226 | device = mask_pred.device 227 | cls_segms = [[] for _ in range(self.num_classes) 228 | ] # BG is not included in num_classes 229 | bboxes = det_bboxes[:, :4] 230 | labels = det_labels 231 | 232 | if rescale: 233 | img_h, img_w = ori_shape[:2] 234 | else: 235 | if isinstance(scale_factor, float): 236 | img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32) 237 | img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32) 238 | else: 239 | w_scale, h_scale = scale_factor[0], scale_factor[1] 240 | img_h = np.round(ori_shape[0] * h_scale.item()).astype( 241 | np.int32) 242 | img_w = np.round(ori_shape[1] * w_scale.item()).astype( 243 | np.int32) 244 | scale_factor = 1.0 245 | 246 | if not isinstance(scale_factor, (float, torch.Tensor)): 247 | scale_factor = bboxes.new_tensor(scale_factor) 248 | bboxes = bboxes / scale_factor 249 | 250 | if torch.onnx.is_in_onnx_export(): 251 | # TODO: Remove after F.grid_sample is supported. 252 | from torchvision.models.detection.roi_heads \ 253 | import paste_masks_in_image 254 | masks = paste_masks_in_image(mask_pred, bboxes, ori_shape[:2]) 255 | thr = rcnn_test_cfg.get('mask_thr_binary', 0) 256 | if thr > 0: 257 | masks = masks >= thr 258 | return masks 259 | 260 | N = len(mask_pred) 261 | # The actual implementation split the input into chunks, 262 | # and paste them chunk by chunk. 263 | if device.type == 'cpu': 264 | # CPU is most efficient when they are pasted one by one with 265 | # skip_empty=True, so that it performs minimal number of 266 | # operations. 
267 | num_chunks = N 268 | else: 269 | # GPU benefits from parallelism for larger chunks, 270 | # but may have memory issues 271 | num_chunks = int( 272 | np.ceil(N * img_h * img_w * BYTES_PER_FLOAT / GPU_MEM_LIMIT)) 273 | assert (num_chunks <= 274 | N), 'Default GPU_MEM_LIMIT is too small; try increasing it' 275 | chunks = torch.chunk(torch.arange(N, device=device), num_chunks) 276 | 277 | threshold = rcnn_test_cfg.mask_thr_binary 278 | im_mask = torch.zeros( 279 | N, 280 | img_h, 281 | img_w, 282 | device=device, 283 | dtype=torch.bool if threshold >= 0 else torch.uint8) 284 | 285 | if not self.class_agnostic: 286 | mask_pred = mask_pred[range(N), labels][:, None] 287 | 288 | for inds in chunks: 289 | masks_chunk, spatial_inds = _do_paste_mask( 290 | mask_pred[inds], 291 | bboxes[inds], 292 | img_h, 293 | img_w, 294 | skip_empty=device.type == 'cpu') 295 | 296 | if threshold >= 0: 297 | masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool) 298 | else: 299 | # for visualization and debugging 300 | masks_chunk = (masks_chunk * 255).to(dtype=torch.uint8) 301 | 302 | im_mask[(inds, ) + spatial_inds] = masks_chunk 303 | 304 | for i in range(N): 305 | cls_segms[labels[i]].append(im_mask[i].detach().cpu().numpy()) 306 | return cls_segms 307 | 308 | 309 | def _do_paste_mask(masks, boxes, img_h, img_w, skip_empty=True): 310 | """Paste instance masks according to boxes. 311 | 312 | This implementation is modified from 313 | https://github.com/facebookresearch/detectron2/ 314 | 315 | Args: 316 | masks (Tensor): N, 1, H, W 317 | boxes (Tensor): N, 4 318 | img_h (int): Height of the image to be pasted. 319 | img_w (int): Width of the image to be pasted. 320 | skip_empty (bool): Only paste masks within the region that 321 | tightly bounds all boxes, and return the results for this region only. 322 | An important optimization for CPU. 323 | 324 | Returns: 325 | tuple: (Tensor, tuple). The first item is the mask tensor, the second one 326 | is the slice object. 327 | If skip_empty == False, the whole image will be pasted. It will 328 | return a mask of shape (N, img_h, img_w) and an empty tuple. 329 | If skip_empty == True, only the area around the mask will be pasted. 330 | A mask of shape (N, h', w') and its start and end coordinates 331 | in the original image will be returned. 332 | """ 333 | # On GPU, paste all masks together (up to chunk size) 334 | # by using the entire image to sample the masks 335 | # Compared to pasting them one by one, 336 | # this has more operations but is faster on a COCO-scale dataset. 
337 | device = masks.device 338 | if skip_empty: 339 | x0_int, y0_int = torch.clamp( 340 | boxes.min(dim=0).values.floor()[:2] - 1, 341 | min=0).to(dtype=torch.int32) 342 | x1_int = torch.clamp( 343 | boxes[:, 2].max().ceil() + 1, max=img_w).to(dtype=torch.int32) 344 | y1_int = torch.clamp( 345 | boxes[:, 3].max().ceil() + 1, max=img_h).to(dtype=torch.int32) 346 | else: 347 | x0_int, y0_int = 0, 0 348 | x1_int, y1_int = img_w, img_h 349 | x0, y0, x1, y1 = torch.split(boxes, 1, dim=1) # each is Nx1 350 | 351 | N = masks.shape[0] 352 | 353 | img_y = torch.arange( 354 | y0_int, y1_int, device=device, dtype=torch.float32) + 0.5 355 | img_x = torch.arange( 356 | x0_int, x1_int, device=device, dtype=torch.float32) + 0.5 357 | img_y = (img_y - y0) / (y1 - y0) * 2 - 1 358 | img_x = (img_x - x0) / (x1 - x0) * 2 - 1 359 | # img_x, img_y have shapes (N, w), (N, h) 360 | if torch.isinf(img_x).any(): 361 | inds = torch.where(torch.isinf(img_x)) 362 | img_x[inds] = 0 363 | if torch.isinf(img_y).any(): 364 | inds = torch.where(torch.isinf(img_y)) 365 | img_y[inds] = 0 366 | 367 | gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1)) 368 | gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1)) 369 | grid = torch.stack([gx, gy], dim=3) 370 | 371 | if torch.onnx.is_in_onnx_export(): 372 | raise RuntimeError( 373 | 'Exporting F.grid_sample from Pytorch to ONNX is not supported.') 374 | img_masks = F.grid_sample( 375 | masks.to(dtype=torch.float32), grid, align_corners=False) 376 | 377 | if skip_empty: 378 | return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int)) 379 | else: 380 | return img_masks[:, 0], () 381 | -------------------------------------------------------------------------------- /det/mmdet/models/utils/involution_cuda.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | import torch 3 | from torch.nn.modules.utils import _pair 4 | import torch.nn.functional as F 5 | import torch.nn as nn 6 | from mmcv.cnn import ConvModule 7 | 8 | 9 | from collections import namedtuple 10 | import cupy 11 | from string import Template 12 | 13 | 14 | Stream = namedtuple('Stream', ['ptr']) 15 | 16 | 17 | def Dtype(t): 18 | if isinstance(t, torch.cuda.FloatTensor): 19 | return 'float' 20 | elif isinstance(t, torch.cuda.DoubleTensor): 21 | return 'double' 22 | 23 | 24 | @cupy._util.memoize(for_each_device=True) 25 | def load_kernel(kernel_name, code, **kwargs): 26 | code = Template(code).substitute(**kwargs) 27 | kernel_code = cupy.cuda.compile_with_cache(code) 28 | return kernel_code.get_function(kernel_name) 29 | 30 | 31 | CUDA_NUM_THREADS = 1024 32 | 33 | kernel_loop = ''' 34 | #define CUDA_KERNEL_LOOP(i, n) \ 35 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 36 | i < (n); \ 37 | i += blockDim.x * gridDim.x) 38 | ''' 39 | 40 | 41 | def GET_BLOCKS(N): 42 | return (N + CUDA_NUM_THREADS - 1) // CUDA_NUM_THREADS 43 | 44 | 45 | _involution_kernel = kernel_loop + ''' 46 | extern "C" 47 | __global__ void involution_forward_kernel( 48 | const ${Dtype}* bottom_data, const ${Dtype}* weight_data, ${Dtype}* top_data) { 49 | CUDA_KERNEL_LOOP(index, ${nthreads}) { 50 | const int n = index / ${channels} / ${top_height} / ${top_width}; 51 | const int c = (index / ${top_height} / ${top_width}) % ${channels}; 52 | const int h = (index / ${top_width}) % ${top_height}; 53 | const int w = index % ${top_width}; 54 | const int g = c / (${channels} / ${groups}); 55 | ${Dtype} value = 0; 56 | #pragma unroll 57 | for (int 
kh = 0; kh < ${kernel_h}; ++kh) { 58 | #pragma unroll 59 | for (int kw = 0; kw < ${kernel_w}; ++kw) { 60 | const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h}; 61 | const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w}; 62 | if ((h_in >= 0) && (h_in < ${bottom_height}) 63 | && (w_in >= 0) && (w_in < ${bottom_width})) { 64 | const int offset = ((n * ${channels} + c) * ${bottom_height} + h_in) 65 | * ${bottom_width} + w_in; 66 | const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h) 67 | * ${top_width} + w; 68 | value += weight_data[offset_weight] * bottom_data[offset]; 69 | } 70 | } 71 | } 72 | top_data[index] = value; 73 | } 74 | } 75 | ''' 76 | 77 | 78 | _involution_kernel_backward_grad_input = kernel_loop + ''' 79 | extern "C" 80 | __global__ void involution_backward_grad_input_kernel( 81 | const ${Dtype}* const top_diff, const ${Dtype}* const weight_data, ${Dtype}* const bottom_diff) { 82 | CUDA_KERNEL_LOOP(index, ${nthreads}) { 83 | const int n = index / ${channels} / ${bottom_height} / ${bottom_width}; 84 | const int c = (index / ${bottom_height} / ${bottom_width}) % ${channels}; 85 | const int h = (index / ${bottom_width}) % ${bottom_height}; 86 | const int w = index % ${bottom_width}; 87 | const int g = c / (${channels} / ${groups}); 88 | ${Dtype} value = 0; 89 | #pragma unroll 90 | for (int kh = 0; kh < ${kernel_h}; ++kh) { 91 | #pragma unroll 92 | for (int kw = 0; kw < ${kernel_w}; ++kw) { 93 | const int h_out_s = h + ${pad_h} - kh * ${dilation_h}; 94 | const int w_out_s = w + ${pad_w} - kw * ${dilation_w}; 95 | if (((h_out_s % ${stride_h}) == 0) && ((w_out_s % ${stride_w}) == 0)) { 96 | const int h_out = h_out_s / ${stride_h}; 97 | const int w_out = w_out_s / ${stride_w}; 98 | if ((h_out >= 0) && (h_out < ${top_height}) 99 | && (w_out >= 0) && (w_out < ${top_width})) { 100 | const int offset = ((n * ${channels} + c) * ${top_height} + h_out) 101 | * ${top_width} + w_out; 102 | const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h_out) 103 | * ${top_width} + w_out; 104 | value += weight_data[offset_weight] * top_diff[offset]; 105 | } 106 | } 107 | } 108 | } 109 | bottom_diff[index] = value; 110 | } 111 | } 112 | ''' 113 | 114 | 115 | _involution_kernel_backward_grad_weight = kernel_loop + ''' 116 | extern "C" 117 | __global__ void involution_backward_grad_weight_kernel( 118 | const ${Dtype}* const top_diff, const ${Dtype}* const bottom_data, ${Dtype}* const buffer_data) { 119 | CUDA_KERNEL_LOOP(index, ${nthreads}) { 120 | const int h = (index / ${top_width}) % ${top_height}; 121 | const int w = index % ${top_width}; 122 | const int kh = (index / ${kernel_w} / ${top_height} / ${top_width}) 123 | % ${kernel_h}; 124 | const int kw = (index / ${top_height} / ${top_width}) % ${kernel_w}; 125 | const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h}; 126 | const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w}; 127 | if ((h_in >= 0) && (h_in < ${bottom_height}) 128 | && (w_in >= 0) && (w_in < ${bottom_width})) { 129 | const int g = (index / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${groups}; 130 | const int n = (index / ${groups} / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${num}; 131 | ${Dtype} value = 0; 132 | #pragma unroll 133 | for (int c = g * (${channels} / ${groups}); c < (g + 1) * (${channels} / ${groups}); ++c) { 134 | const int top_offset = ((n * ${channels} + c) * ${top_height} + h) 
135 | * ${top_width} + w; 136 | const int bottom_offset = ((n * ${channels} + c) * ${bottom_height} + h_in) 137 | * ${bottom_width} + w_in; 138 | value += top_diff[top_offset] * bottom_data[bottom_offset]; 139 | } 140 | buffer_data[index] = value; 141 | } else { 142 | buffer_data[index] = 0; 143 | } 144 | } 145 | } 146 | ''' 147 | 148 | 149 | class _involution(Function): 150 | @staticmethod 151 | def forward(ctx, input, weight, stride, padding, dilation): 152 | assert input.dim() == 4 and input.is_cuda 153 | assert weight.dim() == 6 and weight.is_cuda 154 | batch_size, channels, height, width = input.size() 155 | kernel_h, kernel_w = weight.size()[2:4] 156 | output_h = int((height + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) / stride[0] + 1) 157 | output_w = int((width + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) / stride[1] + 1) 158 | 159 | output = input.new(batch_size, channels, output_h, output_w) 160 | n = output.numel() 161 | 162 | with torch.cuda.device_of(input): 163 | f = load_kernel('involution_forward_kernel', _involution_kernel, Dtype=Dtype(input), nthreads=n, 164 | num=batch_size, channels=channels, groups=weight.size()[1], 165 | bottom_height=height, bottom_width=width, 166 | top_height=output_h, top_width=output_w, 167 | kernel_h=kernel_h, kernel_w=kernel_w, 168 | stride_h=stride[0], stride_w=stride[1], 169 | dilation_h=dilation[0], dilation_w=dilation[1], 170 | pad_h=padding[0], pad_w=padding[1]) 171 | f(block=(CUDA_NUM_THREADS,1,1), 172 | grid=(GET_BLOCKS(n),1,1), 173 | args=[input.data_ptr(), weight.data_ptr(), output.data_ptr()], 174 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream)) 175 | 176 | ctx.save_for_backward(input, weight) 177 | ctx.stride, ctx.padding, ctx.dilation = stride, padding, dilation 178 | return output 179 | 180 | @staticmethod 181 | def backward(ctx, grad_output): 182 | assert grad_output.is_cuda and grad_output.is_contiguous() 183 | input, weight = ctx.saved_tensors 184 | stride, padding, dilation = ctx.stride, ctx.padding, ctx.dilation 185 | 186 | batch_size, channels, height, width = input.size() 187 | kernel_h, kernel_w = weight.size()[2:4] 188 | output_h, output_w = grad_output.size()[2:] 189 | 190 | grad_input, grad_weight = None, None 191 | 192 | opt = dict(Dtype=Dtype(grad_output), 193 | num=batch_size, channels=channels, groups=weight.size()[1], 194 | bottom_height=height, bottom_width=width, 195 | top_height=output_h, top_width=output_w, 196 | kernel_h=kernel_h, kernel_w=kernel_w, 197 | stride_h=stride[0], stride_w=stride[1], 198 | dilation_h=dilation[0], dilation_w=dilation[1], 199 | pad_h=padding[0], pad_w=padding[1]) 200 | 201 | with torch.cuda.device_of(input): 202 | if ctx.needs_input_grad[0]: 203 | grad_input = input.new(input.size()) 204 | 205 | n = grad_input.numel() 206 | opt['nthreads'] = n 207 | 208 | f = load_kernel('involution_backward_grad_input_kernel', 209 | _involution_kernel_backward_grad_input, **opt) 210 | f(block=(CUDA_NUM_THREADS,1,1), 211 | grid=(GET_BLOCKS(n),1,1), 212 | args=[grad_output.data_ptr(), weight.data_ptr(), grad_input.data_ptr()], 213 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream)) 214 | 215 | if ctx.needs_input_grad[1]: 216 | grad_weight = weight.new(weight.size()) 217 | 218 | n = grad_weight.numel() 219 | opt['nthreads'] = n 220 | 221 | f = load_kernel('involution_backward_grad_weight_kernel', 222 | _involution_kernel_backward_grad_weight, **opt) 223 | f(block=(CUDA_NUM_THREADS,1,1), 224 | grid=(GET_BLOCKS(n),1,1), 225 | args=[grad_output.data_ptr(), 
input.data_ptr(), grad_weight.data_ptr()], 226 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream)) 227 | 228 | return grad_input, grad_weight, None, None, None 229 | 230 | 231 | def _involution_cuda(input, weight, bias=None, stride=1, padding=0, dilation=1): 232 | """ involution kernel 233 | """ 234 | assert input.size(0) == weight.size(0) 235 | assert input.size(-2)//stride == weight.size(-2) 236 | assert input.size(-1)//stride == weight.size(-1) 237 | if input.is_cuda: 238 | out = _involution.apply(input, weight, _pair(stride), _pair(padding), _pair(dilation)) 239 | if bias is not None: 240 | out += bias.view(1,-1,1,1) 241 | else: 242 | raise NotImplementedError 243 | return out 244 | 245 | 246 | class involution(nn.Module): 247 | 248 | def __init__(self, 249 | channels, 250 | kernel_size, 251 | stride): 252 | super(involution, self).__init__() 253 | self.kernel_size = kernel_size 254 | self.stride = stride 255 | self.channels = channels 256 | reduction_ratio = 4 257 | self.group_channels = 16 258 | self.groups = self.channels // self.group_channels 259 | self.conv1 = ConvModule( 260 | in_channels=channels, 261 | out_channels=channels // reduction_ratio, 262 | kernel_size=1, 263 | conv_cfg=None, 264 | norm_cfg=dict(type='BN'), 265 | act_cfg=dict(type='ReLU')) 266 | self.conv2 = ConvModule( 267 | in_channels=channels // reduction_ratio, 268 | out_channels=kernel_size**2 * self.groups, 269 | kernel_size=1, 270 | stride=1, 271 | conv_cfg=None, 272 | norm_cfg=None, 273 | act_cfg=None) 274 | if stride > 1: 275 | self.avgpool = nn.AvgPool2d(stride, stride) 276 | 277 | def forward(self, x): 278 | weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x))) 279 | b, c, h, w = weight.shape 280 | weight = weight.view(b, self.groups, self.kernel_size, self.kernel_size, h, w) 281 | out = _involution_cuda(x, weight, stride=self.stride, padding=(self.kernel_size-1)//2) 282 | return out 283 | -------------------------------------------------------------------------------- /det/mmdet/models/utils/involution_naive.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import ConvModule 3 | 4 | 5 | class involution(nn.Module): 6 | 7 | def __init__(self, 8 | channels, 9 | kernel_size, 10 | stride): 11 | super(involution, self).__init__() 12 | self.kernel_size = kernel_size 13 | self.stride = stride 14 | self.channels = channels 15 | reduction_ratio = 4 16 | self.group_channels = 16 17 | self.groups = self.channels // self.group_channels 18 | self.conv1 = ConvModule( 19 | in_channels=channels, 20 | out_channels=channels // reduction_ratio, 21 | kernel_size=1, 22 | conv_cfg=None, 23 | norm_cfg=dict(type='BN'), 24 | act_cfg=dict(type='ReLU')) 25 | self.conv2 = ConvModule( 26 | in_channels=channels // reduction_ratio, 27 | out_channels=kernel_size**2 * self.groups, 28 | kernel_size=1, 29 | stride=1, 30 | conv_cfg=None, 31 | norm_cfg=None, 32 | act_cfg=None) 33 | if stride > 1: 34 | self.avgpool = nn.AvgPool2d(stride, stride) 35 | self.unfold = nn.Unfold(kernel_size, 1, (kernel_size-1)//2, stride) 36 | 37 | def forward(self, x): 38 | weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x))) 39 | b, c, h, w = weight.shape 40 | weight = weight.view(b, self.groups, self.kernel_size**2, h, w).unsqueeze(2) 41 | out = self.unfold(x).view(b, self.groups, self.group_channels, self.kernel_size**2, h, w) 42 | out = (weight * out).sum(dim=3).view(b, self.channels, h, w) 43 | return out 44 | 
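
Both involution_cuda.py and involution_naive.py above expose the same involution(channels, kernel_size, stride) module: two 1x1 convolutions generate a kernel_size x kernel_size kernel for every output pixel (shared across the 16 channels of each group), and that kernel is applied to the pixel's local neighborhood. The CUDA version requires cupy and a CUDA tensor, while the naive version only needs PyTorch and mmcv. The following is a minimal usage sketch, not part of the repository; the import path is hypothetical and channels is assumed to be a multiple of group_channels = 16 so that the group count is non-zero.

    # Minimal usage sketch (hypothetical import path; adjust to how the package is installed).
    import torch
    from involution_naive import involution  # the pure-PyTorch fallback defined above

    inv = involution(channels=64, kernel_size=7, stride=1)  # groups = 64 // 16 = 4
    x = torch.randn(2, 64, 32, 32)  # (batch, channels, height, width)
    y = inv(x)                      # per-pixel kernels are generated from x itself
    assert y.shape == x.shape       # stride=1 preserves the spatial resolution

With stride > 1 the input is average-pooled before the kernels are generated, so the output spatial size shrinks by the stride, matching the behaviour of a strided convolution.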
-------------------------------------------------------------------------------- /fig/complexity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d-li14/involution/21c3158fcbb4ecda8ed4626fcae8b01be511a598/fig/complexity.png -------------------------------------------------------------------------------- /fig/involution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d-li14/involution/21c3158fcbb4ecda8ed4626fcae8b01be511a598/fig/involution.png -------------------------------------------------------------------------------- /fig/parameter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/d-li14/involution/21c3158fcbb4ecda8ed4626fcae8b01be511a598/fig/parameter.png -------------------------------------------------------------------------------- /seg/configs/_base_/models/fpn_red50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | #pretrained='open-mmlab://resnet50_v1c', 6 | pretrained='/path/to/rednet50.pth', 7 | backbone=dict( 8 | type='RedNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | #dilations=(1, 1, 1, 1), 13 | #strides=(1, 2, 2, 2), 14 | norm_cfg=norm_cfg, 15 | norm_eval=False, 16 | style='pytorch'), 17 | #contract_dilation=True), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[256, 512, 1024, 2048], 21 | out_channels=256, 22 | num_outs=4), 23 | decode_head=dict( 24 | type='FPNHead', 25 | in_channels=[256, 256, 256, 256], 26 | in_index=[0, 1, 2, 3], 27 | feature_strides=[4, 8, 16, 32], 28 | channels=128, 29 | dropout_ratio=0.1, 30 | num_classes=19, 31 | norm_cfg=norm_cfg, 32 | align_corners=False, 33 | loss_decode=dict( 34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict(), 37 | test_cfg=dict(mode='whole')) 38 | -------------------------------------------------------------------------------- /seg/configs/_base_/models/fpn_red50_neck.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | #pretrained='open-mmlab://resnet50_v1c', 6 | pretrained='/path/to/rednet50.pth', 7 | backbone=dict( 8 | type='RedNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | #dilations=(1, 1, 1, 1), 13 | #strides=(1, 2, 2, 2), 14 | norm_cfg=norm_cfg, 15 | norm_eval=False, 16 | style='pytorch'), 17 | #contract_dilation=True), 18 | neck=dict( 19 | type='FPN_involution', 20 | in_channels=[256, 512, 1024, 2048], 21 | out_channels=256, 22 | num_outs=4), 23 | decode_head=dict( 24 | type='FPNHead', 25 | in_channels=[256, 256, 256, 256], 26 | in_index=[0, 1, 2, 3], 27 | feature_strides=[4, 8, 16, 32], 28 | channels=128, 29 | dropout_ratio=0.1, 30 | num_classes=19, 31 | norm_cfg=norm_cfg, 32 | align_corners=False, 33 | loss_decode=dict( 34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict(), 37 | test_cfg=dict(mode='whole')) 38 | -------------------------------------------------------------------------------- /seg/configs/_base_/models/upernet_red50.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | #pretrained='open-mmlab://resnet50_v1c', 6 | pretrained='/path/to/rednet50.pth', 7 | backbone=dict( 8 | type='RedNet', 9 | depth=50, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | #dilations=(1, 1, 1, 1), 13 | #strides=(1, 2, 2, 2), 14 | norm_cfg=norm_cfg, 15 | norm_eval=False, 16 | style='pytorch'), 17 | #contract_dilation=True), 18 | decode_head=dict( 19 | type='UPerHead', 20 | in_channels=[256, 512, 1024, 2048], 21 | in_index=[0, 1, 2, 3], 22 | pool_scales=(1, 2, 3, 6), 23 | channels=512, 24 | dropout_ratio=0.1, 25 | num_classes=19, 26 | norm_cfg=norm_cfg, 27 | align_corners=False, 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 30 | auxiliary_head=dict( 31 | type='FCNHead', 32 | in_channels=1024, 33 | in_index=2, 34 | channels=256, 35 | num_convs=1, 36 | concat_input=False, 37 | dropout_ratio=0.1, 38 | num_classes=19, 39 | norm_cfg=norm_cfg, 40 | align_corners=False, 41 | loss_decode=dict( 42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), 43 | # model training and testing settings 44 | train_cfg=dict(), 45 | test_cfg=dict(mode='whole')) 46 | -------------------------------------------------------------------------------- /seg/configs/involution/fpn_red50_512x1024_80k_cityscapes.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/fpn_red50.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | -------------------------------------------------------------------------------- /seg/configs/involution/fpn_red50_neck_512x1024_80k_cityscapes.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/fpn_red50_neck.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | -------------------------------------------------------------------------------- /seg/configs/involution/upernet_red50_512x1024_80k_cityscapes.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/upernet_red50.py', '../_base_/datasets/cityscapes.py', 3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' 4 | ] 5 | -------------------------------------------------------------------------------- /seg/mmseg/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .cgnet import CGNet 2 | from .fast_scnn import FastSCNN 3 | from .hrnet import HRNet 4 | from .mobilenet_v2 import MobileNetV2 5 | from .mobilenet_v3 import MobileNetV3 6 | from .resnest import ResNeSt 7 | from .resnet import ResNet, ResNetV1c, ResNetV1d 8 | from .resnext import ResNeXt 9 | from .unet import UNet 10 | from .rednet import RedNet 11 | 12 | __all__ = [ 13 | 'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN', 14 | 'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3', 15 | 'RedNet' 16 | ] 17 | -------------------------------------------------------------------------------- /seg/mmseg/models/backbones/base_backbone.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABCMeta, 
abstractmethod 3 | 4 | import torch.nn as nn 5 | from mmcv.runner import load_checkpoint 6 | 7 | 8 | class BaseBackbone(nn.Module, metaclass=ABCMeta): 9 | """Base backbone. 10 | 11 | This class defines the basic functions of a backbone. 12 | Any backbone that inherits this class should at least 13 | define its own `forward` function. 14 | 15 | """ 16 | 17 | def __init__(self): 18 | super(BaseBackbone, self).__init__() 19 | 20 | def init_weights(self, pretrained=None): 21 | """Init backbone weights 22 | 23 | Args: 24 | pretrained (str | None): If pretrained is a string, then it 25 | initializes backbone weights by loading the pretrained 26 | checkpoint. If pretrained is None, then it follows default 27 | initializer or customized initializer in subclasses. 28 | """ 29 | if isinstance(pretrained, str): 30 | logger = logging.getLogger() 31 | load_checkpoint(self, pretrained, strict=False, logger=logger) 32 | elif pretrained is None: 33 | # use default initializer or customized initializer in subclasses 34 | pass 35 | else: 36 | raise TypeError('pretrained must be a str or None.' 37 | f' But received {type(pretrained)}.') 38 | 39 | @abstractmethod 40 | def forward(self, x): 41 | """Forward computation 42 | 43 | Args: 44 | x (tensor | tuple[tensor]): x could be a Torch.tensor or a tuple of 45 | Torch.tensor, containing input data for forward computation. 46 | """ 47 | pass 48 | 49 | def train(self, mode=True): 50 | """Set module status before forward computation 51 | 52 | Args: 53 | mode (bool): Whether it is train_mode or test_mode 54 | """ 55 | super(BaseBackbone, self).train(mode) 56 | -------------------------------------------------------------------------------- /seg/mmseg/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | from .fpn_involution import FPN_involution 3 | 4 | __all__ = ['FPN', 'FPN_involution'] 5 | -------------------------------------------------------------------------------- /seg/mmseg/models/necks/fpn_involution.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import ConvModule, xavier_init 4 | 5 | from ..builder import NECKS 6 | from ..utils.involution_cuda import involution 7 | 8 | 9 | @NECKS.register_module() 10 | class FPN_involution(nn.Module): 11 | """Feature Pyramid Network. 12 | 13 | This is an implementation of - Feature Pyramid Networks for Object 14 | Detection (https://arxiv.org/abs/1612.03144) 15 | 16 | Args: 17 | in_channels (List[int]): Number of input channels per scale. 18 | out_channels (int): Number of output channels (used at each scale) 19 | num_outs (int): Number of output scales. 20 | start_level (int): Index of the start input backbone level used to 21 | build the feature pyramid. Default: 0. 22 | end_level (int): Index of the end input backbone level (exclusive) to 23 | build the feature pyramid. Default: -1, which means the last level. 24 | add_extra_convs (bool | str): If bool, it decides whether to add conv 25 | layers on top of the original feature maps. Default to False. 26 | If True, its actual mode is specified by `extra_convs_on_inputs`. 27 | If str, it specifies the source feature map of the extra convs. 28 | Only the following options are allowed 29 | 30 | - 'on_input': Last feat map of neck inputs (i.e. backbone feature). 31 | - 'on_lateral': Last feature map after lateral convs. 
32 | - 'on_output': The last output feature map after fpn convs. 33 | extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs 34 | on the original feature from the backbone. If True, 35 | it is equivalent to `add_extra_convs='on_input'`. If False, it is 36 | equivalent to set `add_extra_convs='on_output'`. Default to True. 37 | relu_before_extra_convs (bool): Whether to apply relu before the extra 38 | conv. Default: False. 39 | no_norm_on_lateral (bool): Whether to apply norm on lateral. 40 | Default: False. 41 | conv_cfg (dict): Config dict for convolution layer. Default: None. 42 | norm_cfg (dict): Config dict for normalization layer. Default: None. 43 | act_cfg (str): Config dict for activation layer in ConvModule. 44 | Default: None. 45 | upsample_cfg (dict): Config dict for interpolate layer. 46 | Default: `dict(mode='nearest')` 47 | 48 | Example: 49 | >>> import torch 50 | >>> in_channels = [2, 3, 5, 7] 51 | >>> scales = [340, 170, 84, 43] 52 | >>> inputs = [torch.rand(1, c, s, s) 53 | ... for c, s in zip(in_channels, scales)] 54 | >>> self = FPN(in_channels, 11, len(in_channels)).eval() 55 | >>> outputs = self.forward(inputs) 56 | >>> for i in range(len(outputs)): 57 | ... print(f'outputs[{i}].shape = {outputs[i].shape}') 58 | outputs[0].shape = torch.Size([1, 11, 340, 340]) 59 | outputs[1].shape = torch.Size([1, 11, 170, 170]) 60 | outputs[2].shape = torch.Size([1, 11, 84, 84]) 61 | outputs[3].shape = torch.Size([1, 11, 43, 43]) 62 | """ 63 | 64 | def __init__(self, 65 | in_channels, 66 | out_channels, 67 | num_outs, 68 | start_level=0, 69 | end_level=-1, 70 | add_extra_convs=False, 71 | extra_convs_on_inputs=False, 72 | relu_before_extra_convs=False, 73 | no_norm_on_lateral=False, 74 | conv_cfg=None, 75 | norm_cfg=None, 76 | act_cfg=None, 77 | upsample_cfg=dict(mode='nearest')): 78 | super(FPN_involution, self).__init__() 79 | assert isinstance(in_channels, list) 80 | self.in_channels = in_channels 81 | self.out_channels = out_channels 82 | self.num_ins = len(in_channels) 83 | self.num_outs = num_outs 84 | self.relu_before_extra_convs = relu_before_extra_convs 85 | self.no_norm_on_lateral = no_norm_on_lateral 86 | self.fp16_enabled = False 87 | self.upsample_cfg = upsample_cfg.copy() 88 | 89 | if end_level == -1: 90 | self.backbone_end_level = self.num_ins 91 | assert num_outs >= self.num_ins - start_level 92 | else: 93 | # if end_level < inputs, no extra level is allowed 94 | self.backbone_end_level = end_level 95 | assert end_level <= len(in_channels) 96 | assert num_outs == end_level - start_level 97 | self.start_level = start_level 98 | self.end_level = end_level 99 | self.add_extra_convs = add_extra_convs 100 | assert isinstance(add_extra_convs, (str, bool)) 101 | if isinstance(add_extra_convs, str): 102 | # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' 103 | assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') 104 | elif add_extra_convs: # True 105 | if extra_convs_on_inputs: 106 | # For compatibility with previous release 107 | # TODO: deprecate `extra_convs_on_inputs` 108 | self.add_extra_convs = 'on_input' 109 | else: 110 | self.add_extra_convs = 'on_output' 111 | 112 | self.lateral_convs = nn.ModuleList() 113 | self.fpn_convs = nn.ModuleList() 114 | 115 | for i in range(self.start_level, self.backbone_end_level): 116 | l_conv = ConvModule( 117 | in_channels[i], 118 | out_channels, 119 | 1, 120 | conv_cfg=conv_cfg, 121 | norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, 122 | act_cfg=act_cfg, 123 | inplace=False) 
124 | fpn_conv = involution(out_channels, 7, 1) 125 | #ConvModule( 126 | #out_channels, 127 | #out_channels, 128 | #3, 129 | #padding=1, 130 | #conv_cfg=conv_cfg, 131 | #norm_cfg=norm_cfg, 132 | #act_cfg=act_cfg, 133 | #inplace=False) 134 | 135 | self.lateral_convs.append(l_conv) 136 | self.fpn_convs.append(fpn_conv) 137 | 138 | # add extra conv layers (e.g., RetinaNet) 139 | extra_levels = num_outs - self.backbone_end_level + self.start_level 140 | if self.add_extra_convs and extra_levels >= 1: 141 | for i in range(extra_levels): 142 | if i == 0 and self.add_extra_convs == 'on_input': 143 | in_channels = self.in_channels[self.backbone_end_level - 1] 144 | else: 145 | in_channels = out_channels 146 | extra_fpn_conv = ConvModule( 147 | in_channels, 148 | out_channels, 149 | 3, 150 | stride=2, 151 | padding=1, 152 | conv_cfg=conv_cfg, 153 | norm_cfg=norm_cfg, 154 | act_cfg=act_cfg, 155 | inplace=False) 156 | self.fpn_convs.append(extra_fpn_conv) 157 | 158 | # default init_weights for conv(msra) and norm in ConvModule 159 | def init_weights(self): 160 | for m in self.modules(): 161 | if isinstance(m, nn.Conv2d): 162 | xavier_init(m, distribution='uniform') 163 | 164 | def forward(self, inputs): 165 | assert len(inputs) == len(self.in_channels) 166 | 167 | # build laterals 168 | laterals = [ 169 | lateral_conv(inputs[i + self.start_level]) 170 | for i, lateral_conv in enumerate(self.lateral_convs) 171 | ] 172 | 173 | # build top-down path 174 | used_backbone_levels = len(laterals) 175 | for i in range(used_backbone_levels - 1, 0, -1): 176 | # In some cases, fixing `scale factor` (e.g. 2) is preferred, but 177 | # it cannot co-exist with `size` in `F.interpolate`. 178 | if 'scale_factor' in self.upsample_cfg: 179 | laterals[i - 1] += F.interpolate(laterals[i], 180 | **self.upsample_cfg) 181 | else: 182 | prev_shape = laterals[i - 1].shape[2:] 183 | laterals[i - 1] += F.interpolate( 184 | laterals[i], size=prev_shape, **self.upsample_cfg) 185 | 186 | # build outputs 187 | # part 1: from original levels 188 | outs = [ 189 | self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) 190 | ] 191 | # part 2: add extra levels 192 | if self.num_outs > len(outs): 193 | # use max pool to get more levels on top of outputs 194 | # (e.g., Faster R-CNN, Mask R-CNN) 195 | if not self.add_extra_convs: 196 | for i in range(self.num_outs - used_backbone_levels): 197 | outs.append(F.max_pool2d(outs[-1], 1, stride=2)) 198 | # add conv layers on top of original feature maps (RetinaNet) 199 | else: 200 | if self.add_extra_convs == 'on_input': 201 | extra_source = inputs[self.backbone_end_level - 1] 202 | elif self.add_extra_convs == 'on_lateral': 203 | extra_source = laterals[-1] 204 | elif self.add_extra_convs == 'on_output': 205 | extra_source = outs[-1] 206 | else: 207 | raise NotImplementedError 208 | outs.append(self.fpn_convs[used_backbone_levels](extra_source)) 209 | for i in range(used_backbone_levels + 1, self.num_outs): 210 | if self.relu_before_extra_convs: 211 | outs.append(self.fpn_convs[i](F.relu(outs[-1]))) 212 | else: 213 | outs.append(self.fpn_convs[i](outs[-1])) 214 | return tuple(outs) 215 | -------------------------------------------------------------------------------- /seg/mmseg/models/utils/involution_cuda.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | import torch 3 | from torch.nn.modules.utils import _pair 4 | import torch.nn.functional as F 5 | import torch.nn as nn 6 | from mmcv.cnn import 
ConvModule 7 | 8 | 9 | from collections import namedtuple 10 | import cupy 11 | from string import Template 12 | 13 | 14 | Stream = namedtuple('Stream', ['ptr']) 15 | 16 | 17 | def Dtype(t): 18 | if isinstance(t, torch.cuda.FloatTensor): 19 | return 'float' 20 | elif isinstance(t, torch.cuda.DoubleTensor): 21 | return 'double' 22 | 23 | 24 | @cupy._util.memoize(for_each_device=True) 25 | def load_kernel(kernel_name, code, **kwargs): 26 | code = Template(code).substitute(**kwargs) 27 | kernel_code = cupy.cuda.compile_with_cache(code) 28 | return kernel_code.get_function(kernel_name) 29 | 30 | 31 | CUDA_NUM_THREADS = 1024 32 | 33 | kernel_loop = ''' 34 | #define CUDA_KERNEL_LOOP(i, n) \ 35 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ 36 | i < (n); \ 37 | i += blockDim.x * gridDim.x) 38 | ''' 39 | 40 | 41 | def GET_BLOCKS(N): 42 | return (N + CUDA_NUM_THREADS - 1) // CUDA_NUM_THREADS 43 | 44 | 45 | _involution_kernel = kernel_loop + ''' 46 | extern "C" 47 | __global__ void involution_forward_kernel( 48 | const ${Dtype}* bottom_data, const ${Dtype}* weight_data, ${Dtype}* top_data) { 49 | CUDA_KERNEL_LOOP(index, ${nthreads}) { 50 | const int n = index / ${channels} / ${top_height} / ${top_width}; 51 | const int c = (index / ${top_height} / ${top_width}) % ${channels}; 52 | const int h = (index / ${top_width}) % ${top_height}; 53 | const int w = index % ${top_width}; 54 | const int g = c / (${channels} / ${groups}); 55 | ${Dtype} value = 0; 56 | #pragma unroll 57 | for (int kh = 0; kh < ${kernel_h}; ++kh) { 58 | #pragma unroll 59 | for (int kw = 0; kw < ${kernel_w}; ++kw) { 60 | const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h}; 61 | const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w}; 62 | if ((h_in >= 0) && (h_in < ${bottom_height}) 63 | && (w_in >= 0) && (w_in < ${bottom_width})) { 64 | const int offset = ((n * ${channels} + c) * ${bottom_height} + h_in) 65 | * ${bottom_width} + w_in; 66 | const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h) 67 | * ${top_width} + w; 68 | value += weight_data[offset_weight] * bottom_data[offset]; 69 | } 70 | } 71 | } 72 | top_data[index] = value; 73 | } 74 | } 75 | ''' 76 | 77 | 78 | _involution_kernel_backward_grad_input = kernel_loop + ''' 79 | extern "C" 80 | __global__ void involution_backward_grad_input_kernel( 81 | const ${Dtype}* const top_diff, const ${Dtype}* const weight_data, ${Dtype}* const bottom_diff) { 82 | CUDA_KERNEL_LOOP(index, ${nthreads}) { 83 | const int n = index / ${channels} / ${bottom_height} / ${bottom_width}; 84 | const int c = (index / ${bottom_height} / ${bottom_width}) % ${channels}; 85 | const int h = (index / ${bottom_width}) % ${bottom_height}; 86 | const int w = index % ${bottom_width}; 87 | const int g = c / (${channels} / ${groups}); 88 | ${Dtype} value = 0; 89 | #pragma unroll 90 | for (int kh = 0; kh < ${kernel_h}; ++kh) { 91 | #pragma unroll 92 | for (int kw = 0; kw < ${kernel_w}; ++kw) { 93 | const int h_out_s = h + ${pad_h} - kh * ${dilation_h}; 94 | const int w_out_s = w + ${pad_w} - kw * ${dilation_w}; 95 | if (((h_out_s % ${stride_h}) == 0) && ((w_out_s % ${stride_w}) == 0)) { 96 | const int h_out = h_out_s / ${stride_h}; 97 | const int w_out = w_out_s / ${stride_w}; 98 | if ((h_out >= 0) && (h_out < ${top_height}) 99 | && (w_out >= 0) && (w_out < ${top_width})) { 100 | const int offset = ((n * ${channels} + c) * ${top_height} + h_out) 101 | * ${top_width} + w_out; 102 | const int offset_weight = ((((n * ${groups} + g) 
* ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h_out) 103 | * ${top_width} + w_out; 104 | value += weight_data[offset_weight] * top_diff[offset]; 105 | } 106 | } 107 | } 108 | } 109 | bottom_diff[index] = value; 110 | } 111 | } 112 | ''' 113 | 114 | 115 | _involution_kernel_backward_grad_weight = kernel_loop + ''' 116 | extern "C" 117 | __global__ void involution_backward_grad_weight_kernel( 118 | const ${Dtype}* const top_diff, const ${Dtype}* const bottom_data, ${Dtype}* const buffer_data) { 119 | CUDA_KERNEL_LOOP(index, ${nthreads}) { 120 | const int h = (index / ${top_width}) % ${top_height}; 121 | const int w = index % ${top_width}; 122 | const int kh = (index / ${kernel_w} / ${top_height} / ${top_width}) 123 | % ${kernel_h}; 124 | const int kw = (index / ${top_height} / ${top_width}) % ${kernel_w}; 125 | const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h}; 126 | const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w}; 127 | if ((h_in >= 0) && (h_in < ${bottom_height}) 128 | && (w_in >= 0) && (w_in < ${bottom_width})) { 129 | const int g = (index / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${groups}; 130 | const int n = (index / ${groups} / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${num}; 131 | ${Dtype} value = 0; 132 | #pragma unroll 133 | for (int c = g * (${channels} / ${groups}); c < (g + 1) * (${channels} / ${groups}); ++c) { 134 | const int top_offset = ((n * ${channels} + c) * ${top_height} + h) 135 | * ${top_width} + w; 136 | const int bottom_offset = ((n * ${channels} + c) * ${bottom_height} + h_in) 137 | * ${bottom_width} + w_in; 138 | value += top_diff[top_offset] * bottom_data[bottom_offset]; 139 | } 140 | buffer_data[index] = value; 141 | } else { 142 | buffer_data[index] = 0; 143 | } 144 | } 145 | } 146 | ''' 147 | 148 | 149 | class _involution(Function): 150 | @staticmethod 151 | def forward(ctx, input, weight, stride, padding, dilation): 152 | assert input.dim() == 4 and input.is_cuda 153 | assert weight.dim() == 6 and weight.is_cuda 154 | batch_size, channels, height, width = input.size() 155 | kernel_h, kernel_w = weight.size()[2:4] 156 | output_h = int((height + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) / stride[0] + 1) 157 | output_w = int((width + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) / stride[1] + 1) 158 | 159 | output = input.new(batch_size, channels, output_h, output_w) 160 | n = output.numel() 161 | 162 | with torch.cuda.device_of(input): 163 | f = load_kernel('involution_forward_kernel', _involution_kernel, Dtype=Dtype(input), nthreads=n, 164 | num=batch_size, channels=channels, groups=weight.size()[1], 165 | bottom_height=height, bottom_width=width, 166 | top_height=output_h, top_width=output_w, 167 | kernel_h=kernel_h, kernel_w=kernel_w, 168 | stride_h=stride[0], stride_w=stride[1], 169 | dilation_h=dilation[0], dilation_w=dilation[1], 170 | pad_h=padding[0], pad_w=padding[1]) 171 | f(block=(CUDA_NUM_THREADS,1,1), 172 | grid=(GET_BLOCKS(n),1,1), 173 | args=[input.data_ptr(), weight.data_ptr(), output.data_ptr()], 174 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream)) 175 | 176 | ctx.save_for_backward(input, weight) 177 | ctx.stride, ctx.padding, ctx.dilation = stride, padding, dilation 178 | return output 179 | 180 | @staticmethod 181 | def backward(ctx, grad_output): 182 | assert grad_output.is_cuda and grad_output.is_contiguous() 183 | input, weight = ctx.saved_tensors 184 | stride, padding, dilation = ctx.stride, ctx.padding, ctx.dilation 185 | 
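        # Two kernels handle the backward pass: `involution_backward_grad_input_kernel`
        # routes grad_output back through the per-pixel kernels to produce grad_input,
        # while `involution_backward_grad_weight_kernel` sums grad_output * input over
        # the channels of each group to produce grad_weight.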
186 | batch_size, channels, height, width = input.size() 187 | kernel_h, kernel_w = weight.size()[2:4] 188 | output_h, output_w = grad_output.size()[2:] 189 | 190 | grad_input, grad_weight = None, None 191 | 192 | opt = dict(Dtype=Dtype(grad_output), 193 | num=batch_size, channels=channels, groups=weight.size()[1], 194 | bottom_height=height, bottom_width=width, 195 | top_height=output_h, top_width=output_w, 196 | kernel_h=kernel_h, kernel_w=kernel_w, 197 | stride_h=stride[0], stride_w=stride[1], 198 | dilation_h=dilation[0], dilation_w=dilation[1], 199 | pad_h=padding[0], pad_w=padding[1]) 200 | 201 | with torch.cuda.device_of(input): 202 | if ctx.needs_input_grad[0]: 203 | grad_input = input.new(input.size()) 204 | 205 | n = grad_input.numel() 206 | opt['nthreads'] = n 207 | 208 | f = load_kernel('involution_backward_grad_input_kernel', 209 | _involution_kernel_backward_grad_input, **opt) 210 | f(block=(CUDA_NUM_THREADS,1,1), 211 | grid=(GET_BLOCKS(n),1,1), 212 | args=[grad_output.data_ptr(), weight.data_ptr(), grad_input.data_ptr()], 213 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream)) 214 | 215 | if ctx.needs_input_grad[1]: 216 | grad_weight = weight.new(weight.size()) 217 | 218 | n = grad_weight.numel() 219 | opt['nthreads'] = n 220 | 221 | f = load_kernel('involution_backward_grad_weight_kernel', 222 | _involution_kernel_backward_grad_weight, **opt) 223 | f(block=(CUDA_NUM_THREADS,1,1), 224 | grid=(GET_BLOCKS(n),1,1), 225 | args=[grad_output.data_ptr(), input.data_ptr(), grad_weight.data_ptr()], 226 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream)) 227 | 228 | return grad_input, grad_weight, None, None, None 229 | 230 | 231 | def _involution_cuda(input, weight, bias=None, stride=1, padding=0, dilation=1): 232 | """ involution kernel 233 | """ 234 | assert input.size(0) == weight.size(0) 235 | assert input.size(-2)//stride == weight.size(-2) 236 | assert input.size(-1)//stride == weight.size(-1) 237 | if input.is_cuda: 238 | out = _involution.apply(input, weight, _pair(stride), _pair(padding), _pair(dilation)) 239 | if bias is not None: 240 | out += bias.view(1,-1,1,1) 241 | else: 242 | raise NotImplementedError 243 | return out 244 | 245 | 246 | class involution(nn.Module): 247 | 248 | def __init__(self, 249 | channels, 250 | kernel_size, 251 | stride): 252 | super(involution, self).__init__() 253 | self.kernel_size = kernel_size 254 | self.stride = stride 255 | self.channels = channels 256 | reduction_ratio = 4 257 | self.group_channels = 16 258 | self.groups = self.channels // self.group_channels 259 | self.conv1 = ConvModule( 260 | in_channels=channels, 261 | out_channels=channels // reduction_ratio, 262 | kernel_size=1, 263 | conv_cfg=None, 264 | norm_cfg=dict(type='BN'), 265 | act_cfg=dict(type='ReLU')) 266 | self.conv2 = ConvModule( 267 | in_channels=channels // reduction_ratio, 268 | out_channels=kernel_size**2 * self.groups, 269 | kernel_size=1, 270 | stride=1, 271 | conv_cfg=None, 272 | norm_cfg=None, 273 | act_cfg=None) 274 | if stride > 1: 275 | self.avgpool = nn.AvgPool2d(stride, stride) 276 | 277 | def forward(self, x): 278 | weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x))) 279 | b, c, h, w = weight.shape 280 | weight = weight.view(b, self.groups, self.kernel_size, self.kernel_size, h, w) 281 | out = _involution_cuda(x, weight, stride=self.stride, padding=(self.kernel_size-1)//2) 282 | return out 283 | -------------------------------------------------------------------------------- 
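The CUDA path above compiles its kernels at run time with cupy, so it only runs on a GPU with cupy and mmcv available. Below is a minimal usage sketch under those assumptions; the flat import path and the tensor sizes are illustrative only and not part of the repository.

import torch
from involution_cuda import involution  # hypothetical flat import, for illustration only

# channels must be a multiple of group_channels (16)
inv = involution(channels=64, kernel_size=7, stride=1).cuda()
x = torch.randn(2, 64, 56, 56, device='cuda')
y = inv(x)          # cupy builds involution_forward_kernel on the first call
print(y.shape)      # torch.Size([2, 64, 56, 56]); stride=1 preserves the spatial size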
/seg/mmseg/models/utils/involution_naive.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import ConvModule 3 | 4 | 5 | class involution(nn.Module): 6 | 7 | def __init__(self, 8 | channels, 9 | kernel_size, 10 | stride): 11 | super(involution, self).__init__() 12 | self.kernel_size = kernel_size 13 | self.stride = stride 14 | self.channels = channels 15 | reduction_ratio = 4 16 | self.group_channels = 16 17 | self.groups = self.channels // self.group_channels 18 | self.conv1 = ConvModule( 19 | in_channels=channels, 20 | out_channels=channels // reduction_ratio, 21 | kernel_size=1, 22 | conv_cfg=None, 23 | norm_cfg=dict(type='BN'), 24 | act_cfg=dict(type='ReLU')) 25 | self.conv2 = ConvModule( 26 | in_channels=channels // reduction_ratio, 27 | out_channels=kernel_size**2 * self.groups, 28 | kernel_size=1, 29 | stride=1, 30 | conv_cfg=None, 31 | norm_cfg=None, 32 | act_cfg=None) 33 | if stride > 1: 34 | self.avgpool = nn.AvgPool2d(stride, stride) 35 | self.unfold = nn.Unfold(kernel_size, 1, (kernel_size-1)//2, stride) 36 | 37 | def forward(self, x): 38 | weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x))) 39 | b, c, h, w = weight.shape 40 | weight = weight.view(b, self.groups, self.kernel_size**2, h, w).unsqueeze(2) 41 | out = self.unfold(x).view(b, self.groups, self.group_channels, self.kernel_size**2, h, w) 42 | out = (weight * out).sum(dim=3).view(b, self.channels, h, w) 43 | return out 44 | --------------------------------------------------------------------------------
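For completeness, a similar sketch for the unfold-based fallback above, which also runs on CPU (mmcv is still required for ConvModule); the import path and shapes are again illustrative assumptions.

import torch
from involution_naive import involution  # hypothetical flat import, for illustration only

inv = involution(channels=64, kernel_size=7, stride=2)
x = torch.randn(2, 64, 32, 32)
y = inv(x)          # avgpool + unfold handle the stride; no custom kernel is needed
print(y.shape)      # torch.Size([2, 64, 16, 16]); stride=2 halves the spatial resolution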