├── .gitignore
├── LICENSE
├── README.md
├── cls
│   ├── configs
│   │   ├── _base_
│   │   │   ├── models
│   │   │   │   ├── rednet101.py
│   │   │   │   ├── rednet152.py
│   │   │   │   ├── rednet26.py
│   │   │   │   ├── rednet38.py
│   │   │   │   └── rednet50.py
│   │   │   └── schedules
│   │   │       └── imagenet_bs2048_coslr_130e.py
│   │   └── rednet
│   │       ├── rednet101_b32x64_warmup_coslr_imagenet.py
│   │       ├── rednet152_b32x64_warmup_coslr_imagenet.py
│   │       ├── rednet26_b32x64_warmup_coslr_imagenet.py
│   │       ├── rednet38_b32x64_warmup_coslr_imagenet.py
│   │       └── rednet50_b32x64_warmup_coslr_imagenet.py
│   └── mmcls
│       └── models
│           ├── backbones
│           │   ├── __init__.py
│           │   └── rednet.py
│           └── utils
│               ├── involution_cuda.py
│               └── involution_naive.py
├── det
│   ├── configs
│   │   ├── _base_
│   │   │   ├── models
│   │   │   │   ├── faster_rcnn_red50_fpn.py
│   │   │   │   ├── faster_rcnn_red50_neck_fpn.py
│   │   │   │   ├── faster_rcnn_red50_neck_fpn_head.py
│   │   │   │   ├── mask_rcnn_red50_fpn.py
│   │   │   │   ├── mask_rcnn_red50_neck_fpn.py
│   │   │   │   ├── mask_rcnn_red50_neck_fpn_head.py
│   │   │   │   ├── retinanet_red50_fpn.py
│   │   │   │   └── retinanet_red50_neck_fpn.py
│   │   │   └── schedules
│   │   │       └── schedule_1x_warmup.py
│   │   └── involution
│   │       ├── faster_rcnn_red50_fpn_1x_coco.py
│   │       ├── faster_rcnn_red50_neck_fpn_1x_coco.py
│   │       ├── faster_rcnn_red50_neck_fpn_head_1x_coco.py
│   │       ├── mask_rcnn_red50_fpn_1x_coco.py
│   │       ├── mask_rcnn_red50_neck_fpn_1x_coco.py
│   │       ├── mask_rcnn_red50_neck_fpn_head_1x_coco.py
│   │       ├── retinanet_red50_fpn_1x_coco.py
│   │       └── retinanet_red50_neck_fpn_1x_coco.py
│   └── mmdet
│       ├── datasets
│       │   └── utils.py
│       └── models
│           ├── backbones
│           │   ├── __init__.py
│           │   ├── base_backbone.py
│           │   └── rednet.py
│           ├── dense_heads
│           │   ├── __init__.py
│           │   └── rpn_head_involution.py
│           ├── necks
│           │   ├── __init__.py
│           │   └── fpn_involution.py
│           ├── roi_heads
│           │   ├── __init__.py
│           │   └── mask_heads
│           │       ├── __init__.py
│           │       └── fcn_mask_head_involution.py
│           └── utils
│               ├── involution_cuda.py
│               └── involution_naive.py
├── fig
│   ├── complexity.png
│   ├── involution.png
│   └── parameter.png
└── seg
    ├── configs
    │   ├── _base_
    │   │   └── models
    │   │       ├── fpn_red50.py
    │   │       ├── fpn_red50_neck.py
    │   │       └── upernet_red50.py
    │   └── involution
    │       ├── fpn_red50_512x1024_80k_cityscapes.py
    │       ├── fpn_red50_neck_512x1024_80k_cityscapes.py
    │       └── upernet_red50_512x1024_80k_cityscapes.py
    └── mmseg
        └── models
            ├── backbones
            │   ├── __init__.py
            │   ├── base_backbone.py
            │   └── rednet.py
            ├── necks
            │   ├── __init__.py
            │   └── fpn_involution.py
            └── utils
                ├── involution_cuda.py
                └── involution_naive.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Duo Li
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # involution
2 |
3 | Official implementation of a neural operator as described in [Involution: Inverting the Inherence of Convolution for Visual Recognition](https://arxiv.org/abs/2103.06255) (CVPR'21)
4 |
5 | By [Duo Li](https://duoli.org/), [Jie Hu](https://github.com/hujie-frank), [Changhu Wang](https://scholar.google.com/citations?user=DsVZkjAAAAAJ), [Xiangtai Li](https://github.com/lxtGH), [Qi She](https://scholar.google.com/citations?user=iHoGTt4AAAAJ), [Lei Zhu](https://github.com/zh460045050), [Tong Zhang](http://tongzhang-ml.org/), and [Qifeng Chen](https://cqf.io/)
6 |
7 |
8 |
9 | **TL;DR.** `involution` is a general-purpose neural primitive that is versatile across a spectrum of deep learning models on different vision tasks. `involution` bridges `convolution` and `self-attention` in design, while being more efficient and effective than `convolution` and simpler than `self-attention` in form.
10 |
11 |
12 |
13 | If you find our work useful in your research, please cite:
14 | ```
15 | @InProceedings{Li_2021_CVPR,
16 | author = {Li, Duo and Hu, Jie and Wang, Changhu and Li, Xiangtai and She, Qi and Zhu, Lei and Zhang, Tong and Chen, Qifeng},
17 | title = {Involution: Inverting the Inherence of Convolution for Visual Recognition},
18 | booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
19 | month = {June},
20 | year = {2021}
21 | }
22 | ```
23 |
24 | ## Getting Started
25 |
26 | This repository is fully built upon the [OpenMMLab](https://openmmlab.com/) toolkits. For each individual task, the config and model files follow the same directory organization as [mmcls](https://github.com/open-mmlab/mmclassification), [mmdet](https://github.com/open-mmlab/mmdetection), and [mmseg](https://github.com/open-mmlab/mmsegmentation) respectively, so just copy-and-paste them to the corresponding locations to get started.
27 |
28 | For example, to evaluate detectors:
29 | ```shell
30 | git clone https://github.com/open-mmlab/mmdetection # and install
31 |
32 | # copy model files
33 | cp det/mmdet/models/backbones/* mmdetection/mmdet/models/backbones
34 | cp det/mmdet/models/necks/* mmdetection/mmdet/models/necks
35 | cp det/mmdet/models/dense_heads/* mmdetection/mmdet/models/dense_heads
36 | cp det/mmdet/models/roi_heads/* mmdetection/mmdet/models/roi_heads
37 | cp det/mmdet/models/roi_heads/mask_heads/* mmdetection/mmdet/models/roi_heads/mask_heads
38 | cp det/mmdet/models/utils/* mmdetection/mmdet/models/utils
39 | cp det/mmdet/datasets/* mmdetection/mmdet/datasets
40 |
41 | # copy config files
42 | cp det/configs/_base_/models/* mmdetection/configs/_base_/models
43 | cp det/configs/_base_/schedules/* mmdetection/configs/_base_/schedules
44 | cp det/configs/involution mmdetection/configs -r
45 |
46 | # evaluate checkpoints
47 | cd mmdetection
48 | bash tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}]
49 | ```
50 |
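Similarly, for image classification with mmcls (a sketch mirroring the detection example above; check the mmcls documentation for the exact test-time flags such as `--metrics`):
```shell
git clone https://github.com/open-mmlab/mmclassification # and install

# copy model files
cp cls/mmcls/models/backbones/* mmclassification/mmcls/models/backbones
cp cls/mmcls/models/utils/* mmclassification/mmcls/models/utils

# copy config files
cp cls/configs/_base_/models/* mmclassification/configs/_base_/models
cp cls/configs/_base_/schedules/* mmclassification/configs/_base_/schedules
cp cls/configs/rednet mmclassification/configs -r

# evaluate checkpoints
cd mmclassification
bash tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} --metrics accuracy
```
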
51 | For more detailed guidance, please refer to the original [mmcls](https://github.com/open-mmlab/mmclassification), [mmdet](https://github.com/open-mmlab/mmdetection), and [mmseg](https://github.com/open-mmlab/mmsegmentation) tutorials.
52 |
53 | Currently, we provide a memory-efficient implementation of the involution operator based on [CuPy](https://cupy.dev/). Please install this library in advance. A customized CUDA kernel would bring further acceleration on the hardware. Any contribution from the community in this regard is welcome!
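
As a quick sanity check, the operator can also be used as a standalone module (a minimal sketch, assuming the files above have been copied so that they are importable from `mmcls.models.utils`, and that a CUDA device is available for the CuPy version; the naive version in `involution_naive.py` also runs on CPU):
```python
import torch
from mmcls.models.utils.involution_cuda import involution  # or involution_naive

# channels must be divisible by the internal group size (16);
# kernel_size=7, stride=1 matches the usage inside RedNet bottlenecks
inv = involution(channels=64, kernel_size=7, stride=1).cuda()
x = torch.randn(2, 64, 56, 56).cuda()
y = inv(x)      # stride=1 keeps the spatial resolution
print(y.shape)  # torch.Size([2, 64, 56, 56])
```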
54 |
55 | ## Model Zoo
56 |
57 | The parameters/FLOPs reduction (↓) and performance gain (↑) relative to the convolution baselines are marked in parentheses. Some of these checkpoints were obtained from our reimplementation runs, so their performance may differ slightly from the numbers reported in our paper. Models are trained with 64 GPUs on ImageNet, 8 GPUs on COCO, and 4 GPUs on Cityscapes.
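
For reference, the per-task training entry points follow the standard OpenMMLab launcher pattern (a sketch; the ImageNet classification runs above used 64 GPUs across multiple nodes, so the provided classification configs assume a total batch size of 2048):
```shell
# COCO detection with 8 GPUs (matching the setup above)
cd mmdetection
bash tools/dist_train.sh configs/involution/faster_rcnn_red50_fpn_1x_coco.py 8

# Cityscapes segmentation with 4 GPUs
cd mmsegmentation
bash tools/dist_train.sh configs/involution/fpn_red50_512x1024_80k_cityscapes.py 4
```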
58 |
59 | ### Image Classification on ImageNet
60 |
61 | | Model | Params(M) | FLOPs(G) | Top-1 (%) | Top-5 (%) | Config | Download | Log |
62 | |:---------------------:|:---------:|:--------:|:---------:|:---------:|:------:|:--------:|:----:|
63 | | RedNet-26 | 9.23(32.8%↓) | 1.73(29.2%↓) | 75.96 | 93.19 | [config](https://github.com/d-li14/involution/blob/main/cls/configs/rednet/rednet26_b32x64_warmup_coslr_imagenet.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EWmTnvB1cqtIi-OI4HfxGBgBKzO0w_qc3CnErHhNfBitlg?e=XPws5X) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EVJ_eDMSsr1JqhInx67OCxcB-P54pj3o5mGO_rYVsRSk3A?e=70tJAc) |
64 | | RedNet-38 | 12.39(36.7%↓) | 2.22(31.3%↓) | 77.48 | 93.57 | [config](https://github.com/d-li14/involution/blob/main/cls/configs/rednet/rednet38_b32x64_warmup_coslr_imagenet.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ETZIquU7P3lDvru0OAPiTYIBAt-B__2LpP_NeB4sR0hJsg?e=b9Rbl0) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/Ed62YcJgC-NCp72NpEsMLGABkb7f-EkCQ1X-RyLmAMYoUQ?e=Hqetbj) |
65 | | RedNet-50 | 15.54(39.5%↓) | 2.71(34.1%↓) | 78.35 | 94.13 | [config](https://github.com/d-li14/involution/blob/main/cls/configs/rednet/rednet50_b32x64_warmup_coslr_imagenet.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EZjRG3qUMu5IuR7YH4Giyc8B6koPvu6s8rOlIG8-BuFevg?e=f4ce5G) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ETL5NxDwnQpCldbJb906aOABjjuhZSquxKzK5xYQm-6Bhw?e=lOzEEf) |
66 | | RedNet-101 | 25.65(42.6%↓) | 4.74(40.5%↓) | 78.92 | 94.35 | [config](https://github.com/d-li14/involution/blob/main/cls/configs/rednet/rednet101_b32x64_warmup_coslr_imagenet.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EXAuVXdXz1xAg5eG-dkvwTUBkds2IOK1kglHtkMeGz5z_A?e=vHvh5y) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EbbiBxdZoZJFmTPSg9hW3BIBLRmRpfPa70nu8pi_8ddOSw?e=CdAV86) |
67 | | RedNet-152 | 33.99(43.5%↓) | 6.79(41.4%↓) | 79.12 | 94.38 | [config](https://github.com/d-li14/involution/blob/main/cls/configs/rednet/rednet152_b32x64_warmup_coslr_imagenet.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ERxcS4wXUCtPl4uUnPoT9vcByzhLA0eHgDE-fw_EESfP0w?e=x0dZWB) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EYr2Yx-p4w1AuT-Q3E7M2m0BFhAGDoYvxps09vYy4Cnj3A?e=XGxzPF) |
68 |
69 | Before finetuning on the following downstream tasks, download the ImageNet pre-trained [RedNet-50 weights](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EaVInpb6TGJApN6QCAWwKJAB3cK9Iz55QfJgmhhaV7yuHw?e=yuWxyI) and set the `pretrained` argument in `det/configs/_base_/models/*.py` or `seg/configs/_base_/models/*.py` to your local path.
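
Concretely, that means editing the `pretrained` field at the top of each downstream model config, e.g. (an excerpt in the spirit of `det/configs/_base_/models/faster_rcnn_red50_fpn.py`):
```python
model = dict(
    type='FasterRCNN',
    pretrained='/path/to/rednet50.pth',  # <- your local RedNet-50 checkpoint
    backbone=dict(
        type='RedNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    # ... neck / heads / train_cfg / test_cfg as in the provided config
)
```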
70 |
71 | ### Object Detection and Instance Segmentation on COCO
72 |
73 | #### Faster R-CNN
74 | | Backbone | Neck | Head | Style | Lr schd | Params(M) | FLOPs(G) | box AP | Config | Download | Log |
75 | | :-------------: | :---------: | :---------: | :-----: | :-----: |:---------:|:--------:| :----: | :------: | :--------: | :---: |
76 | | RedNet-50-FPN | convolution | convolution | pytorch | 1x | 31.6(23.9%↓) | 177.9(14.1%↓) | 39.5(1.8↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/faster_rcnn_red50_fpn_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ESOJAF74jK5HrevtBdMDku0Bgf71nC7F4UcMmGWER5z1_w?e=qGPdA5) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ESYSpzei_INMn1wu5qa0Su8B9YxXf_rOtib5xHjb1y2alA?e=Qn3lyd) |
77 | | RedNet-50-FPN | involution | convolution | pytorch | 1x | 29.5(28.9%↓) | 135.0(34.8%↓) | 40.2(2.5↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/faster_rcnn_red50_neck_fpn_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EV90stAJIXxEnDRe0QM0lvwB_jm9jwqwHoBOVVOqosPHJw?e=0QoikN) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/Ec8z-SZbJTxJrAJ3FLq0PSsB1Q7T1dXLvhfHmegQqH7rqA?e=5O9jDY) |
78 | | RedNet-50-FPN | involution | involution | pytorch | 1x | 29.0(30.1%↓) | 91.5(55.8%↓) | 39.2(1.5↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/faster_rcnn_red50_neck_fpn_head_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EeTwxsehR5VLhvf5TbTr8WwBmiNUwUeuXtbdOJlg0mFkmw?e=DL3gWX) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EUBsDdHQ10BKp8wW2aj2GHYBzhHtmW2BP65PIhn3KcSYqA?e=6dmNn7) |
79 |
80 | #### Mask R-CNN
81 | | Backbone | Neck | Head | Style | Lr schd | Params(M) | FLOPs(G) | box AP | mask AP | Config | Download | Log |
82 | | :-------------: | :---------: | :---------: | :-----: | :-----: |:---------:|:--------:| :----: | :-----: | :------: | :--------: | :---: |
83 | | RedNet-50-FPN | convolution | convolution | pytorch | 1x | 34.2(22.6%↓) | 224.2(11.5%↓) | 39.9(1.5↑) | 35.7(0.6↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/mask_rcnn_red50_fpn_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EdheYm71X2pFu427_557zqcBmuKaLKEoU5R0Z2Kwo2alvg?e=qXShyW) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EQK-5qH_XxhHn4QnxmQbJ4cBL3sz9HqjS0EoybT2s1751g?e=4gpwK2) |
84 | | RedNet-50-FPN | involution | convolution | pytorch | 1x | 32.2(27.1%↓) | 181.3(28.5%↓) | 40.8(2.4↑) | 36.4(1.3↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/mask_rcnn_red50_neck_fpn_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EYYgUzXjJ3VBrscng-5QW_oB9wFK-dcqSDYB-LUXldFweg?e=idFEgd) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ETWdfYuhjY5AlGkUH11rLl4BLk9zsyKgwAbay47TYzIU-w?e=6ey6cD) |
85 | | RedNet-50-FPN | involution | involution | pytorch | 1x | 29.5(33.3%↓) | 104.6(58.7%↓) | 39.6(1.2↑) | 35.1(0.0↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/mask_rcnn_red50_neck_fpn_head_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EZwtdWXX8sBLp7L__TrmkykBPEe7kJInbkbUblP3PxuURQ?e=09l25P) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/Ebevxbj_0OtNkb3uCdpM0aoBeMQUABiQ0bDfZ9P9Jw1AZA?e=ZUcbUo) |
86 |
87 | #### RetinaNet
88 | | Backbone | Neck | Style | Lr schd | Params(M) | FLOPs(G) | box AP | Config | Download | Log |
89 | | :-------------: | :---------: | :-----: | :-----: |:---------:|:--------:| :----: | :------: | :--------: | :---: |
90 | | RedNet-50-FPN | convolution | pytorch | 1x | 27.8(26.3%↓) | 210.1(12.2%↓) | 38.2(1.6↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/retinanet_red50_fpn_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EfUY9orEyCVCsYMlcDhIZ2wBBDw7k1HqfTm9u11KfTopmA?e=4Jhu79) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EQQ_EVDmVg1FlfgpAu9NF5wB6xe6qnqaYWKJw9lL7kRxdw?e=fXxjPg) |
91 | | RedNet-50-FPN | involution | pytorch | 1x | 26.3(30.2%↓) | 199.9(16.5%↓) | 38.2(1.6↑) | [config](https://github.com/d-li14/involution/blob/main/det/configs/involution/retinanet_red50_neck_fpn_1x_coco.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EedZ3bMWZkJIvKjyLkTZHksBc_8wdOMHhFZA7RDewjPO8g?e=jsSjYI) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/ES7chxQh5-lGr5--GqroMScBKNTNACyvosdVuThPvkZGkg?e=CrlN9F) |
92 |
93 |
94 | ### Semantic Segmentation on Cityscapes
95 |
96 | | Method | Backbone | Neck | Crop Size | Lr schd | Params(M) | FLOPs(G) | mIoU | Config | Download | Log |
97 | |:------:|:--------:|:----:|:---------:|:-------:|:---------:|:--------:|:----:|:------:|:--------:|:---:|
98 | | FPN | RedNet-50 | convolution | 512x1024 | 80000 | 18.5(35.1%↓) | 293.9(19.0%↓) | 78.0(3.6↑) | [config](https://github.com/d-li14/involution/blob/main/seg/configs/involution/fpn_red50_512x1024_80k_cityscapes.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EYstjiI28SJPohJE54wapFUBW5Wc95Di2Rsh0vf6K79vPw?e=lOvbkZ) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EXdupIgFuAlFuH854wThyXcBQTyL7YhK3wPYcR98rw7PJg?e=MyXx2w) |
99 | | FPN | RedNet-50 | involution | 512x1024 | 80000 | 16.4(42.5%↓) | 205.2(43.4%↓) | 79.1(4.7↑) | [config](https://github.com/d-li14/involution/blob/main/seg/configs/involution/fpn_red50_neck_512x1024_80k_cityscapes.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EZzDyESh0ElFp2pIFL1xN70BAj1EyvhFyqi0g7Mp1OZxog?e=F7kZYH) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EXcP_3ujO_1Juj8ap7rqDJ8BWZDCyJL86BWjeZiJ_FfLOw?e=47lvtq) |
100 | | UPerNet| RedNet-50 | convolution | 512x1024 | 80000 | 56.4(15.1%↓) | 1825.6(3.6%↓) | 80.6(2.4↑) | [config](https://github.com/d-li14/involution/blob/main/seg/configs/involution/upernet_red50_512x1024_80k_cityscapes.py) | [model](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/Eb8-frsvSuNAm7qQ6-H2DtEBdACuf-mUOBhvE3YIOiobmA?e=Ibb2cN) | [log](https://hkustconnect-my.sharepoint.com/:u:/g/personal/dlibh_connect_ust_hk/EWhyFAZpxfRBoFi1myoT-RMB6-HeaP7NjSv88YQve4bZkg?e=wC8ccl) |
101 |
--------------------------------------------------------------------------------
/cls/configs/_base_/models/rednet101.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='ImageClassifier',
4 | backbone=dict(
5 | type='RedNet',
6 | depth=101,
7 | num_stages=4,
8 | out_indices=(3, ),
9 | style='pytorch'),
10 | neck=dict(type='GlobalAveragePooling'),
11 | head=dict(
12 | type='LinearClsHead',
13 | num_classes=1000,
14 | in_channels=2048,
15 | loss=dict(
16 | type='LabelSmoothLoss',
17 | loss_weight=1.0,
18 | label_smooth_val=0.1,
19 | num_classes=1000),
20 | topk=(1, 5),
21 | ))
22 |
--------------------------------------------------------------------------------
/cls/configs/_base_/models/rednet152.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='ImageClassifier',
4 | backbone=dict(
5 | type='RedNet',
6 | depth=152,
7 | num_stages=4,
8 | out_indices=(3, ),
9 | style='pytorch'),
10 | neck=dict(type='GlobalAveragePooling'),
11 | head=dict(
12 | type='LinearClsHead',
13 | num_classes=1000,
14 | in_channels=2048,
15 | loss=dict(
16 | type='LabelSmoothLoss',
17 | loss_weight=1.0,
18 | label_smooth_val=0.1,
19 | num_classes=1000),
20 | topk=(1, 5),
21 | ))
22 |
--------------------------------------------------------------------------------
/cls/configs/_base_/models/rednet26.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='ImageClassifier',
4 | backbone=dict(
5 | type='RedNet',
6 | depth=26,
7 | num_stages=4,
8 | out_indices=(3, ),
9 | style='pytorch'),
10 | neck=dict(type='GlobalAveragePooling'),
11 | head=dict(
12 | type='LinearClsHead',
13 | num_classes=1000,
14 | in_channels=2048,
15 | loss=dict(
16 | type='LabelSmoothLoss',
17 | loss_weight=1.0,
18 | label_smooth_val=0.1,
19 | num_classes=1000),
20 | topk=(1, 5),
21 | ))
22 |
--------------------------------------------------------------------------------
/cls/configs/_base_/models/rednet38.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='ImageClassifier',
4 | backbone=dict(
5 | type='RedNet',
6 | depth=38,
7 | num_stages=4,
8 | out_indices=(3, ),
9 | style='pytorch'),
10 | neck=dict(type='GlobalAveragePooling'),
11 | head=dict(
12 | type='LinearClsHead',
13 | num_classes=1000,
14 | in_channels=2048,
15 | loss=dict(
16 | type='LabelSmoothLoss',
17 | loss_weight=1.0,
18 | label_smooth_val=0.1,
19 | num_classes=1000),
20 | topk=(1, 5),
21 | ))
22 |
--------------------------------------------------------------------------------
/cls/configs/_base_/models/rednet50.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='ImageClassifier',
4 | backbone=dict(
5 | type='RedNet',
6 | depth=50,
7 | num_stages=4,
8 | out_indices=(3, ),
9 | style='pytorch'),
10 | neck=dict(type='GlobalAveragePooling'),
11 | head=dict(
12 | type='LinearClsHead',
13 | num_classes=1000,
14 | in_channels=2048,
15 | loss=dict(
16 | type='LabelSmoothLoss',
17 | loss_weight=1.0,
18 | label_smooth_val=0.1,
19 | num_classes=1000),
20 | topk=(1, 5),
21 | ))
22 |
--------------------------------------------------------------------------------
/cls/configs/_base_/schedules/imagenet_bs2048_coslr_130e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(
3 | type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True)
4 | optimizer_config = dict(grad_clip=None)
5 | # learning policy
6 | lr_config = dict(
7 | policy='CosineAnnealing',
8 | min_lr=0,
9 | warmup='linear',
10 | warmup_iters=3130,
11 | warmup_ratio=0.25)
12 | runner = dict(type='EpochBasedRunner', max_epochs=130)
13 |
--------------------------------------------------------------------------------
/cls/configs/rednet/rednet101_b32x64_warmup_coslr_imagenet.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/rednet101.py', '../_base_/datasets/imagenet_bs32.py',
3 | '../_base_/schedules/imagenet_bs2048_coslr_130e.py',
4 | '../_base_/default_runtime.py'
5 | ]
6 |
--------------------------------------------------------------------------------
/cls/configs/rednet/rednet152_b32x64_warmup_coslr_imagenet.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/rednet152.py', '../_base_/datasets/imagenet_bs32.py',
3 | '../_base_/schedules/imagenet_bs2048_coslr_130e.py',
4 | '../_base_/default_runtime.py'
5 | ]
6 |
--------------------------------------------------------------------------------
/cls/configs/rednet/rednet26_b32x64_warmup_coslr_imagenet.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/rednet26.py', '../_base_/datasets/imagenet_bs32.py',
3 | '../_base_/schedules/imagenet_bs2048_coslr_130e.py',
4 | '../_base_/default_runtime.py'
5 | ]
6 |
--------------------------------------------------------------------------------
/cls/configs/rednet/rednet38_b32x64_warmup_coslr_imagenet.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/rednet38.py', '../_base_/datasets/imagenet_bs32.py',
3 | '../_base_/schedules/imagenet_bs2048_coslr_130e.py',
4 | '../_base_/default_runtime.py'
5 | ]
6 |
--------------------------------------------------------------------------------
/cls/configs/rednet/rednet50_b32x64_warmup_coslr_imagenet.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/rednet50.py', '../_base_/datasets/imagenet_bs32.py',
3 | '../_base_/schedules/imagenet_bs2048_coslr_130e.py',
4 | '../_base_/default_runtime.py'
5 | ]
6 |
--------------------------------------------------------------------------------
/cls/mmcls/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .alexnet import AlexNet
2 | from .lenet import LeNet5
3 | from .mobilenet_v2 import MobileNetV2
4 | from .mobilenet_v3 import MobileNetv3
5 | from .regnet import RegNet
6 | from .resnest import ResNeSt
7 | from .resnet import ResNet, ResNetV1d
8 | from .resnet_cifar import ResNet_CIFAR
9 | from .resnext import ResNeXt
10 | from .seresnet import SEResNet
11 | from .seresnext import SEResNeXt
12 | from .shufflenet_v1 import ShuffleNetV1
13 | from .shufflenet_v2 import ShuffleNetV2
14 | from .vgg import VGG
15 | from .rednet import RedNet
16 |
17 | __all__ = [
18 | 'LeNet5', 'AlexNet', 'VGG', 'RegNet', 'ResNet', 'ResNeXt', 'ResNetV1d',
19 | 'ResNeSt', 'ResNet_CIFAR', 'SEResNet', 'SEResNeXt', 'ShuffleNetV1',
20 | 'ShuffleNetV2', 'MobileNetV2', 'MobileNetv3',
21 | 'RedNet'
22 | ]
23 |
--------------------------------------------------------------------------------
/cls/mmcls/models/backbones/rednet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.utils.checkpoint as cp
3 | from mmcv.cnn import (ConvModule, build_conv_layer, build_norm_layer,
4 | constant_init, kaiming_init)
5 | from mmcv.utils.parrots_wrapper import _BatchNorm
6 |
7 | from ..builder import BACKBONES
8 | from .base_backbone import BaseBackbone
9 | from ..utils.involution_cuda import involution
10 |
11 |
12 | class Bottleneck(nn.Module):
13 | """Bottleneck block for ResNet.
14 |
15 | Args:
16 | in_channels (int): Input channels of this block.
17 | out_channels (int): Output channels of this block.
18 | expansion (int): The ratio of ``out_channels/mid_channels`` where
19 | ``mid_channels`` is the input/output channels of conv2. Default: 4.
20 | stride (int): stride of the block. Default: 1
21 | dilation (int): dilation of convolution. Default: 1
22 | downsample (nn.Module): downsample operation on identity branch.
23 | Default: None.
24 | style (str): ``"pytorch"`` or ``"caffe"``. If set to "pytorch", the
25 | stride-two layer is the 3x3 conv layer, otherwise the stride-two
26 | layer is the first 1x1 conv layer. Default: "pytorch".
27 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some
28 | memory while slowing down the training speed.
29 | conv_cfg (dict): dictionary to construct and config conv layer.
30 | Default: None
31 | norm_cfg (dict): dictionary to construct and config norm layer.
32 | Default: dict(type='BN')
33 | """
34 |
35 | def __init__(self,
36 | in_channels,
37 | out_channels,
38 | expansion=4,
39 | stride=1,
40 | dilation=1,
41 | downsample=None,
42 | style='pytorch',
43 | with_cp=False,
44 | conv_cfg=None,
45 | norm_cfg=dict(type='BN')):
46 | super(Bottleneck, self).__init__()
47 | assert style in ['pytorch', 'caffe']
48 |
49 | self.in_channels = in_channels
50 | self.out_channels = out_channels
51 | self.expansion = expansion
52 | assert out_channels % expansion == 0
53 | self.mid_channels = out_channels // expansion
54 | self.stride = stride
55 | self.dilation = dilation
56 | self.style = style
57 | self.with_cp = with_cp
58 | self.conv_cfg = conv_cfg
59 | self.norm_cfg = norm_cfg
60 |
61 | if self.style == 'pytorch':
62 | self.conv1_stride = 1
63 | self.conv2_stride = stride
64 | else:
65 | self.conv1_stride = stride
66 | self.conv2_stride = 1
67 |
68 | self.norm1_name, norm1 = build_norm_layer(
69 | norm_cfg, self.mid_channels, postfix=1)
70 | self.norm2_name, norm2 = build_norm_layer(
71 | norm_cfg, self.mid_channels, postfix=2)
72 | self.norm3_name, norm3 = build_norm_layer(
73 | norm_cfg, out_channels, postfix=3)
74 |
75 | self.conv1 = build_conv_layer(
76 | conv_cfg,
77 | in_channels,
78 | self.mid_channels,
79 | kernel_size=1,
80 | stride=self.conv1_stride,
81 | bias=False)
82 | self.add_module(self.norm1_name, norm1)
83 | self.conv2 = involution(self.mid_channels, 7, self.conv2_stride)
84 |
85 | self.add_module(self.norm2_name, norm2)
86 | self.conv3 = build_conv_layer(
87 | conv_cfg,
88 | self.mid_channels,
89 | out_channels,
90 | kernel_size=1,
91 | bias=False)
92 | self.add_module(self.norm3_name, norm3)
93 |
94 | self.relu = nn.ReLU(inplace=True)
95 | self.downsample = downsample
96 |
97 | @property
98 | def norm1(self):
99 | return getattr(self, self.norm1_name)
100 |
101 | @property
102 | def norm2(self):
103 | return getattr(self, self.norm2_name)
104 |
105 | @property
106 | def norm3(self):
107 | return getattr(self, self.norm3_name)
108 |
109 | def forward(self, x):
110 |
111 | def _inner_forward(x):
112 | identity = x
113 |
114 | out = self.conv1(x)
115 | out = self.norm1(out)
116 | out = self.relu(out)
117 |
118 | out = self.conv2(out)
119 | out = self.norm2(out)
120 | out = self.relu(out)
121 |
122 | out = self.conv3(out)
123 | out = self.norm3(out)
124 |
125 | if self.downsample is not None:
126 | identity = self.downsample(x)
127 |
128 | out += identity
129 |
130 | return out
131 |
132 | if self.with_cp and x.requires_grad:
133 | out = cp.checkpoint(_inner_forward, x)
134 | else:
135 | out = _inner_forward(x)
136 |
137 | out = self.relu(out)
138 |
139 | return out
140 |
141 |
142 | def get_expansion(block, expansion=None):
143 | """Get the expansion of a residual block.
144 |
145 | The block expansion will be obtained by the following order:
146 |
147 | 1. If ``expansion`` is given, just return it.
148 | 2. If ``block`` has the attribute ``expansion``, then return
149 | ``block.expansion``.
150 | 3. Return the default value according to the block type:
151 | 1 for ``BasicBlock`` and 4 for ``Bottleneck``.
152 |
153 | Args:
154 | block (class): The block class.
155 | expansion (int | None): The given expansion ratio.
156 |
157 | Returns:
158 | int: The expansion of the block.
159 | """
160 | if isinstance(expansion, int):
161 | assert expansion > 0
162 | elif expansion is None:
163 | if hasattr(block, 'expansion'):
164 | expansion = block.expansion
165 | elif issubclass(block, Bottleneck):
166 | expansion = 4
167 | else:
168 | raise TypeError(f'expansion is not specified for {block.__name__}')
169 | else:
170 | raise TypeError('expansion must be an integer or None')
171 |
172 | return expansion
173 |
174 |
175 | class ResLayer(nn.Sequential):
176 | """ResLayer to build ResNet style backbone.
177 |
178 | Args:
179 | block (nn.Module): Residual block used to build ResLayer.
180 | num_blocks (int): Number of blocks.
181 | in_channels (int): Input channels of this block.
182 | out_channels (int): Output channels of this block.
183 | expansion (int, optional): The expansion for BasicBlock/Bottleneck.
184 | If not specified, it will firstly be obtained via
185 | ``block.expansion``. If the block has no attribute "expansion",
186 | the following default values will be used: 1 for BasicBlock and
187 | 4 for Bottleneck. Default: None.
188 | stride (int): stride of the first block. Default: 1.
189 | avg_down (bool): Use AvgPool instead of stride conv when
190 | downsampling in the bottleneck. Default: False
191 | conv_cfg (dict): dictionary to construct and config conv layer.
192 | Default: None
193 | norm_cfg (dict): dictionary to construct and config norm layer.
194 | Default: dict(type='BN')
195 | """
196 |
197 | def __init__(self,
198 | block,
199 | num_blocks,
200 | in_channels,
201 | out_channels,
202 | expansion=None,
203 | stride=1,
204 | avg_down=False,
205 | conv_cfg=None,
206 | norm_cfg=dict(type='BN'),
207 | **kwargs):
208 | self.block = block
209 | self.expansion = get_expansion(block, expansion)
210 |
211 | downsample = None
212 | if stride != 1 or in_channels != out_channels:
213 | downsample = []
214 | conv_stride = stride
215 | if avg_down and stride != 1:
216 | conv_stride = 1
217 | downsample.append(
218 | nn.AvgPool2d(
219 | kernel_size=stride,
220 | stride=stride,
221 | ceil_mode=True,
222 | count_include_pad=False))
223 | downsample.extend([
224 | build_conv_layer(
225 | conv_cfg,
226 | in_channels,
227 | out_channels,
228 | kernel_size=1,
229 | stride=conv_stride,
230 | bias=False),
231 | build_norm_layer(norm_cfg, out_channels)[1]
232 | ])
233 | downsample = nn.Sequential(*downsample)
234 |
235 | layers = []
236 | layers.append(
237 | block(
238 | in_channels=in_channels,
239 | out_channels=out_channels,
240 | expansion=self.expansion,
241 | stride=stride,
242 | downsample=downsample,
243 | conv_cfg=conv_cfg,
244 | norm_cfg=norm_cfg,
245 | **kwargs))
246 | in_channels = out_channels
247 | for i in range(1, num_blocks):
248 | layers.append(
249 | block(
250 | in_channels=in_channels,
251 | out_channels=out_channels,
252 | expansion=self.expansion,
253 | stride=1,
254 | conv_cfg=conv_cfg,
255 | norm_cfg=norm_cfg,
256 | **kwargs))
257 | super(ResLayer, self).__init__(*layers)
258 |
259 |
260 | @BACKBONES.register_module()
261 | class RedNet(BaseBackbone):
262 | """ResNet backbone.
263 |
264 | Please refer to the `paper <https://arxiv.org/abs/2103.06255>`_ for
265 | details.
266 |
267 | Args:
268 | depth (int): Network depth, from {26, 38, 50, 101, 152}.
269 | in_channels (int): Number of input image channels. Default: 3.
270 | stem_channels (int): Output channels of the stem layer. Default: 64.
271 | base_channels (int): Middle channels of the first stage. Default: 64.
272 | num_stages (int): Stages of the network. Default: 4.
273 | strides (Sequence[int]): Strides of the first block of each stage.
274 | Default: ``(1, 2, 2, 2)``.
275 | dilations (Sequence[int]): Dilation of each stage.
276 | Default: ``(1, 1, 1, 1)``.
277 | out_indices (Sequence[int]): Output from which stages. If only one
278 | stage is specified, a single tensor (feature map) is returned,
279 | otherwise multiple stages are specified, a tuple of tensors will
280 | be returned. Default: ``(3, )``.
281 | style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
282 | layer is the 3x3 conv layer, otherwise the stride-two layer is
283 | the first 1x1 conv layer.
284 | deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
285 | Default: False.
286 | avg_down (bool): Use AvgPool instead of stride conv when
287 | downsampling in the bottleneck. Default: False.
288 | frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
289 | -1 means not freezing any parameters. Default: -1.
290 | conv_cfg (dict | None): The config dict for conv layers. Default: None.
291 | norm_cfg (dict): The config dict for norm layers.
292 | norm_eval (bool): Whether to set norm layers to eval mode, namely,
293 | freeze running stats (mean and var). Note: Effect on Batch Norm
294 | and its variants only. Default: False.
295 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some
296 | memory while slowing down the training speed. Default: False.
297 | zero_init_residual (bool): Whether to use zero init for last norm layer
298 | in resblocks to let them behave as identity. Default: True.
299 |
300 | Example:
301 | >>> from mmcls.models import RedNet
302 | >>> import torch
303 | >>> self = RedNet(depth=26, out_indices=(0, 1, 2, 3))
304 | >>> self.eval()
305 | >>> inputs = torch.rand(1, 3, 32, 32)
306 | >>> level_outputs = self.forward(inputs)
307 | >>> for level_out in level_outputs:
308 | ...     print(tuple(level_out.shape))
309 | (1, 256, 8, 8)
310 | (1, 512, 4, 4)
311 | (1, 1024, 2, 2)
312 | (1, 2048, 1, 1)
313 | """
314 |
315 | arch_settings = {
316 | 26: (Bottleneck, (1, 2, 4, 1)),
317 | 38: (Bottleneck, (2, 3, 5, 2)),
318 | 50: (Bottleneck, (3, 4, 6, 3)),
319 | 101: (Bottleneck, (3, 4, 23, 3)),
320 | 152: (Bottleneck, (3, 8, 36, 3))
321 | }
322 |
323 | def __init__(self,
324 | depth,
325 | in_channels=3,
326 | stem_channels=64,
327 | base_channels=64,
328 | expansion=None,
329 | num_stages=4,
330 | strides=(1, 2, 2, 2),
331 | dilations=(1, 1, 1, 1),
332 | out_indices=(3, ),
333 | style='pytorch',
334 | avg_down=False,
335 | frozen_stages=-1,
336 | conv_cfg=None,
337 | norm_cfg=dict(type='BN', requires_grad=True),
338 | norm_eval=False,
339 | with_cp=False,
340 | zero_init_residual=True):
341 | super(RedNet, self).__init__()
342 | if depth not in self.arch_settings:
343 | raise KeyError(f'invalid depth {depth} for resnet')
344 | self.depth = depth
345 | self.stem_channels = stem_channels
346 | self.base_channels = base_channels
347 | self.num_stages = num_stages
348 | assert num_stages >= 1 and num_stages <= 4
349 | self.strides = strides
350 | self.dilations = dilations
351 | assert len(strides) == len(dilations) == num_stages
352 | self.out_indices = out_indices
353 | assert max(out_indices) < num_stages
354 | self.style = style
355 | self.avg_down = avg_down
356 | self.frozen_stages = frozen_stages
357 | self.conv_cfg = conv_cfg
358 | self.norm_cfg = norm_cfg
359 | self.with_cp = with_cp
360 | self.norm_eval = norm_eval
361 | self.zero_init_residual = zero_init_residual
362 | self.block, stage_blocks = self.arch_settings[depth]
363 | self.stage_blocks = stage_blocks[:num_stages]
364 | self.expansion = get_expansion(self.block, expansion)
365 |
366 | self._make_stem_layer(in_channels, stem_channels)
367 |
368 | self.res_layers = []
369 | _in_channels = stem_channels
370 | _out_channels = base_channels * self.expansion
371 | for i, num_blocks in enumerate(self.stage_blocks):
372 | stride = strides[i]
373 | dilation = dilations[i]
374 | res_layer = self.make_res_layer(
375 | block=self.block,
376 | num_blocks=num_blocks,
377 | in_channels=_in_channels,
378 | out_channels=_out_channels,
379 | expansion=self.expansion,
380 | stride=stride,
381 | dilation=dilation,
382 | style=self.style,
383 | avg_down=self.avg_down,
384 | with_cp=with_cp,
385 | conv_cfg=conv_cfg,
386 | norm_cfg=norm_cfg)
387 | _in_channels = _out_channels
388 | _out_channels *= 2
389 | layer_name = f'layer{i + 1}'
390 | self.add_module(layer_name, res_layer)
391 | self.res_layers.append(layer_name)
392 |
393 | self._freeze_stages()
394 |
395 | self.feat_dim = res_layer[-1].out_channels
396 |
397 | def make_res_layer(self, **kwargs):
398 | return ResLayer(**kwargs)
399 |
400 | @property
401 | def norm1(self):
402 | return getattr(self, self.norm1_name)
403 |
404 | def _make_stem_layer(self, in_channels, stem_channels):
405 | self.stem = nn.Sequential(
406 | ConvModule(
407 | in_channels,
408 | stem_channels // 2,
409 | kernel_size=3,
410 | stride=2,
411 | padding=1,
412 | conv_cfg=self.conv_cfg,
413 | norm_cfg=self.norm_cfg,
414 | inplace=True),
415 | involution(stem_channels // 2, 3, 1),
416 | nn.BatchNorm2d(stem_channels // 2),
417 | nn.ReLU(inplace=True),
418 | ConvModule(
419 | stem_channels // 2,
420 | stem_channels,
421 | kernel_size=3,
422 | stride=1,
423 | padding=1,
424 | conv_cfg=self.conv_cfg,
425 | norm_cfg=self.norm_cfg,
426 | inplace=True))
427 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
428 |
429 | def _freeze_stages(self):
430 | if self.frozen_stages >= 0:
431 | self.stem.eval()
432 | for param in self.stem.parameters():
433 | param.requires_grad = False
434 |
435 | for i in range(1, self.frozen_stages + 1):
436 | m = getattr(self, f'layer{i}')
437 | m.eval()
438 | for param in m.parameters():
439 | param.requires_grad = False
440 |
441 | def init_weights(self, pretrained=None):
442 | super(RedNet, self).init_weights(pretrained)
443 | if pretrained is None:
444 | for m in self.modules():
445 | if isinstance(m, nn.Conv2d):
446 | kaiming_init(m)
447 | elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
448 | constant_init(m, 1)
449 |
450 | if self.zero_init_residual:
451 | for m in self.modules():
452 | if isinstance(m, Bottleneck):
453 | constant_init(m.norm3, 0)
454 |
455 | def forward(self, x):
456 | x = self.stem(x)
457 | x = self.maxpool(x)
458 | outs = []
459 | for i, layer_name in enumerate(self.res_layers):
460 | res_layer = getattr(self, layer_name)
461 | x = res_layer(x)
462 | if i in self.out_indices:
463 | outs.append(x)
464 | if len(outs) == 1:
465 | return outs[0]
466 | else:
467 | return tuple(outs)
468 |
469 | def train(self, mode=True):
470 | super(RedNet, self).train(mode)
471 | self._freeze_stages()
472 | if mode and self.norm_eval:
473 | for m in self.modules():
474 | # trick: eval has effect on BatchNorm only
475 | if isinstance(m, _BatchNorm):
476 | m.eval()
477 |
478 |
--------------------------------------------------------------------------------
/cls/mmcls/models/utils/involution_cuda.py:
--------------------------------------------------------------------------------
1 | from torch.autograd import Function
2 | import torch
3 | from torch.nn.modules.utils import _pair
4 | import torch.nn.functional as F
5 | import torch.nn as nn
6 | from mmcv.cnn import ConvModule
7 |
8 |
9 | from collections import namedtuple
10 | import cupy
11 | from string import Template
12 |
13 |
14 | Stream = namedtuple('Stream', ['ptr'])
15 |
16 |
17 | def Dtype(t):
18 | if isinstance(t, torch.cuda.FloatTensor):
19 | return 'float'
20 | elif isinstance(t, torch.cuda.DoubleTensor):
21 | return 'double'
22 |
23 |
24 | @cupy._util.memoize(for_each_device=True)
25 | def load_kernel(kernel_name, code, **kwargs):
26 | code = Template(code).substitute(**kwargs)
27 | kernel_code = cupy.cuda.compile_with_cache(code)
28 | return kernel_code.get_function(kernel_name)
29 |
30 |
31 | CUDA_NUM_THREADS = 1024
32 |
33 | kernel_loop = '''
34 | #define CUDA_KERNEL_LOOP(i, n) \
35 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
36 | i < (n); \
37 | i += blockDim.x * gridDim.x)
38 | '''
39 |
40 |
41 | def GET_BLOCKS(N):
42 | return (N + CUDA_NUM_THREADS - 1) // CUDA_NUM_THREADS
43 |
44 |
45 | _involution_kernel = kernel_loop + '''
46 | extern "C"
47 | __global__ void involution_forward_kernel(
48 | const ${Dtype}* bottom_data, const ${Dtype}* weight_data, ${Dtype}* top_data) {
49 | CUDA_KERNEL_LOOP(index, ${nthreads}) {
50 | const int n = index / ${channels} / ${top_height} / ${top_width};
51 | const int c = (index / ${top_height} / ${top_width}) % ${channels};
52 | const int h = (index / ${top_width}) % ${top_height};
53 | const int w = index % ${top_width};
54 | const int g = c / (${channels} / ${groups});
55 | ${Dtype} value = 0;
56 | #pragma unroll
57 | for (int kh = 0; kh < ${kernel_h}; ++kh) {
58 | #pragma unroll
59 | for (int kw = 0; kw < ${kernel_w}; ++kw) {
60 | const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h};
61 | const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w};
62 | if ((h_in >= 0) && (h_in < ${bottom_height})
63 | && (w_in >= 0) && (w_in < ${bottom_width})) {
64 | const int offset = ((n * ${channels} + c) * ${bottom_height} + h_in)
65 | * ${bottom_width} + w_in;
66 | const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h)
67 | * ${top_width} + w;
68 | value += weight_data[offset_weight] * bottom_data[offset];
69 | }
70 | }
71 | }
72 | top_data[index] = value;
73 | }
74 | }
75 | '''
76 |
77 |
78 | _involution_kernel_backward_grad_input = kernel_loop + '''
79 | extern "C"
80 | __global__ void involution_backward_grad_input_kernel(
81 | const ${Dtype}* const top_diff, const ${Dtype}* const weight_data, ${Dtype}* const bottom_diff) {
82 | CUDA_KERNEL_LOOP(index, ${nthreads}) {
83 | const int n = index / ${channels} / ${bottom_height} / ${bottom_width};
84 | const int c = (index / ${bottom_height} / ${bottom_width}) % ${channels};
85 | const int h = (index / ${bottom_width}) % ${bottom_height};
86 | const int w = index % ${bottom_width};
87 | const int g = c / (${channels} / ${groups});
88 | ${Dtype} value = 0;
89 | #pragma unroll
90 | for (int kh = 0; kh < ${kernel_h}; ++kh) {
91 | #pragma unroll
92 | for (int kw = 0; kw < ${kernel_w}; ++kw) {
93 | const int h_out_s = h + ${pad_h} - kh * ${dilation_h};
94 | const int w_out_s = w + ${pad_w} - kw * ${dilation_w};
95 | if (((h_out_s % ${stride_h}) == 0) && ((w_out_s % ${stride_w}) == 0)) {
96 | const int h_out = h_out_s / ${stride_h};
97 | const int w_out = w_out_s / ${stride_w};
98 | if ((h_out >= 0) && (h_out < ${top_height})
99 | && (w_out >= 0) && (w_out < ${top_width})) {
100 | const int offset = ((n * ${channels} + c) * ${top_height} + h_out)
101 | * ${top_width} + w_out;
102 | const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h_out)
103 | * ${top_width} + w_out;
104 | value += weight_data[offset_weight] * top_diff[offset];
105 | }
106 | }
107 | }
108 | }
109 | bottom_diff[index] = value;
110 | }
111 | }
112 | '''
113 |
114 |
115 | _involution_kernel_backward_grad_weight = kernel_loop + '''
116 | extern "C"
117 | __global__ void involution_backward_grad_weight_kernel(
118 | const ${Dtype}* const top_diff, const ${Dtype}* const bottom_data, ${Dtype}* const buffer_data) {
119 | CUDA_KERNEL_LOOP(index, ${nthreads}) {
120 | const int h = (index / ${top_width}) % ${top_height};
121 | const int w = index % ${top_width};
122 | const int kh = (index / ${kernel_w} / ${top_height} / ${top_width})
123 | % ${kernel_h};
124 | const int kw = (index / ${top_height} / ${top_width}) % ${kernel_w};
125 | const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h};
126 | const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w};
127 | if ((h_in >= 0) && (h_in < ${bottom_height})
128 | && (w_in >= 0) && (w_in < ${bottom_width})) {
129 | const int g = (index / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${groups};
130 | const int n = (index / ${groups} / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${num};
131 | ${Dtype} value = 0;
132 | #pragma unroll
133 | for (int c = g * (${channels} / ${groups}); c < (g + 1) * (${channels} / ${groups}); ++c) {
134 | const int top_offset = ((n * ${channels} + c) * ${top_height} + h)
135 | * ${top_width} + w;
136 | const int bottom_offset = ((n * ${channels} + c) * ${bottom_height} + h_in)
137 | * ${bottom_width} + w_in;
138 | value += top_diff[top_offset] * bottom_data[bottom_offset];
139 | }
140 | buffer_data[index] = value;
141 | } else {
142 | buffer_data[index] = 0;
143 | }
144 | }
145 | }
146 | '''
147 |
148 |
149 | class _involution(Function):
150 | @staticmethod
151 | def forward(ctx, input, weight, stride, padding, dilation):
152 | assert input.dim() == 4 and input.is_cuda
153 | assert weight.dim() == 6 and weight.is_cuda
154 | batch_size, channels, height, width = input.size()
155 | kernel_h, kernel_w = weight.size()[2:4]
156 | output_h = int((height + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) / stride[0] + 1)
157 | output_w = int((width + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) / stride[1] + 1)
158 |
159 | output = input.new(batch_size, channels, output_h, output_w)
160 | n = output.numel()
161 |
162 | with torch.cuda.device_of(input):
163 | f = load_kernel('involution_forward_kernel', _involution_kernel, Dtype=Dtype(input), nthreads=n,
164 | num=batch_size, channels=channels, groups=weight.size()[1],
165 | bottom_height=height, bottom_width=width,
166 | top_height=output_h, top_width=output_w,
167 | kernel_h=kernel_h, kernel_w=kernel_w,
168 | stride_h=stride[0], stride_w=stride[1],
169 | dilation_h=dilation[0], dilation_w=dilation[1],
170 | pad_h=padding[0], pad_w=padding[1])
171 | f(block=(CUDA_NUM_THREADS,1,1),
172 | grid=(GET_BLOCKS(n),1,1),
173 | args=[input.data_ptr(), weight.data_ptr(), output.data_ptr()],
174 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
175 |
176 | ctx.save_for_backward(input, weight)
177 | ctx.stride, ctx.padding, ctx.dilation = stride, padding, dilation
178 | return output
179 |
180 | @staticmethod
181 | def backward(ctx, grad_output):
182 | assert grad_output.is_cuda and grad_output.is_contiguous()
183 | input, weight = ctx.saved_tensors
184 | stride, padding, dilation = ctx.stride, ctx.padding, ctx.dilation
185 |
186 | batch_size, channels, height, width = input.size()
187 | kernel_h, kernel_w = weight.size()[2:4]
188 | output_h, output_w = grad_output.size()[2:]
189 |
190 | grad_input, grad_weight = None, None
191 |
192 | opt = dict(Dtype=Dtype(grad_output),
193 | num=batch_size, channels=channels, groups=weight.size()[1],
194 | bottom_height=height, bottom_width=width,
195 | top_height=output_h, top_width=output_w,
196 | kernel_h=kernel_h, kernel_w=kernel_w,
197 | stride_h=stride[0], stride_w=stride[1],
198 | dilation_h=dilation[0], dilation_w=dilation[1],
199 | pad_h=padding[0], pad_w=padding[1])
200 |
201 | with torch.cuda.device_of(input):
202 | if ctx.needs_input_grad[0]:
203 | grad_input = input.new(input.size())
204 |
205 | n = grad_input.numel()
206 | opt['nthreads'] = n
207 |
208 | f = load_kernel('involution_backward_grad_input_kernel',
209 | _involution_kernel_backward_grad_input, **opt)
210 | f(block=(CUDA_NUM_THREADS,1,1),
211 | grid=(GET_BLOCKS(n),1,1),
212 | args=[grad_output.data_ptr(), weight.data_ptr(), grad_input.data_ptr()],
213 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
214 |
215 | if ctx.needs_input_grad[1]:
216 | grad_weight = weight.new(weight.size())
217 |
218 | n = grad_weight.numel()
219 | opt['nthreads'] = n
220 |
221 | f = load_kernel('involution_backward_grad_weight_kernel',
222 | _involution_kernel_backward_grad_weight, **opt)
223 | f(block=(CUDA_NUM_THREADS,1,1),
224 | grid=(GET_BLOCKS(n),1,1),
225 | args=[grad_output.data_ptr(), input.data_ptr(), grad_weight.data_ptr()],
226 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
227 |
228 | return grad_input, grad_weight, None, None, None
229 |
230 |
231 | def _involution_cuda(input, weight, bias=None, stride=1, padding=0, dilation=1):
232 | """ involution kernel
233 | """
234 | assert input.size(0) == weight.size(0)
235 | assert input.size(-2)//stride == weight.size(-2)
236 | assert input.size(-1)//stride == weight.size(-1)
237 | if input.is_cuda:
238 | out = _involution.apply(input, weight, _pair(stride), _pair(padding), _pair(dilation))
239 | if bias is not None:
240 | out += bias.view(1,-1,1,1)
241 | else:
242 | raise NotImplementedError
243 | return out
244 |
245 |
246 | class involution(nn.Module):
247 |
248 | def __init__(self,
249 | channels,
250 | kernel_size,
251 | stride):
252 | super(involution, self).__init__()
253 | self.kernel_size = kernel_size
254 | self.stride = stride
255 | self.channels = channels
256 | reduction_ratio = 4
257 | self.group_channels = 16
258 | self.groups = self.channels // self.group_channels
259 | self.conv1 = ConvModule(
260 | in_channels=channels,
261 | out_channels=channels // reduction_ratio,
262 | kernel_size=1,
263 | conv_cfg=None,
264 | norm_cfg=dict(type='BN'),
265 | act_cfg=dict(type='ReLU'))
266 | self.conv2 = ConvModule(
267 | in_channels=channels // reduction_ratio,
268 | out_channels=kernel_size**2 * self.groups,
269 | kernel_size=1,
270 | stride=1,
271 | conv_cfg=None,
272 | norm_cfg=None,
273 | act_cfg=None)
274 | if stride > 1:
275 | self.avgpool = nn.AvgPool2d(stride, stride)
276 |
277 | def forward(self, x):
278 | weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
279 | b, c, h, w = weight.shape
280 | weight = weight.view(b, self.groups, self.kernel_size, self.kernel_size, h, w)
281 | out = _involution_cuda(x, weight, stride=self.stride, padding=(self.kernel_size-1)//2)
282 | return out
283 |
--------------------------------------------------------------------------------
/cls/mmcls/models/utils/involution_naive.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from mmcv.cnn import ConvModule
3 |
4 |
5 | class involution(nn.Module):
6 |
7 | def __init__(self,
8 | channels,
9 | kernel_size,
10 | stride):
11 | super(involution, self).__init__()
12 | self.kernel_size = kernel_size
13 | self.stride = stride
14 | self.channels = channels
15 | reduction_ratio = 4
16 | self.group_channels = 16
17 | self.groups = self.channels // self.group_channels
18 | self.conv1 = ConvModule(
19 | in_channels=channels,
20 | out_channels=channels // reduction_ratio,
21 | kernel_size=1,
22 | conv_cfg=None,
23 | norm_cfg=dict(type='BN'),
24 | act_cfg=dict(type='ReLU'))
25 | self.conv2 = ConvModule(
26 | in_channels=channels // reduction_ratio,
27 | out_channels=kernel_size**2 * self.groups,
28 | kernel_size=1,
29 | stride=1,
30 | conv_cfg=None,
31 | norm_cfg=None,
32 | act_cfg=None)
33 | if stride > 1:
34 | self.avgpool = nn.AvgPool2d(stride, stride)
35 | self.unfold = nn.Unfold(kernel_size, 1, (kernel_size-1)//2, stride)
36 |
37 | def forward(self, x):
38 | weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
39 | b, c, h, w = weight.shape
40 | weight = weight.view(b, self.groups, self.kernel_size**2, h, w).unsqueeze(2)
41 | out = self.unfold(x).view(b, self.groups, self.group_channels, self.kernel_size**2, h, w)
42 | out = (weight * out).sum(dim=3).view(b, self.channels, h, w)
43 | return out
44 |
--------------------------------------------------------------------------------
/det/configs/_base_/models/faster_rcnn_red50_fpn.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='FasterRCNN',
3 | #pretrained='torchvision://resnet50',
4 | pretrained='/path/to/rednet50.pth',
5 | backbone=dict(
6 | type='RedNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | norm_cfg=dict(type='BN', requires_grad=True),
12 | norm_eval=True,
13 | style='pytorch'),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | bbox_roi_extractor=dict(
38 | type='SingleRoIExtractor',
39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 | out_channels=256,
41 | featmap_strides=[4, 8, 16, 32]),
42 | bbox_head=dict(
43 | type='Shared2FCBBoxHead',
44 | in_channels=256,
45 | fc_out_channels=1024,
46 | roi_feat_size=7,
47 | num_classes=80,
48 | bbox_coder=dict(
49 | type='DeltaXYWHBBoxCoder',
50 | target_means=[0., 0., 0., 0.],
51 | target_stds=[0.1, 0.1, 0.2, 0.2]),
52 | reg_class_agnostic=False,
53 | loss_cls=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
56 | # model training and testing settings
57 | train_cfg=dict(
58 | rpn=dict(
59 | assigner=dict(
60 | type='MaxIoUAssigner',
61 | pos_iou_thr=0.7,
62 | neg_iou_thr=0.3,
63 | min_pos_iou=0.3,
64 | match_low_quality=True,
65 | ignore_iof_thr=-1),
66 | sampler=dict(
67 | type='RandomSampler',
68 | num=256,
69 | pos_fraction=0.5,
70 | neg_pos_ub=-1,
71 | add_gt_as_proposals=False),
72 | allowed_border=-1,
73 | pos_weight=-1,
74 | debug=False),
75 | rpn_proposal=dict(
76 | nms_across_levels=False,
77 | nms_pre=2000,
78 | nms_post=1000,
79 | max_num=1000,
80 | nms_thr=0.7,
81 | min_bbox_size=0),
82 | rcnn=dict(
83 | assigner=dict(
84 | type='MaxIoUAssigner',
85 | pos_iou_thr=0.5,
86 | neg_iou_thr=0.5,
87 | min_pos_iou=0.5,
88 | match_low_quality=False,
89 | ignore_iof_thr=-1),
90 | sampler=dict(
91 | type='RandomSampler',
92 | num=512,
93 | pos_fraction=0.25,
94 | neg_pos_ub=-1,
95 | add_gt_as_proposals=True),
96 | pos_weight=-1,
97 | debug=False)),
98 | test_cfg=dict(
99 | rpn=dict(
100 | nms_across_levels=False,
101 | nms_pre=1000,
102 | nms_post=1000,
103 | max_num=1000,
104 | nms_thr=0.7,
105 | min_bbox_size=0),
106 | rcnn=dict(
107 | score_thr=0.05,
108 | nms=dict(type='nms', iou_threshold=0.5),
109 | max_per_img=100)
110 | # soft-nms is also supported for rcnn testing
111 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
112 | ))
113 |
--------------------------------------------------------------------------------
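These base model files are plain mmcv configs; the leaf configs under configs/involution/ pull them in through `_base_`. A hedged sketch of building the detector directly from this file, assuming it is run from the det/ directory and a recent mmdet 2.x where train_cfg/test_cfg may live inside the model dict (the exact build_detector behaviour varies across versions; the placeholder pretrained path is cleared so nothing tries to load it):

from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile('configs/_base_/models/faster_rcnn_red50_fpn.py')
cfg.model.pretrained = None  # '/path/to/rednet50.pth' above is only a placeholder
detector = build_detector(cfg.model)
print(type(detector).__name__)  # FasterRCNN
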
/det/configs/_base_/models/faster_rcnn_red50_neck_fpn.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='FasterRCNN',
3 | #pretrained='torchvision://resnet50',
4 | pretrained='/path/to/rednet50.pth',
5 | backbone=dict(
6 | type='RedNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | norm_cfg=dict(type='BN', requires_grad=True),
12 | norm_eval=True,
13 | style='pytorch'),
14 | neck=dict(
15 | type='FPN_involution',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | bbox_roi_extractor=dict(
38 | type='SingleRoIExtractor',
39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 | out_channels=256,
41 | featmap_strides=[4, 8, 16, 32]),
42 | bbox_head=dict(
43 | type='Shared2FCBBoxHead',
44 | in_channels=256,
45 | fc_out_channels=1024,
46 | roi_feat_size=7,
47 | num_classes=80,
48 | bbox_coder=dict(
49 | type='DeltaXYWHBBoxCoder',
50 | target_means=[0., 0., 0., 0.],
51 | target_stds=[0.1, 0.1, 0.2, 0.2]),
52 | reg_class_agnostic=False,
53 | loss_cls=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
56 | # model training and testing settings
57 | train_cfg=dict(
58 | rpn=dict(
59 | assigner=dict(
60 | type='MaxIoUAssigner',
61 | pos_iou_thr=0.7,
62 | neg_iou_thr=0.3,
63 | min_pos_iou=0.3,
64 | match_low_quality=True,
65 | ignore_iof_thr=-1),
66 | sampler=dict(
67 | type='RandomSampler',
68 | num=256,
69 | pos_fraction=0.5,
70 | neg_pos_ub=-1,
71 | add_gt_as_proposals=False),
72 | allowed_border=-1,
73 | pos_weight=-1,
74 | debug=False),
75 | rpn_proposal=dict(
76 | nms_across_levels=False,
77 | nms_pre=2000,
78 | nms_post=1000,
79 | max_num=1000,
80 | nms_thr=0.7,
81 | min_bbox_size=0),
82 | rcnn=dict(
83 | assigner=dict(
84 | type='MaxIoUAssigner',
85 | pos_iou_thr=0.5,
86 | neg_iou_thr=0.5,
87 | min_pos_iou=0.5,
88 | match_low_quality=False,
89 | ignore_iof_thr=-1),
90 | sampler=dict(
91 | type='RandomSampler',
92 | num=512,
93 | pos_fraction=0.25,
94 | neg_pos_ub=-1,
95 | add_gt_as_proposals=True),
96 | pos_weight=-1,
97 | debug=False)),
98 | test_cfg=dict(
99 | rpn=dict(
100 | nms_across_levels=False,
101 | nms_pre=1000,
102 | nms_post=1000,
103 | max_num=1000,
104 | nms_thr=0.7,
105 | min_bbox_size=0),
106 | rcnn=dict(
107 | score_thr=0.05,
108 | nms=dict(type='nms', iou_threshold=0.5),
109 | max_per_img=100)
110 | # soft-nms is also supported for rcnn testing
111 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
112 | ))
113 |
--------------------------------------------------------------------------------
/det/configs/_base_/models/faster_rcnn_red50_neck_fpn_head.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='FasterRCNN',
3 | #pretrained='torchvision://resnet50',
4 | pretrained='/path/to/rednet50.pth',
5 | backbone=dict(
6 | type='RedNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | norm_cfg=dict(type='BN', requires_grad=True),
12 | norm_eval=True,
13 | style='pytorch'),
14 | neck=dict(
15 | type='FPN_involution',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead_involution',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | bbox_roi_extractor=dict(
38 | type='SingleRoIExtractor',
39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 | out_channels=256,
41 | featmap_strides=[4, 8, 16, 32]),
42 | bbox_head=dict(
43 | type='Shared2FCBBoxHead',
44 | in_channels=256,
45 | fc_out_channels=1024,
46 | roi_feat_size=7,
47 | num_classes=80,
48 | bbox_coder=dict(
49 | type='DeltaXYWHBBoxCoder',
50 | target_means=[0., 0., 0., 0.],
51 | target_stds=[0.1, 0.1, 0.2, 0.2]),
52 | reg_class_agnostic=False,
53 | loss_cls=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
56 | # model training and testing settings
57 | train_cfg=dict(
58 | rpn=dict(
59 | assigner=dict(
60 | type='MaxIoUAssigner',
61 | pos_iou_thr=0.7,
62 | neg_iou_thr=0.3,
63 | min_pos_iou=0.3,
64 | match_low_quality=True,
65 | ignore_iof_thr=-1),
66 | sampler=dict(
67 | type='RandomSampler',
68 | num=256,
69 | pos_fraction=0.5,
70 | neg_pos_ub=-1,
71 | add_gt_as_proposals=False),
72 | allowed_border=-1,
73 | pos_weight=-1,
74 | debug=False),
75 | rpn_proposal=dict(
76 | nms_across_levels=False,
77 | nms_pre=2000,
78 | nms_post=1000,
79 | max_num=1000,
80 | nms_thr=0.7,
81 | min_bbox_size=0),
82 | rcnn=dict(
83 | assigner=dict(
84 | type='MaxIoUAssigner',
85 | pos_iou_thr=0.5,
86 | neg_iou_thr=0.5,
87 | min_pos_iou=0.5,
88 | match_low_quality=False,
89 | ignore_iof_thr=-1),
90 | sampler=dict(
91 | type='RandomSampler',
92 | num=512,
93 | pos_fraction=0.25,
94 | neg_pos_ub=-1,
95 | add_gt_as_proposals=True),
96 | pos_weight=-1,
97 | debug=False)),
98 | test_cfg=dict(
99 | rpn=dict(
100 | nms_across_levels=False,
101 | nms_pre=1000,
102 | nms_post=1000,
103 | max_num=1000,
104 | nms_thr=0.7,
105 | min_bbox_size=0),
106 | rcnn=dict(
107 | score_thr=0.05,
108 | nms=dict(type='nms', iou_threshold=0.5),
109 | max_per_img=100)
110 | # soft-nms is also supported for rcnn testing
111 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
112 | ))
113 |
--------------------------------------------------------------------------------
/det/configs/_base_/models/mask_rcnn_red50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='MaskRCNN',
4 | #pretrained='torchvision://resnet50',
5 | pretrained='/path/to/rednet50.pth',
6 | backbone=dict(
7 | type='RedNet',
8 | depth=50,
9 | num_stages=4,
10 | out_indices=(0, 1, 2, 3),
11 | frozen_stages=1,
12 | norm_cfg=dict(type='BN', requires_grad=True),
13 | norm_eval=True,
14 | style='pytorch'),
15 | neck=dict(
16 | type='FPN',
17 | in_channels=[256, 512, 1024, 2048],
18 | out_channels=256,
19 | num_outs=5),
20 | rpn_head=dict(
21 | type='RPNHead',
22 | in_channels=256,
23 | feat_channels=256,
24 | anchor_generator=dict(
25 | type='AnchorGenerator',
26 | scales=[8],
27 | ratios=[0.5, 1.0, 2.0],
28 | strides=[4, 8, 16, 32, 64]),
29 | bbox_coder=dict(
30 | type='DeltaXYWHBBoxCoder',
31 | target_means=[.0, .0, .0, .0],
32 | target_stds=[1.0, 1.0, 1.0, 1.0]),
33 | loss_cls=dict(
34 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
35 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
36 | roi_head=dict(
37 | type='StandardRoIHead',
38 | bbox_roi_extractor=dict(
39 | type='SingleRoIExtractor',
40 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
41 | out_channels=256,
42 | featmap_strides=[4, 8, 16, 32]),
43 | bbox_head=dict(
44 | type='Shared2FCBBoxHead',
45 | in_channels=256,
46 | fc_out_channels=1024,
47 | roi_feat_size=7,
48 | num_classes=80,
49 | bbox_coder=dict(
50 | type='DeltaXYWHBBoxCoder',
51 | target_means=[0., 0., 0., 0.],
52 | target_stds=[0.1, 0.1, 0.2, 0.2]),
53 | reg_class_agnostic=False,
54 | loss_cls=dict(
55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
56 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
57 | mask_roi_extractor=dict(
58 | type='SingleRoIExtractor',
59 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
60 | out_channels=256,
61 | featmap_strides=[4, 8, 16, 32]),
62 | mask_head=dict(
63 | type='FCNMaskHead',
64 | num_convs=4,
65 | in_channels=256,
66 | conv_out_channels=256,
67 | num_classes=80,
68 | loss_mask=dict(
69 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
70 | # model training and testing settings
71 | train_cfg=dict(
72 | rpn=dict(
73 | assigner=dict(
74 | type='MaxIoUAssigner',
75 | pos_iou_thr=0.7,
76 | neg_iou_thr=0.3,
77 | min_pos_iou=0.3,
78 | match_low_quality=True,
79 | ignore_iof_thr=-1),
80 | sampler=dict(
81 | type='RandomSampler',
82 | num=256,
83 | pos_fraction=0.5,
84 | neg_pos_ub=-1,
85 | add_gt_as_proposals=False),
86 | allowed_border=-1,
87 | pos_weight=-1,
88 | debug=False),
89 | rpn_proposal=dict(
90 | nms_across_levels=False,
91 | nms_pre=2000,
92 | nms_post=1000,
93 | max_num=1000,
94 | nms_thr=0.7,
95 | min_bbox_size=0),
96 | rcnn=dict(
97 | assigner=dict(
98 | type='MaxIoUAssigner',
99 | pos_iou_thr=0.5,
100 | neg_iou_thr=0.5,
101 | min_pos_iou=0.5,
102 | match_low_quality=True,
103 | ignore_iof_thr=-1),
104 | sampler=dict(
105 | type='RandomSampler',
106 | num=512,
107 | pos_fraction=0.25,
108 | neg_pos_ub=-1,
109 | add_gt_as_proposals=True),
110 | mask_size=28,
111 | pos_weight=-1,
112 | debug=False)),
113 | test_cfg=dict(
114 | rpn=dict(
115 | nms_across_levels=False,
116 | nms_pre=1000,
117 | nms_post=1000,
118 | max_num=1000,
119 | nms_thr=0.7,
120 | min_bbox_size=0),
121 | rcnn=dict(
122 | score_thr=0.05,
123 | nms=dict(type='nms', iou_threshold=0.5),
124 | max_per_img=100,
125 | mask_thr_binary=0.5)))
126 |
--------------------------------------------------------------------------------
/det/configs/_base_/models/mask_rcnn_red50_neck_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='MaskRCNN',
4 | #pretrained='torchvision://resnet50',
5 | pretrained='/path/to/rednet50.pth',
6 | backbone=dict(
7 | type='RedNet',
8 | depth=50,
9 | num_stages=4,
10 | out_indices=(0, 1, 2, 3),
11 | frozen_stages=1,
12 | norm_cfg=dict(type='BN', requires_grad=True),
13 | norm_eval=True,
14 | style='pytorch'),
15 | neck=dict(
16 | type='FPN_involution',
17 | in_channels=[256, 512, 1024, 2048],
18 | out_channels=256,
19 | num_outs=5),
20 | rpn_head=dict(
21 | type='RPNHead',
22 | in_channels=256,
23 | feat_channels=256,
24 | anchor_generator=dict(
25 | type='AnchorGenerator',
26 | scales=[8],
27 | ratios=[0.5, 1.0, 2.0],
28 | strides=[4, 8, 16, 32, 64]),
29 | bbox_coder=dict(
30 | type='DeltaXYWHBBoxCoder',
31 | target_means=[.0, .0, .0, .0],
32 | target_stds=[1.0, 1.0, 1.0, 1.0]),
33 | loss_cls=dict(
34 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
35 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
36 | roi_head=dict(
37 | type='StandardRoIHead',
38 | bbox_roi_extractor=dict(
39 | type='SingleRoIExtractor',
40 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
41 | out_channels=256,
42 | featmap_strides=[4, 8, 16, 32]),
43 | bbox_head=dict(
44 | type='Shared2FCBBoxHead',
45 | in_channels=256,
46 | fc_out_channels=1024,
47 | roi_feat_size=7,
48 | num_classes=80,
49 | bbox_coder=dict(
50 | type='DeltaXYWHBBoxCoder',
51 | target_means=[0., 0., 0., 0.],
52 | target_stds=[0.1, 0.1, 0.2, 0.2]),
53 | reg_class_agnostic=False,
54 | loss_cls=dict(
55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
56 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
57 | mask_roi_extractor=dict(
58 | type='SingleRoIExtractor',
59 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
60 | out_channels=256,
61 | featmap_strides=[4, 8, 16, 32]),
62 | mask_head=dict(
63 | type='FCNMaskHead',
64 | num_convs=4,
65 | in_channels=256,
66 | conv_out_channels=256,
67 | num_classes=80,
68 | loss_mask=dict(
69 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
70 | # model training and testing settings
71 | train_cfg=dict(
72 | rpn=dict(
73 | assigner=dict(
74 | type='MaxIoUAssigner',
75 | pos_iou_thr=0.7,
76 | neg_iou_thr=0.3,
77 | min_pos_iou=0.3,
78 | match_low_quality=True,
79 | ignore_iof_thr=-1),
80 | sampler=dict(
81 | type='RandomSampler',
82 | num=256,
83 | pos_fraction=0.5,
84 | neg_pos_ub=-1,
85 | add_gt_as_proposals=False),
86 | allowed_border=-1,
87 | pos_weight=-1,
88 | debug=False),
89 | rpn_proposal=dict(
90 | nms_across_levels=False,
91 | nms_pre=2000,
92 | nms_post=1000,
93 | max_num=1000,
94 | nms_thr=0.7,
95 | min_bbox_size=0),
96 | rcnn=dict(
97 | assigner=dict(
98 | type='MaxIoUAssigner',
99 | pos_iou_thr=0.5,
100 | neg_iou_thr=0.5,
101 | min_pos_iou=0.5,
102 | match_low_quality=True,
103 | ignore_iof_thr=-1),
104 | sampler=dict(
105 | type='RandomSampler',
106 | num=512,
107 | pos_fraction=0.25,
108 | neg_pos_ub=-1,
109 | add_gt_as_proposals=True),
110 | mask_size=28,
111 | pos_weight=-1,
112 | debug=False)),
113 | test_cfg=dict(
114 | rpn=dict(
115 | nms_across_levels=False,
116 | nms_pre=1000,
117 | nms_post=1000,
118 | max_num=1000,
119 | nms_thr=0.7,
120 | min_bbox_size=0),
121 | rcnn=dict(
122 | score_thr=0.05,
123 | nms=dict(type='nms', iou_threshold=0.5),
124 | max_per_img=100,
125 | mask_thr_binary=0.5)))
126 |
--------------------------------------------------------------------------------
/det/configs/_base_/models/mask_rcnn_red50_neck_fpn_head.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='MaskRCNN',
4 | #pretrained='torchvision://resnet50',
5 | pretrained='/path/to/rednet50.pth',
6 | backbone=dict(
7 | type='RedNet',
8 | depth=50,
9 | num_stages=4,
10 | out_indices=(0, 1, 2, 3),
11 | frozen_stages=1,
12 | norm_cfg=dict(type='BN', requires_grad=True),
13 | norm_eval=True,
14 | style='pytorch'),
15 | neck=dict(
16 | type='FPN_involution',
17 | in_channels=[256, 512, 1024, 2048],
18 | out_channels=256,
19 | num_outs=5),
20 | rpn_head=dict(
21 | type='RPNHead_involution',
22 | in_channels=256,
23 | feat_channels=256,
24 | anchor_generator=dict(
25 | type='AnchorGenerator',
26 | scales=[8],
27 | ratios=[0.5, 1.0, 2.0],
28 | strides=[4, 8, 16, 32, 64]),
29 | bbox_coder=dict(
30 | type='DeltaXYWHBBoxCoder',
31 | target_means=[.0, .0, .0, .0],
32 | target_stds=[1.0, 1.0, 1.0, 1.0]),
33 | loss_cls=dict(
34 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
35 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
36 | roi_head=dict(
37 | type='StandardRoIHead',
38 | bbox_roi_extractor=dict(
39 | type='SingleRoIExtractor',
40 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
41 | out_channels=256,
42 | featmap_strides=[4, 8, 16, 32]),
43 | bbox_head=dict(
44 | type='Shared2FCBBoxHead',
45 | in_channels=256,
46 | fc_out_channels=1024,
47 | roi_feat_size=7,
48 | num_classes=80,
49 | bbox_coder=dict(
50 | type='DeltaXYWHBBoxCoder',
51 | target_means=[0., 0., 0., 0.],
52 | target_stds=[0.1, 0.1, 0.2, 0.2]),
53 | reg_class_agnostic=False,
54 | loss_cls=dict(
55 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
56 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
57 | mask_roi_extractor=dict(
58 | type='SingleRoIExtractor',
59 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
60 | out_channels=256,
61 | featmap_strides=[4, 8, 16, 32]),
62 | mask_head=dict(
63 | type='FCNMaskHead_involution',
64 | num_convs=4,
65 | in_channels=256,
66 | conv_out_channels=256,
67 | num_classes=80,
68 | loss_mask=dict(
69 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
70 | # model training and testing settings
71 | train_cfg=dict(
72 | rpn=dict(
73 | assigner=dict(
74 | type='MaxIoUAssigner',
75 | pos_iou_thr=0.7,
76 | neg_iou_thr=0.3,
77 | min_pos_iou=0.3,
78 | match_low_quality=True,
79 | ignore_iof_thr=-1),
80 | sampler=dict(
81 | type='RandomSampler',
82 | num=256,
83 | pos_fraction=0.5,
84 | neg_pos_ub=-1,
85 | add_gt_as_proposals=False),
86 | allowed_border=-1,
87 | pos_weight=-1,
88 | debug=False),
89 | rpn_proposal=dict(
90 | nms_across_levels=False,
91 | nms_pre=2000,
92 | nms_post=1000,
93 | max_num=1000,
94 | nms_thr=0.7,
95 | min_bbox_size=0),
96 | rcnn=dict(
97 | assigner=dict(
98 | type='MaxIoUAssigner',
99 | pos_iou_thr=0.5,
100 | neg_iou_thr=0.5,
101 | min_pos_iou=0.5,
102 | match_low_quality=True,
103 | ignore_iof_thr=-1),
104 | sampler=dict(
105 | type='RandomSampler',
106 | num=512,
107 | pos_fraction=0.25,
108 | neg_pos_ub=-1,
109 | add_gt_as_proposals=True),
110 | mask_size=28,
111 | pos_weight=-1,
112 | debug=False)),
113 | test_cfg=dict(
114 | rpn=dict(
115 | nms_across_levels=False,
116 | nms_pre=1000,
117 | nms_post=1000,
118 | max_num=1000,
119 | nms_thr=0.7,
120 | min_bbox_size=0),
121 | rcnn=dict(
122 | score_thr=0.05,
123 | nms=dict(type='nms', iou_threshold=0.5),
124 | max_per_img=100,
125 | mask_thr_binary=0.5)))
126 |
--------------------------------------------------------------------------------
/det/configs/_base_/models/retinanet_red50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RetinaNet',
4 | #pretrained='torchvision://resnet50',
5 | pretrained='/path/to/rednet50.pth',
6 | backbone=dict(
7 | type='RedNet',
8 | depth=50,
9 | num_stages=4,
10 | out_indices=(0, 1, 2, 3),
11 | frozen_stages=1,
12 | norm_cfg=dict(type='BN', requires_grad=True),
13 | norm_eval=True,
14 | style='pytorch'),
15 | neck=dict(
16 | type='FPN',
17 | in_channels=[256, 512, 1024, 2048],
18 | out_channels=256,
19 | start_level=1,
20 | add_extra_convs='on_input',
21 | num_outs=5),
22 | bbox_head=dict(
23 | type='RetinaHead',
24 | num_classes=80,
25 | in_channels=256,
26 | stacked_convs=4,
27 | feat_channels=256,
28 | anchor_generator=dict(
29 | type='AnchorGenerator',
30 | octave_base_scale=4,
31 | scales_per_octave=3,
32 | ratios=[0.5, 1.0, 2.0],
33 | strides=[8, 16, 32, 64, 128]),
34 | bbox_coder=dict(
35 | type='DeltaXYWHBBoxCoder',
36 | target_means=[.0, .0, .0, .0],
37 | target_stds=[1.0, 1.0, 1.0, 1.0]),
38 | loss_cls=dict(
39 | type='FocalLoss',
40 | use_sigmoid=True,
41 | gamma=2.0,
42 | alpha=0.25,
43 | loss_weight=1.0),
44 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
45 | # training and testing settings
46 | train_cfg=dict(
47 | assigner=dict(
48 | type='MaxIoUAssigner',
49 | pos_iou_thr=0.5,
50 | neg_iou_thr=0.4,
51 | min_pos_iou=0,
52 | ignore_iof_thr=-1),
53 | allowed_border=-1,
54 | pos_weight=-1,
55 | debug=False),
56 | test_cfg=dict(
57 | nms_pre=1000,
58 | min_bbox_size=0,
59 | score_thr=0.05,
60 | nms=dict(type='nms', iou_threshold=0.5),
61 | max_per_img=100))
62 |
--------------------------------------------------------------------------------
/det/configs/_base_/models/retinanet_red50_neck_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RetinaNet',
4 | #pretrained='torchvision://resnet50',
5 | pretrained='/path/to/rednet50.pth',
6 | backbone=dict(
7 | type='RedNet',
8 | depth=50,
9 | num_stages=4,
10 | out_indices=(0, 1, 2, 3),
11 | frozen_stages=1,
12 | norm_cfg=dict(type='BN', requires_grad=True),
13 | norm_eval=True,
14 | style='pytorch'),
15 | neck=dict(
16 | type='FPN_involution',
17 | in_channels=[256, 512, 1024, 2048],
18 | out_channels=256,
19 | start_level=1,
20 | add_extra_convs='on_input',
21 | num_outs=5),
22 | bbox_head=dict(
23 | type='RetinaHead',
24 | num_classes=80,
25 | in_channels=256,
26 | stacked_convs=4,
27 | feat_channels=256,
28 | anchor_generator=dict(
29 | type='AnchorGenerator',
30 | octave_base_scale=4,
31 | scales_per_octave=3,
32 | ratios=[0.5, 1.0, 2.0],
33 | strides=[8, 16, 32, 64, 128]),
34 | bbox_coder=dict(
35 | type='DeltaXYWHBBoxCoder',
36 | target_means=[.0, .0, .0, .0],
37 | target_stds=[1.0, 1.0, 1.0, 1.0]),
38 | loss_cls=dict(
39 | type='FocalLoss',
40 | use_sigmoid=True,
41 | gamma=2.0,
42 | alpha=0.25,
43 | loss_weight=1.0),
44 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
45 | # training and testing settings
46 | train_cfg=dict(
47 | assigner=dict(
48 | type='MaxIoUAssigner',
49 | pos_iou_thr=0.5,
50 | neg_iou_thr=0.4,
51 | min_pos_iou=0,
52 | ignore_iof_thr=-1),
53 | allowed_border=-1,
54 | pos_weight=-1,
55 | debug=False),
56 | test_cfg=dict(
57 | nms_pre=1000,
58 | min_bbox_size=0,
59 | score_thr=0.05,
60 | nms=dict(type='nms', iou_threshold=0.5),
61 | max_per_img=100))
62 |
--------------------------------------------------------------------------------
/det/configs/_base_/schedules/schedule_1x_warmup.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3 | optimizer_config = dict(grad_clip=None)
4 | # learning policy
5 | lr_config = dict(
6 | policy='step',
7 | warmup='linear',
8 | warmup_iters=2000,
9 | warmup_ratio=0.001,
10 | step=[8, 11])
11 | total_epochs = 12
12 |
--------------------------------------------------------------------------------
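For reference, the schedule above ramps the learning rate linearly from lr * warmup_ratio to lr over the first 2000 iterations, then decays it step-wise at epochs 8 and 11 (mmcv's default step factor is 0.1). A rough sketch of the resulting rule, written from a reading of mmcv's LrUpdaterHook and intended only as an approximation, not as the exact implementation:

def approx_lr(cur_iter, epoch, base_lr=0.02, warmup_iters=2000,
              warmup_ratio=0.001, steps=(8, 11), gamma=0.1):
    """Approximate LR implied by schedule_1x_warmup.py (illustrative only)."""
    lr = base_lr * gamma ** sum(epoch >= s for s in steps)  # step decay
    if cur_iter < warmup_iters:  # linear warmup applied to the regular LR
        k = (1 - cur_iter / warmup_iters) * (1 - warmup_ratio)
        lr *= (1 - k)
    return lr

print(approx_lr(0, 0))       # ~2e-5 at the first iteration
print(approx_lr(2000, 0))    # 0.02 once warmup finishes
print(approx_lr(10**6, 11))  # 2e-4 after the second decay step
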
/det/configs/involution/faster_rcnn_red50_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/faster_rcnn_red50_fpn.py',
3 | '../_base_/datasets/coco_detection.py',
4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
5 | ]
6 |
--------------------------------------------------------------------------------
/det/configs/involution/faster_rcnn_red50_neck_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/faster_rcnn_red50_neck_fpn.py',
3 | '../_base_/datasets/coco_detection.py',
4 | '../_base_/schedules/schedule_1x_warmup.py', '../_base_/default_runtime.py'
5 | ]
6 |
--------------------------------------------------------------------------------
/det/configs/involution/faster_rcnn_red50_neck_fpn_head_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/faster_rcnn_red50_neck_fpn_head.py',
3 | '../_base_/datasets/coco_detection.py',
4 | '../_base_/schedules/schedule_1x_warmup.py', '../_base_/default_runtime.py'
5 | ]
6 | optimizer_config = dict(grad_clip=dict(_delete_=True, max_norm=5, norm_type=2))
7 |
--------------------------------------------------------------------------------
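The `_delete_=True` key above tells mmcv's config merger to replace, rather than merge into, the `grad_clip` value inherited from schedule_1x_warmup.py (which sets it to None). A quick way to confirm the merged result, assuming the snippet is run from the det/ directory of this repo:

from mmcv import Config

cfg = Config.fromfile(
    'configs/involution/faster_rcnn_red50_neck_fpn_head_1x_coco.py')
# After merging with the _base_ schedule, grad_clip is the new dict
# (the _delete_ flag itself is stripped during merging).
print(cfg.optimizer_config.grad_clip)  # expected: {'max_norm': 5, 'norm_type': 2}
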
/det/configs/involution/mask_rcnn_red50_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/mask_rcnn_red50_fpn.py',
3 | '../_base_/datasets/coco_instance.py',
4 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
5 | ]
6 |
--------------------------------------------------------------------------------
/det/configs/involution/mask_rcnn_red50_neck_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/mask_rcnn_red50_neck_fpn.py',
3 | '../_base_/datasets/coco_instance.py',
4 | '../_base_/schedules/schedule_1x_warmup.py', '../_base_/default_runtime.py'
5 | ]
6 |
--------------------------------------------------------------------------------
/det/configs/involution/mask_rcnn_red50_neck_fpn_head_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/mask_rcnn_red50_neck_fpn_head.py',
3 | '../_base_/datasets/coco_instance.py',
4 | '../_base_/schedules/schedule_1x_warmup.py', '../_base_/default_runtime.py'
5 | ]
6 | optimizer_config = dict(grad_clip=dict(_delete_=True, max_norm=5, norm_type=2))
7 |
--------------------------------------------------------------------------------
/det/configs/involution/retinanet_red50_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/retinanet_red50_fpn.py',
3 | '../_base_/datasets/coco_detection.py',
4 | '../_base_/schedules/schedule_1x_warmup.py', '../_base_/default_runtime.py'
5 | ]
6 | # optimizer
7 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
8 |
--------------------------------------------------------------------------------
/det/configs/involution/retinanet_red50_neck_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/retinanet_red50_neck_fpn.py',
3 | '../_base_/datasets/coco_detection.py',
4 | '../_base_/schedules/schedule_1x_warmup.py', '../_base_/default_runtime.py'
5 | ]
6 | # optimizer
7 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
8 | optimizer_config = dict(grad_clip=dict(_delete_=True, max_norm=40, norm_type=2))
9 |
--------------------------------------------------------------------------------
/det/mmdet/datasets/utils.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import warnings
3 |
4 | from mmcv.cnn import VGG
5 | from mmcv.runner.hooks import HOOKS, Hook
6 |
7 | from mmdet.datasets.builder import PIPELINES
8 | from mmdet.datasets.pipelines import LoadAnnotations, LoadImageFromFile
9 | from mmdet.models.dense_heads import GARPNHead, RPNHead, RPNHead_involution
10 | from mmdet.models.roi_heads.mask_heads import FusedSemanticHead
11 |
12 |
13 | def replace_ImageToTensor(pipelines):
14 |     """Replace the ImageToTensor transform in a data pipeline with
15 | DefaultFormatBundle, which is normally useful in batch inference.
16 |
17 | Args:
18 | pipelines (list[dict]): Data pipeline configs.
19 |
20 | Returns:
21 | list: The new pipeline list with all ImageToTensor replaced by
22 | DefaultFormatBundle.
23 |
24 | Examples:
25 | >>> pipelines = [
26 | ... dict(type='LoadImageFromFile'),
27 | ... dict(
28 | ... type='MultiScaleFlipAug',
29 | ... img_scale=(1333, 800),
30 | ... flip=False,
31 | ... transforms=[
32 | ... dict(type='Resize', keep_ratio=True),
33 | ... dict(type='RandomFlip'),
34 | ... dict(type='Normalize', mean=[0, 0, 0], std=[1, 1, 1]),
35 | ... dict(type='Pad', size_divisor=32),
36 | ... dict(type='ImageToTensor', keys=['img']),
37 | ... dict(type='Collect', keys=['img']),
38 | ... ])
39 | ... ]
40 | >>> expected_pipelines = [
41 | ... dict(type='LoadImageFromFile'),
42 | ... dict(
43 | ... type='MultiScaleFlipAug',
44 | ... img_scale=(1333, 800),
45 | ... flip=False,
46 | ... transforms=[
47 | ... dict(type='Resize', keep_ratio=True),
48 | ... dict(type='RandomFlip'),
49 | ... dict(type='Normalize', mean=[0, 0, 0], std=[1, 1, 1]),
50 | ... dict(type='Pad', size_divisor=32),
51 | ... dict(type='DefaultFormatBundle'),
52 | ... dict(type='Collect', keys=['img']),
53 | ... ])
54 | ... ]
55 | >>> assert expected_pipelines == replace_ImageToTensor(pipelines)
56 | """
57 | pipelines = copy.deepcopy(pipelines)
58 | for i, pipeline in enumerate(pipelines):
59 | if pipeline['type'] == 'MultiScaleFlipAug':
60 | assert 'transforms' in pipeline
61 | pipeline['transforms'] = replace_ImageToTensor(
62 | pipeline['transforms'])
63 | elif pipeline['type'] == 'ImageToTensor':
64 | warnings.warn(
65 | '"ImageToTensor" pipeline is replaced by '
66 | '"DefaultFormatBundle" for batch inference. It is '
67 | 'recommended to manually replace it in the test '
68 | 'data pipeline in your config file.', UserWarning)
69 | pipelines[i] = {'type': 'DefaultFormatBundle'}
70 | return pipelines
71 |
72 |
73 | def get_loading_pipeline(pipeline):
74 |     """Only keep the image- and annotation-loading related configuration.
75 |
76 | Args:
77 | pipeline (list[dict]): Data pipeline configs.
78 |
79 | Returns:
80 |         list[dict]: The new pipeline list that keeps only the
81 |             image- and annotation-loading related configuration.
82 |
83 | Examples:
84 | >>> pipelines = [
85 | ... dict(type='LoadImageFromFile'),
86 | ... dict(type='LoadAnnotations', with_bbox=True),
87 | ... dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
88 | ... dict(type='RandomFlip', flip_ratio=0.5),
89 | ... dict(type='Normalize', **img_norm_cfg),
90 | ... dict(type='Pad', size_divisor=32),
91 | ... dict(type='DefaultFormatBundle'),
92 | ... dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
93 | ... ]
94 | >>> expected_pipelines = [
95 | ... dict(type='LoadImageFromFile'),
96 | ... dict(type='LoadAnnotations', with_bbox=True)
97 | ... ]
98 | >>> assert expected_pipelines ==\
99 | ... get_loading_pipeline(pipelines)
100 | """
101 | loading_pipeline_cfg = []
102 | for cfg in pipeline:
103 | obj_cls = PIPELINES.get(cfg['type'])
104 |         # TODO: use a more elegant way to distinguish loading modules
105 | if obj_cls is not None and obj_cls in (LoadImageFromFile,
106 | LoadAnnotations):
107 | loading_pipeline_cfg.append(cfg)
108 | assert len(loading_pipeline_cfg) == 2, \
109 | 'The data pipeline in your config file must include ' \
110 | 'loading image and annotations related pipeline.'
111 | return loading_pipeline_cfg
112 |
113 |
114 | @HOOKS.register_module()
115 | class NumClassCheckHook(Hook):
116 |
117 | def _check_head(self, runner):
118 | """Check whether the `num_classes` in head matches the length of
119 |         `CLASSES` in `dataset`.
120 |
121 | Args:
122 | runner (obj:`EpochBasedRunner`): Epoch based Runner.
123 | """
124 | model = runner.model
125 | dataset = runner.data_loader.dataset
126 | if dataset.CLASSES is None:
127 | runner.logger.warning(
128 | f'Please set `CLASSES` '
129 |                 f'in the {dataset.__class__.__name__} and '
130 | f'check if it is consistent with the `num_classes` '
131 | f'of head')
132 | else:
133 | for name, module in model.named_modules():
134 | if hasattr(module, 'num_classes') and not isinstance(
135 | module, (RPNHead, RPNHead_involution, VGG, FusedSemanticHead, GARPNHead)):
136 | assert module.num_classes == len(dataset.CLASSES), \
137 | (f'The `num_classes` ({module.num_classes}) in '
138 | f'{module.__class__.__name__} of '
139 |                          f'{model.__class__.__name__} does not match '
140 |                          f'the length of `CLASSES` '
141 |                          f'({len(dataset.CLASSES)}) in '
142 | f'{dataset.__class__.__name__}')
143 |
144 | def before_train_epoch(self, runner):
145 | """Check whether the training dataset is compatible with head.
146 |
147 | Args:
148 | runner (obj:`EpochBasedRunner`): Epoch based Runner.
149 | """
150 | self._check_head(runner)
151 |
152 | def before_val_epoch(self, runner):
153 | """Check whether the dataset in val epoch is compatible with head.
154 |
155 | Args:
156 | runner (obj:`EpochBasedRunner`): Epoch based Runner.
157 | """
158 | self._check_head(runner)
159 |
--------------------------------------------------------------------------------
/det/mmdet/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .darknet import Darknet
2 | from .detectors_resnet import DetectoRS_ResNet
3 | from .detectors_resnext import DetectoRS_ResNeXt
4 | from .hourglass import HourglassNet
5 | from .hrnet import HRNet
6 | from .regnet import RegNet
7 | from .res2net import Res2Net
8 | from .resnest import ResNeSt
9 | from .resnet import ResNet, ResNetV1d
10 | from .resnext import ResNeXt
11 | from .ssd_vgg import SSDVGG
12 | from .trident_resnet import TridentResNet
13 | from .rednet import RedNet
14 |
15 | __all__ = [
16 | 'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net',
17 | 'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'Darknet',
18 | 'ResNeSt', 'TridentResNet',
19 | 'RedNet'
20 | ]
21 |
--------------------------------------------------------------------------------
/det/mmdet/models/backbones/base_backbone.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from abc import ABCMeta, abstractmethod
3 |
4 | import torch.nn as nn
5 | from mmcv.runner import load_checkpoint
6 |
7 |
8 | class BaseBackbone(nn.Module, metaclass=ABCMeta):
9 | """Base backbone.
10 |
11 | This class defines the basic functions of a backbone.
12 | Any backbone that inherits this class should at least
13 | define its own `forward` function.
14 |
15 | """
16 |
17 | def __init__(self):
18 | super(BaseBackbone, self).__init__()
19 |
20 | def init_weights(self, pretrained=None):
21 | """Init backbone weights
22 |
23 | Args:
24 | pretrained (str | None): If pretrained is a string, then it
25 | initializes backbone weights by loading the pretrained
26 | checkpoint. If pretrained is None, then it follows default
27 | initializer or customized initializer in subclasses.
28 | """
29 | if isinstance(pretrained, str):
30 | logger = logging.getLogger()
31 | load_checkpoint(self, pretrained, strict=False, logger=logger)
32 | elif pretrained is None:
33 | # use default initializer or customized initializer in subclasses
34 | pass
35 | else:
36 | raise TypeError('pretrained must be a str or None.'
37 | f' But received {type(pretrained)}.')
38 |
39 | @abstractmethod
40 | def forward(self, x):
41 | """Forward computation
42 |
43 | Args:
44 | x (tensor | tuple[tensor]): x could be a Torch.tensor or a tuple of
45 | Torch.tensor, containing input data for forward computation.
46 | """
47 | pass
48 |
49 | def train(self, mode=True):
50 | """Set module status before forward computation
51 |
52 | Args:
53 | mode (bool): Whether it is train_mode or test_mode
54 | """
55 | super(BaseBackbone, self).train(mode)
56 |
--------------------------------------------------------------------------------
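As the docstring notes, a subclass only has to provide `forward` (and may override `init_weights` or `train`). A minimal, purely illustrative subclass, assuming the det/mmdet overlay above is importable:

import torch
import torch.nn as nn
# Assumes this repo's overlay is on the path; the import location is an assumption.
from mmdet.models.backbones.base_backbone import BaseBackbone

class TinyBackbone(BaseBackbone):
    """Toy backbone used only to illustrate the BaseBackbone contract."""

    def __init__(self):
        super(TinyBackbone, self).__init__()
        self.conv = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        return self.conv(x)

feat = TinyBackbone()(torch.randn(1, 3, 64, 64))
print(feat.shape)  # torch.Size([1, 16, 32, 32])
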
/det/mmdet/models/backbones/rednet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.utils.checkpoint as cp
3 | from mmcv.cnn import (ConvModule, build_conv_layer, build_norm_layer,
4 | constant_init, kaiming_init)
5 | from mmcv.utils.parrots_wrapper import _BatchNorm
6 |
7 | from ..builder import BACKBONES
8 | from .base_backbone import BaseBackbone
9 | from ..utils.involution_cuda import involution
10 |
11 |
12 | class Bottleneck(nn.Module):
13 |     """Bottleneck block for RedNet (conv2 is replaced by a 7x7 involution).
14 |
15 | Args:
16 | in_channels (int): Input channels of this block.
17 | out_channels (int): Output channels of this block.
18 | expansion (int): The ratio of ``out_channels/mid_channels`` where
19 | ``mid_channels`` is the input/output channels of conv2. Default: 4.
20 | stride (int): stride of the block. Default: 1
21 | dilation (int): dilation of convolution. Default: 1
22 | downsample (nn.Module): downsample operation on identity branch.
23 | Default: None.
24 | style (str): ``"pytorch"`` or ``"caffe"``. If set to "pytorch", the
25 | stride-two layer is the 3x3 conv layer, otherwise the stride-two
26 | layer is the first 1x1 conv layer. Default: "pytorch".
27 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some
28 | memory while slowing down the training speed.
29 | conv_cfg (dict): dictionary to construct and config conv layer.
30 | Default: None
31 | norm_cfg (dict): dictionary to construct and config norm layer.
32 | Default: dict(type='BN')
33 | """
34 |
35 | def __init__(self,
36 | in_channels,
37 | out_channels,
38 | expansion=4,
39 | stride=1,
40 | dilation=1,
41 | downsample=None,
42 | style='pytorch',
43 | with_cp=False,
44 | conv_cfg=None,
45 | norm_cfg=dict(type='BN')):
46 | super(Bottleneck, self).__init__()
47 | assert style in ['pytorch', 'caffe']
48 |
49 | self.in_channels = in_channels
50 | self.out_channels = out_channels
51 | self.expansion = expansion
52 | assert out_channels % expansion == 0
53 | self.mid_channels = out_channels // expansion
54 | self.stride = stride
55 | self.dilation = dilation
56 | self.style = style
57 | self.with_cp = with_cp
58 | self.conv_cfg = conv_cfg
59 | self.norm_cfg = norm_cfg
60 |
61 | if self.style == 'pytorch':
62 | self.conv1_stride = 1
63 | self.conv2_stride = stride
64 | else:
65 | self.conv1_stride = stride
66 | self.conv2_stride = 1
67 |
68 | self.norm1_name, norm1 = build_norm_layer(
69 | norm_cfg, self.mid_channels, postfix=1)
70 | self.norm2_name, norm2 = build_norm_layer(
71 | norm_cfg, self.mid_channels, postfix=2)
72 | self.norm3_name, norm3 = build_norm_layer(
73 | norm_cfg, out_channels, postfix=3)
74 |
75 | self.conv1 = build_conv_layer(
76 | conv_cfg,
77 | in_channels,
78 | self.mid_channels,
79 | kernel_size=1,
80 | stride=self.conv1_stride,
81 | bias=False)
82 | self.add_module(self.norm1_name, norm1)
83 | self.conv2 = involution(self.mid_channels, 7, self.conv2_stride)
84 |
85 | self.add_module(self.norm2_name, norm2)
86 | self.conv3 = build_conv_layer(
87 | conv_cfg,
88 | self.mid_channels,
89 | out_channels,
90 | kernel_size=1,
91 | bias=False)
92 | self.add_module(self.norm3_name, norm3)
93 |
94 | self.relu = nn.ReLU(inplace=True)
95 | self.downsample = downsample
96 |
97 | @property
98 | def norm1(self):
99 | return getattr(self, self.norm1_name)
100 |
101 | @property
102 | def norm2(self):
103 | return getattr(self, self.norm2_name)
104 |
105 | @property
106 | def norm3(self):
107 | return getattr(self, self.norm3_name)
108 |
109 | def forward(self, x):
110 |
111 | def _inner_forward(x):
112 | identity = x
113 |
114 | out = self.conv1(x)
115 | out = self.norm1(out)
116 | out = self.relu(out)
117 |
118 | out = self.conv2(out)
119 | out = self.norm2(out)
120 | out = self.relu(out)
121 |
122 | out = self.conv3(out)
123 | out = self.norm3(out)
124 |
125 | if self.downsample is not None:
126 | identity = self.downsample(x)
127 |
128 | out += identity
129 |
130 | return out
131 |
132 | if self.with_cp and x.requires_grad:
133 | out = cp.checkpoint(_inner_forward, x)
134 | else:
135 | out = _inner_forward(x)
136 |
137 | out = self.relu(out)
138 |
139 | return out
140 |
141 |
142 | def get_expansion(block, expansion=None):
143 | """Get the expansion of a residual block.
144 |
145 | The block expansion will be obtained by the following order:
146 |
147 | 1. If ``expansion`` is given, just return it.
148 | 2. If ``block`` has the attribute ``expansion``, then return
149 | ``block.expansion``.
150 |     3. Return the default value according to the block type:
151 | 1 for ``BasicBlock`` and 4 for ``Bottleneck``.
152 |
153 | Args:
154 | block (class): The block class.
155 | expansion (int | None): The given expansion ratio.
156 |
157 | Returns:
158 | int: The expansion of the block.
159 | """
160 | if isinstance(expansion, int):
161 | assert expansion > 0
162 | elif expansion is None:
163 | if hasattr(block, 'expansion'):
164 | expansion = block.expansion
165 | elif issubclass(block, Bottleneck):
166 | expansion = 4
167 | else:
168 | raise TypeError(f'expansion is not specified for {block.__name__}')
169 | else:
170 | raise TypeError('expansion must be an integer or None')
171 |
172 | return expansion
173 |
174 |
175 | class ResLayer(nn.Sequential):
176 | """ResLayer to build ResNet style backbone.
177 |
178 | Args:
179 | block (nn.Module): Residual block used to build ResLayer.
180 | num_blocks (int): Number of blocks.
181 | in_channels (int): Input channels of this block.
182 | out_channels (int): Output channels of this block.
183 | expansion (int, optional): The expansion for BasicBlock/Bottleneck.
184 | If not specified, it will firstly be obtained via
185 | ``block.expansion``. If the block has no attribute "expansion",
186 | the following default values will be used: 1 for BasicBlock and
187 | 4 for Bottleneck. Default: None.
188 | stride (int): stride of the first block. Default: 1.
189 | avg_down (bool): Use AvgPool instead of stride conv when
190 | downsampling in the bottleneck. Default: False
191 | conv_cfg (dict): dictionary to construct and config conv layer.
192 | Default: None
193 | norm_cfg (dict): dictionary to construct and config norm layer.
194 | Default: dict(type='BN')
195 | """
196 |
197 | def __init__(self,
198 | block,
199 | num_blocks,
200 | in_channels,
201 | out_channels,
202 | expansion=None,
203 | stride=1,
204 | avg_down=False,
205 | conv_cfg=None,
206 | norm_cfg=dict(type='BN'),
207 | **kwargs):
208 | self.block = block
209 | self.expansion = get_expansion(block, expansion)
210 |
211 | downsample = None
212 | if stride != 1 or in_channels != out_channels:
213 | downsample = []
214 | conv_stride = stride
215 | if avg_down and stride != 1:
216 | conv_stride = 1
217 | downsample.append(
218 | nn.AvgPool2d(
219 | kernel_size=stride,
220 | stride=stride,
221 | ceil_mode=True,
222 | count_include_pad=False))
223 | downsample.extend([
224 | build_conv_layer(
225 | conv_cfg,
226 | in_channels,
227 | out_channels,
228 | kernel_size=1,
229 | stride=conv_stride,
230 | bias=False),
231 | build_norm_layer(norm_cfg, out_channels)[1]
232 | ])
233 | downsample = nn.Sequential(*downsample)
234 |
235 | layers = []
236 | layers.append(
237 | block(
238 | in_channels=in_channels,
239 | out_channels=out_channels,
240 | expansion=self.expansion,
241 | stride=stride,
242 | downsample=downsample,
243 | conv_cfg=conv_cfg,
244 | norm_cfg=norm_cfg,
245 | **kwargs))
246 | in_channels = out_channels
247 | for i in range(1, num_blocks):
248 | layers.append(
249 | block(
250 | in_channels=in_channels,
251 | out_channels=out_channels,
252 | expansion=self.expansion,
253 | stride=1,
254 | conv_cfg=conv_cfg,
255 | norm_cfg=norm_cfg,
256 | **kwargs))
257 | super(ResLayer, self).__init__(*layers)
258 |
259 |
260 | @BACKBONES.register_module()
261 | class RedNet(BaseBackbone):
262 |     """RedNet backbone.
263 |
264 |     Please refer to the paper for
265 | details.
266 |
267 | Args:
268 | depth (int): Network depth, from {18, 34, 50, 101, 152}.
269 | in_channels (int): Number of input image channels. Default: 3.
270 | stem_channels (int): Output channels of the stem layer. Default: 64.
271 | base_channels (int): Middle channels of the first stage. Default: 64.
272 | num_stages (int): Stages of the network. Default: 4.
273 | strides (Sequence[int]): Strides of the first block of each stage.
274 | Default: ``(1, 2, 2, 2)``.
275 | dilations (Sequence[int]): Dilation of each stage.
276 | Default: ``(1, 1, 1, 1)``.
277 | out_indices (Sequence[int]): Output from which stages. If only one
278 |             stage is specified, a single tensor (feature map) is returned;
279 |             if multiple stages are specified, a tuple of tensors is
280 |             returned. Default: ``(3, )``.
281 | style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
282 | layer is the 3x3 conv layer, otherwise the stride-two layer is
283 | the first 1x1 conv layer.
284 | deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
285 | Default: False.
286 | avg_down (bool): Use AvgPool instead of stride conv when
287 | downsampling in the bottleneck. Default: False.
288 | frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
289 | -1 means not freezing any parameters. Default: -1.
290 | conv_cfg (dict | None): The config dict for conv layers. Default: None.
291 | norm_cfg (dict): The config dict for norm layers.
292 | norm_eval (bool): Whether to set norm layers to eval mode, namely,
293 | freeze running stats (mean and var). Note: Effect on Batch Norm
294 | and its variants only. Default: False.
295 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some
296 | memory while slowing down the training speed. Default: False.
297 | zero_init_residual (bool): Whether to use zero init for last norm layer
298 | in resblocks to let them behave as identity. Default: True.
299 |
300 | Example:
301 |         >>> from mmdet.models import RedNet
302 |         >>> import torch
303 |         >>> self = RedNet(depth=50, out_indices=(0, 1, 2, 3))
304 |         >>> self.eval()
305 |         >>> inputs = torch.rand(1, 3, 32, 32)
306 |         >>> level_outputs = self.forward(inputs)
307 |         >>> for level_out in level_outputs:
308 |         ...     print(tuple(level_out.shape))
309 |         (1, 256, 8, 8)
310 |         (1, 512, 4, 4)
311 |         (1, 1024, 2, 2)
312 |         (1, 2048, 1, 1)
313 | """
314 |
315 | arch_settings = {
316 | 26: (Bottleneck, (1, 2, 4, 1)),
317 | 38: (Bottleneck, (2, 3, 5, 2)),
318 | 50: (Bottleneck, (3, 4, 6, 3)),
319 | 101: (Bottleneck, (3, 4, 23, 3)),
320 | 152: (Bottleneck, (3, 8, 36, 3))
321 | }
322 |
323 | def __init__(self,
324 | depth,
325 | in_channels=3,
326 | stem_channels=64,
327 | base_channels=64,
328 | expansion=None,
329 | num_stages=4,
330 | strides=(1, 2, 2, 2),
331 | dilations=(1, 1, 1, 1),
332 | out_indices=(3, ),
333 | style='pytorch',
334 | avg_down=False,
335 | frozen_stages=-1,
336 | conv_cfg=None,
337 | norm_cfg=dict(type='BN', requires_grad=True),
338 | norm_eval=False,
339 | with_cp=False,
340 | zero_init_residual=True):
341 | super(RedNet, self).__init__()
342 | if depth not in self.arch_settings:
343 | raise KeyError(f'invalid depth {depth} for resnet')
344 | self.depth = depth
345 | self.stem_channels = stem_channels
346 | self.base_channels = base_channels
347 | self.num_stages = num_stages
348 | assert num_stages >= 1 and num_stages <= 4
349 | self.strides = strides
350 | self.dilations = dilations
351 | assert len(strides) == len(dilations) == num_stages
352 | self.out_indices = out_indices
353 | assert max(out_indices) < num_stages
354 | self.style = style
355 | self.avg_down = avg_down
356 | self.frozen_stages = frozen_stages
357 | self.conv_cfg = conv_cfg
358 | self.norm_cfg = norm_cfg
359 | self.with_cp = with_cp
360 | self.norm_eval = norm_eval
361 | self.zero_init_residual = zero_init_residual
362 | self.block, stage_blocks = self.arch_settings[depth]
363 | self.stage_blocks = stage_blocks[:num_stages]
364 | self.expansion = get_expansion(self.block, expansion)
365 |
366 | self._make_stem_layer(in_channels, stem_channels)
367 |
368 | self.res_layers = []
369 | _in_channels = stem_channels
370 | _out_channels = base_channels * self.expansion
371 | for i, num_blocks in enumerate(self.stage_blocks):
372 | stride = strides[i]
373 | dilation = dilations[i]
374 | res_layer = self.make_res_layer(
375 | block=self.block,
376 | num_blocks=num_blocks,
377 | in_channels=_in_channels,
378 | out_channels=_out_channels,
379 | expansion=self.expansion,
380 | stride=stride,
381 | dilation=dilation,
382 | style=self.style,
383 | avg_down=self.avg_down,
384 | with_cp=with_cp,
385 | conv_cfg=conv_cfg,
386 | norm_cfg=norm_cfg)
387 | _in_channels = _out_channels
388 | _out_channels *= 2
389 | layer_name = f'layer{i + 1}'
390 | self.add_module(layer_name, res_layer)
391 | self.res_layers.append(layer_name)
392 |
393 | self._freeze_stages()
394 |
395 | self.feat_dim = res_layer[-1].out_channels
396 |
397 | def make_res_layer(self, **kwargs):
398 | return ResLayer(**kwargs)
399 |
400 | @property
401 | def norm1(self):
402 | return getattr(self, self.norm1_name)
403 |
404 | def _make_stem_layer(self, in_channels, stem_channels):
405 | self.stem = nn.Sequential(
406 | ConvModule(
407 | in_channels,
408 | stem_channels // 2,
409 | kernel_size=3,
410 | stride=2,
411 | padding=1,
412 | conv_cfg=self.conv_cfg,
413 | norm_cfg=self.norm_cfg,
414 | inplace=True),
415 | involution(stem_channels // 2, 7, 1),
416 | nn.BatchNorm2d(stem_channels // 2),
417 | nn.ReLU(inplace=True),
418 | ConvModule(
419 | stem_channels // 2,
420 | stem_channels,
421 | kernel_size=3,
422 | stride=1,
423 | padding=1,
424 | conv_cfg=self.conv_cfg,
425 | norm_cfg=self.norm_cfg,
426 | inplace=True))
427 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
428 |
429 | def _freeze_stages(self):
430 | if self.frozen_stages >= 0:
431 | self.stem.eval()
432 | for param in self.stem.parameters():
433 | param.requires_grad = False
434 |
435 | for i in range(1, self.frozen_stages + 1):
436 | m = getattr(self, f'layer{i}')
437 | m.eval()
438 | for param in m.parameters():
439 | param.requires_grad = False
440 |
441 | def init_weights(self, pretrained=None):
442 | super(RedNet, self).init_weights(pretrained)
443 | if pretrained is None:
444 | for m in self.modules():
445 | if isinstance(m, nn.Conv2d):
446 | kaiming_init(m)
447 | elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
448 | constant_init(m, 1)
449 |
450 | if self.zero_init_residual:
451 | for m in self.modules():
452 | if isinstance(m, Bottleneck):
453 | constant_init(m.norm3, 0)
454 |
455 | def forward(self, x):
456 | x = self.stem(x)
457 | x = self.maxpool(x)
458 | outs = []
459 | for i, layer_name in enumerate(self.res_layers):
460 | res_layer = getattr(self, layer_name)
461 | x = res_layer(x)
462 | if i in self.out_indices:
463 | outs.append(x)
464 | if len(outs) == 1:
465 | return outs[0]
466 | else:
467 | return tuple(outs)
468 |
469 | def train(self, mode=True):
470 | super(RedNet, self).train(mode)
471 | self._freeze_stages()
472 | if mode and self.norm_eval:
473 | for m in self.modules():
474 |                 # trick: eval has an effect on BatchNorm only
475 | if isinstance(m, _BatchNorm):
476 | m.eval()
477 |
478 |
--------------------------------------------------------------------------------
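A usage sketch for the backbone registered above, assuming the det/mmdet overlay is installed in place of stock mmdet and a CUDA device is available (this variant imports the CUDA involution, so CPU-only execution may not work):

import torch
from mmdet.models import build_backbone

cfg = dict(
    type='RedNet',
    depth=50,
    num_stages=4,
    out_indices=(0, 1, 2, 3),
    frozen_stages=1,
    norm_cfg=dict(type='BN', requires_grad=True),
    norm_eval=True,
    style='pytorch')
backbone = build_backbone(cfg).cuda().eval()

with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 224, 224, device='cuda'))
for f in feats:
    print(tuple(f.shape))  # 256/512/1024/2048 channels at strides 4, 8, 16, 32
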
/det/mmdet/models/dense_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor_free_head import AnchorFreeHead
2 | from .anchor_head import AnchorHead
3 | from .atss_head import ATSSHead
4 | from .cascade_rpn_head import CascadeRPNHead, StageCascadeRPNHead
5 | from .centripetal_head import CentripetalHead
6 | from .corner_head import CornerHead
7 | from .embedding_rpn_head import EmbeddingRPNHead
8 | from .fcos_head import FCOSHead
9 | from .fovea_head import FoveaHead
10 | from .free_anchor_retina_head import FreeAnchorRetinaHead
11 | from .fsaf_head import FSAFHead
12 | from .ga_retina_head import GARetinaHead
13 | from .ga_rpn_head import GARPNHead
14 | from .gfl_head import GFLHead
15 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead
16 | from .nasfcos_head import NASFCOSHead
17 | from .paa_head import PAAHead
18 | from .pisa_retinanet_head import PISARetinaHead
19 | from .pisa_ssd_head import PISASSDHead
20 | from .reppoints_head import RepPointsHead
21 | from .retina_head import RetinaHead
22 | from .retina_sepbn_head import RetinaSepBNHead
23 | from .rpn_head import RPNHead
24 | from .rpn_head_involution import RPNHead_involution
25 | from .sabl_retina_head import SABLRetinaHead
26 | from .ssd_head import SSDHead
27 | from .transformer_head import TransformerHead
28 | from .vfnet_head import VFNetHead
29 | from .yolact_head import YOLACTHead, YOLACTProtonet, YOLACTSegmHead
30 | from .yolo_head import YOLOV3Head
31 |
32 | __all__ = [
33 | 'AnchorFreeHead', 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption',
34 | 'RPNHead', 'GARPNHead', 'RetinaHead', 'RetinaSepBNHead', 'GARetinaHead',
35 | 'SSDHead', 'FCOSHead', 'RepPointsHead', 'FoveaHead',
36 | 'FreeAnchorRetinaHead', 'ATSSHead', 'FSAFHead', 'NASFCOSHead',
37 | 'PISARetinaHead', 'PISASSDHead', 'GFLHead', 'CornerHead', 'YOLACTHead',
38 | 'YOLACTSegmHead', 'YOLACTProtonet', 'YOLOV3Head', 'PAAHead',
39 | 'SABLRetinaHead', 'CentripetalHead', 'VFNetHead', 'TransformerHead',
40 | 'StageCascadeRPNHead', 'CascadeRPNHead', 'EmbeddingRPNHead',
41 | 'RPNHead_involution'
42 | ]
43 |
--------------------------------------------------------------------------------
/det/mmdet/models/dense_heads/rpn_head_involution.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import warnings
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from mmcv import ConfigDict
8 | from mmcv.cnn import normal_init
9 | from mmcv.ops import batched_nms
10 |
11 | from ..builder import HEADS
12 | from .anchor_head import AnchorHead
13 | from .rpn_test_mixin import RPNTestMixin
14 | from ..utils.involution_cuda import involution
15 |
16 |
17 | @HEADS.register_module()
18 | class RPNHead_involution(RPNTestMixin, AnchorHead):
19 |     """RPN head with involution (a 7x7 involution replaces the 3x3 rpn conv).
20 |
21 | Args:
22 | in_channels (int): Number of channels in the input feature map.
23 | """ # noqa: W605
24 |
25 | def __init__(self, in_channels, **kwargs):
26 | super(RPNHead_involution, self).__init__(1, in_channels, **kwargs)
27 |
28 | def _init_layers(self):
29 | """Initialize layers of the head."""
30 | self.rpn_conv = involution(self.in_channels, 7, 1)
31 | self.rpn_cls = nn.Conv2d(self.feat_channels,
32 | self.num_anchors * self.cls_out_channels, 1)
33 | self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1)
34 |
35 | def init_weights(self):
36 | """Initialize weights of the head."""
37 | normal_init(self.rpn_conv, std=0.01)
38 | normal_init(self.rpn_cls, std=0.01)
39 | normal_init(self.rpn_reg, std=0.01)
40 |
41 | def forward_single(self, x):
42 | """Forward feature map of a single scale level."""
43 | x = self.rpn_conv(x)
44 | x = F.relu(x, inplace=True)
45 | rpn_cls_score = self.rpn_cls(x)
46 | rpn_bbox_pred = self.rpn_reg(x)
47 | return rpn_cls_score, rpn_bbox_pred
48 |
49 | def loss(self,
50 | cls_scores,
51 | bbox_preds,
52 | gt_bboxes,
53 | img_metas,
54 | gt_bboxes_ignore=None):
55 | """Compute losses of the head.
56 |
57 | Args:
58 | cls_scores (list[Tensor]): Box scores for each scale level
59 | Has shape (N, num_anchors * num_classes, H, W)
60 | bbox_preds (list[Tensor]): Box energies / deltas for each scale
61 | level with shape (N, num_anchors * 4, H, W)
62 | gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
63 | shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
64 | img_metas (list[dict]): Meta information of each image, e.g.,
65 | image size, scaling factor, etc.
66 | gt_bboxes_ignore (None | list[Tensor]): specify which bounding
67 | boxes can be ignored when computing the loss.
68 |
69 | Returns:
70 | dict[str, Tensor]: A dictionary of loss components.
71 | """
72 | losses = super(RPNHead_involution, self).loss(
73 | cls_scores,
74 | bbox_preds,
75 | gt_bboxes,
76 | None,
77 | img_metas,
78 | gt_bboxes_ignore=gt_bboxes_ignore)
79 | return dict(
80 | loss_rpn_cls=losses['loss_cls'], loss_rpn_bbox=losses['loss_bbox'])
81 |
82 | def _get_bboxes(self,
83 | cls_scores,
84 | bbox_preds,
85 | mlvl_anchors,
86 | img_shapes,
87 | scale_factors,
88 | cfg,
89 | rescale=False):
90 | """Transform outputs for a single batch item into bbox predictions.
91 |
92 | Args:
93 | cls_scores (list[Tensor]): Box scores for each scale level
94 | Has shape (N, num_anchors * num_classes, H, W).
95 | bbox_preds (list[Tensor]): Box energies / deltas for each scale
96 | level with shape (N, num_anchors * 4, H, W).
97 | mlvl_anchors (list[Tensor]): Box reference for each scale level
98 | with shape (num_total_anchors, 4).
99 | img_shapes (list[tuple[int]]): Shape of the input image,
100 | (height, width, 3).
101 |             scale_factors (list[ndarray]): Scale factor of the image arranged as
102 | (w_scale, h_scale, w_scale, h_scale).
103 | cfg (mmcv.Config): Test / postprocessing configuration,
104 | if None, test_cfg would be used.
105 | rescale (bool): If True, return boxes in original image space.
106 |
107 | Returns:
108 | list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
109 | The first item is an (n, 5) tensor, where the first 4 columns
110 | are bounding box positions (tl_x, tl_y, br_x, br_y) and the
111 | 5-th column is a score between 0 and 1. The second item is a
112 |                 (n,) tensor where each item is the predicted class label of the
113 | corresponding box.
114 | """
115 | cfg = self.test_cfg if cfg is None else cfg
116 | cfg = copy.deepcopy(cfg)
117 | # bboxes from different level should be independent during NMS,
118 | # level_ids are used as labels for batched NMS to separate them
119 | level_ids = []
120 | mlvl_scores = []
121 | mlvl_bbox_preds = []
122 | mlvl_valid_anchors = []
123 | batch_size = cls_scores[0].shape[0]
124 | nms_pre_tensor = torch.tensor(
125 | cfg.nms_pre, device=cls_scores[0].device, dtype=torch.long)
126 | for idx in range(len(cls_scores)):
127 | rpn_cls_score = cls_scores[idx]
128 | rpn_bbox_pred = bbox_preds[idx]
129 | assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
130 | rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1)
131 | if self.use_sigmoid_cls:
132 | rpn_cls_score = rpn_cls_score.reshape(batch_size, -1)
133 | scores = rpn_cls_score.sigmoid()
134 | else:
135 | rpn_cls_score = rpn_cls_score.reshape(batch_size, -1, 2)
136 | # We set FG labels to [0, num_class-1] and BG label to
137 | # num_class in RPN head since mmdet v2.5, which is unified to
138 |                 # be consistent with other heads since mmdet v2.0. In mmdet v2.0
139 | # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
140 | scores = rpn_cls_score.softmax(-1)[..., 0]
141 | rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).reshape(
142 | batch_size, -1, 4)
143 | anchors = mlvl_anchors[idx]
144 | anchors = anchors.expand_as(rpn_bbox_pred)
145 | if nms_pre_tensor > 0:
146 | # sort is faster than topk
147 | # _, topk_inds = scores.topk(cfg.nms_pre)
148 | # keep topk op for dynamic k in onnx model
149 | if torch.onnx.is_in_onnx_export():
150 | # sort op will be converted to TopK in onnx
151 | # and k<=3480 in TensorRT
152 | scores_shape = torch._shape_as_tensor(scores)
153 | nms_pre = torch.where(scores_shape[1] < nms_pre_tensor,
154 | scores_shape[1], nms_pre_tensor)
155 | _, topk_inds = scores.topk(nms_pre)
156 | batch_inds = torch.arange(batch_size).view(
157 | -1, 1).expand_as(topk_inds)
158 | scores = scores[batch_inds, topk_inds]
159 | rpn_bbox_pred = rpn_bbox_pred[batch_inds, topk_inds, :]
160 | anchors = anchors[batch_inds, topk_inds, :]
161 |
162 | elif scores.shape[-1] > cfg.nms_pre:
163 | ranked_scores, rank_inds = scores.sort(descending=True)
164 | topk_inds = rank_inds[:, :cfg.nms_pre]
165 | scores = ranked_scores[:, :cfg.nms_pre]
166 | batch_inds = torch.arange(batch_size).view(
167 | -1, 1).expand_as(topk_inds)
168 | rpn_bbox_pred = rpn_bbox_pred[batch_inds, topk_inds, :]
169 | anchors = anchors[batch_inds, topk_inds, :]
170 |
171 | mlvl_scores.append(scores)
172 | mlvl_bbox_preds.append(rpn_bbox_pred)
173 | mlvl_valid_anchors.append(anchors)
174 | level_ids.append(
175 | scores.new_full((
176 | batch_size,
177 | scores.size(1),
178 | ),
179 | idx,
180 | dtype=torch.long))
181 |
182 | batch_mlvl_scores = torch.cat(mlvl_scores, dim=1)
183 | batch_mlvl_anchors = torch.cat(mlvl_valid_anchors, dim=1)
184 | batch_mlvl_rpn_bbox_pred = torch.cat(mlvl_bbox_preds, dim=1)
185 | batch_mlvl_proposals = self.bbox_coder.decode(
186 | batch_mlvl_anchors, batch_mlvl_rpn_bbox_pred, max_shape=img_shapes)
187 | batch_mlvl_ids = torch.cat(level_ids, dim=1)
188 |
189 | # deprecate arguments warning
190 | if 'nms' not in cfg or 'max_num' in cfg or 'nms_thr' in cfg:
191 | warnings.warn(
192 | 'In rpn_proposal or test_cfg, '
193 | 'nms_thr has been moved to a dict named nms as '
194 | 'iou_threshold, max_num has been renamed as max_per_img, '
195 | 'name of original arguments and the way to specify '
196 | 'iou_threshold of NMS will be deprecated.')
197 | if 'nms' not in cfg:
198 | cfg.nms = ConfigDict(dict(type='nms', iou_threshold=cfg.nms_thr))
199 | if 'max_num' in cfg:
200 | if 'max_per_img' in cfg:
201 | assert cfg.max_num == cfg.max_per_img, f'You ' \
202 | f'set max_num and ' \
203 | f'max_per_img at the same time, but get {cfg.max_num} ' \
204 |                     f'and {cfg.max_per_img} respectively. ' \
205 | 'Please delete max_num which will be deprecated.'
206 | else:
207 | cfg.max_per_img = cfg.max_num
208 | if 'nms_thr' in cfg:
209 | assert cfg.nms.iou_threshold == cfg.nms_thr, f'You set' \
210 | f' iou_threshold in nms and ' \
211 | f'nms_thr at the same time, but get' \
212 | f' {cfg.nms.iou_threshold} and {cfg.nms_thr}' \
213 | f' respectively. Please delete the nms_thr ' \
214 | f'which will be deprecated.'
215 |
216 | result_list = []
217 | for (mlvl_proposals, mlvl_scores,
218 | mlvl_ids) in zip(batch_mlvl_proposals, batch_mlvl_scores,
219 | batch_mlvl_ids):
220 | # Skip nonzero op while exporting to ONNX
221 | if cfg.min_bbox_size > 0 and (not torch.onnx.is_in_onnx_export()):
222 | w = mlvl_proposals[:, 2] - mlvl_proposals[:, 0]
223 | h = mlvl_proposals[:, 3] - mlvl_proposals[:, 1]
224 | valid_ind = torch.nonzero(
225 | (w >= cfg.min_bbox_size)
226 | & (h >= cfg.min_bbox_size),
227 | as_tuple=False).squeeze()
228 | if valid_ind.sum().item() != len(mlvl_proposals):
229 | mlvl_proposals = mlvl_proposals[valid_ind, :]
230 | mlvl_scores = mlvl_scores[valid_ind]
231 | mlvl_ids = mlvl_ids[valid_ind]
232 |
233 | dets, keep = batched_nms(mlvl_proposals, mlvl_scores, mlvl_ids,
234 | cfg.nms)
235 | result_list.append(dets[:cfg.max_per_img])
236 | return result_list
237 |
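A hedged config sketch of how this head can be plugged into a detector: the repo's det configs (e.g. faster_rcnn_red50_neck_fpn_head.py) are not shown in this part of the dump, so the field values below simply mirror the stock mmdet Faster R-CNN RPN settings and should be treated as assumptions, not as values read from this repo.

    rpn_head=dict(
        type='RPNHead_involution',   # registered via @HEADS.register_module() above
        in_channels=256,
        feat_channels=256,           # the involution keeps the channel count, so feat_channels == in_channels
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),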
--------------------------------------------------------------------------------
/det/mmdet/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | from .bfp import BFP
2 | from .channel_mapper import ChannelMapper
3 | from .fpn import FPN
4 | from .fpn_involution import FPN_involution
5 | from .fpn_carafe import FPN_CARAFE
6 | from .hrfpn import HRFPN
7 | from .nas_fpn import NASFPN
8 | from .nasfcos_fpn import NASFCOS_FPN
9 | from .pafpn import PAFPN
10 | from .rfp import RFP
11 | from .yolo_neck import YOLOV3Neck
12 |
13 | __all__ = [
14 | 'FPN', 'BFP', 'ChannelMapper', 'HRFPN', 'NASFPN', 'FPN_CARAFE', 'PAFPN',
15 | 'NASFCOS_FPN', 'RFP', 'YOLOV3Neck',
16 | 'FPN_involution'
17 | ]
18 |
--------------------------------------------------------------------------------
/det/mmdet/models/necks/fpn_involution.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | from mmcv.cnn import ConvModule, xavier_init
4 | from mmcv.runner import auto_fp16
5 |
6 | from ..builder import NECKS
7 | from ..utils.involution_cuda import involution
8 |
9 |
10 | @NECKS.register_module()
11 | class FPN_involution(nn.Module):
12 |     r"""Feature Pyramid Network with involution layers replacing the 3x3 FPN convs.
13 |
14 | This is an implementation of paper `Feature Pyramid Networks for Object
15 |     Detection <https://arxiv.org/abs/1612.03144>`_.
16 |
17 | Args:
18 | in_channels (List[int]): Number of input channels per scale.
19 | out_channels (int): Number of output channels (used at each scale)
20 | num_outs (int): Number of output scales.
21 | start_level (int): Index of the start input backbone level used to
22 | build the feature pyramid. Default: 0.
23 | end_level (int): Index of the end input backbone level (exclusive) to
24 | build the feature pyramid. Default: -1, which means the last level.
25 | add_extra_convs (bool | str): If bool, it decides whether to add conv
26 | layers on top of the original feature maps. Default to False.
27 | If True, its actual mode is specified by `extra_convs_on_inputs`.
28 | If str, it specifies the source feature map of the extra convs.
29 | Only the following options are allowed
30 |
31 | - 'on_input': Last feat map of neck inputs (i.e. backbone feature).
32 | - 'on_lateral': Last feature map after lateral convs.
33 | - 'on_output': The last output feature map after fpn convs.
34 | extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs
35 | on the original feature from the backbone. If True,
36 | it is equivalent to `add_extra_convs='on_input'`. If False, it is
37 | equivalent to set `add_extra_convs='on_output'`. Default to True.
38 | relu_before_extra_convs (bool): Whether to apply relu before the extra
39 | conv. Default: False.
40 | no_norm_on_lateral (bool): Whether to apply norm on lateral.
41 | Default: False.
42 | conv_cfg (dict): Config dict for convolution layer. Default: None.
43 | norm_cfg (dict): Config dict for normalization layer. Default: None.
44 | act_cfg (str): Config dict for activation layer in ConvModule.
45 | Default: None.
46 | upsample_cfg (dict): Config dict for interpolate layer.
47 | Default: `dict(mode='nearest')`
48 |
49 | Example:
50 | >>> import torch
51 | >>> in_channels = [2, 3, 5, 7]
52 | >>> scales = [340, 170, 84, 43]
53 | >>> inputs = [torch.rand(1, c, s, s)
54 | ... for c, s in zip(in_channels, scales)]
55 | >>> self = FPN(in_channels, 11, len(in_channels)).eval()
56 | >>> outputs = self.forward(inputs)
57 | >>> for i in range(len(outputs)):
58 | ... print(f'outputs[{i}].shape = {outputs[i].shape}')
59 | outputs[0].shape = torch.Size([1, 11, 340, 340])
60 | outputs[1].shape = torch.Size([1, 11, 170, 170])
61 | outputs[2].shape = torch.Size([1, 11, 84, 84])
62 | outputs[3].shape = torch.Size([1, 11, 43, 43])
63 | """
64 |
65 | def __init__(self,
66 | in_channels,
67 | out_channels,
68 | num_outs,
69 | start_level=0,
70 | end_level=-1,
71 | add_extra_convs=False,
72 | extra_convs_on_inputs=True,
73 | relu_before_extra_convs=False,
74 | no_norm_on_lateral=False,
75 | conv_cfg=None,
76 | norm_cfg=None,
77 | act_cfg=None,
78 | upsample_cfg=dict(mode='nearest')):
79 | super(FPN_involution, self).__init__()
80 | assert isinstance(in_channels, list)
81 | self.in_channels = in_channels
82 | self.out_channels = out_channels
83 | self.num_ins = len(in_channels)
84 | self.num_outs = num_outs
85 | self.relu_before_extra_convs = relu_before_extra_convs
86 | self.no_norm_on_lateral = no_norm_on_lateral
87 | self.fp16_enabled = False
88 | self.upsample_cfg = upsample_cfg.copy()
89 |
90 | if end_level == -1:
91 | self.backbone_end_level = self.num_ins
92 | assert num_outs >= self.num_ins - start_level
93 | else:
94 | # if end_level < inputs, no extra level is allowed
95 | self.backbone_end_level = end_level
96 | assert end_level <= len(in_channels)
97 | assert num_outs == end_level - start_level
98 | self.start_level = start_level
99 | self.end_level = end_level
100 | self.add_extra_convs = add_extra_convs
101 | assert isinstance(add_extra_convs, (str, bool))
102 | if isinstance(add_extra_convs, str):
103 | # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'
104 | assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
105 | elif add_extra_convs: # True
106 | if extra_convs_on_inputs:
107 | # For compatibility with previous release
108 | # TODO: deprecate `extra_convs_on_inputs`
109 | self.add_extra_convs = 'on_input'
110 | else:
111 | self.add_extra_convs = 'on_output'
112 |
113 | self.lateral_convs = nn.ModuleList()
114 | self.fpn_convs = nn.ModuleList()
115 |
116 | for i in range(self.start_level, self.backbone_end_level):
117 | l_conv = ConvModule(
118 | in_channels[i],
119 | out_channels,
120 | 1,
121 | conv_cfg=conv_cfg,
122 | norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
123 | act_cfg=act_cfg,
124 | inplace=False)
125 | fpn_conv = involution(out_channels, 7, 1)
126 |
127 | self.lateral_convs.append(l_conv)
128 | self.fpn_convs.append(fpn_conv)
129 |
130 | # add extra conv layers (e.g., RetinaNet)
131 | extra_levels = num_outs - self.backbone_end_level + self.start_level
132 | if self.add_extra_convs and extra_levels >= 1:
133 | for i in range(extra_levels):
134 | if i == 0 and self.add_extra_convs == 'on_input':
135 | in_channels = self.in_channels[self.backbone_end_level - 1]
136 | else:
137 | in_channels = out_channels
138 | extra_fpn_conv = ConvModule(
139 | in_channels,
140 | out_channels,
141 | 3,
142 | stride=2,
143 | padding=1,
144 | conv_cfg=conv_cfg,
145 | norm_cfg=norm_cfg,
146 | act_cfg=act_cfg,
147 | inplace=False)
148 | self.fpn_convs.append(extra_fpn_conv)
149 |
150 | # default init_weights for conv(msra) and norm in ConvModule
151 | def init_weights(self):
152 | """Initialize the weights of FPN module."""
153 | for m in self.modules():
154 | if isinstance(m, nn.Conv2d):
155 | xavier_init(m, distribution='uniform')
156 |
157 | @auto_fp16()
158 | def forward(self, inputs):
159 | """Forward function."""
160 | assert len(inputs) == len(self.in_channels)
161 |
162 | # build laterals
163 | laterals = [
164 | lateral_conv(inputs[i + self.start_level])
165 | for i, lateral_conv in enumerate(self.lateral_convs)
166 | ]
167 |
168 | # build top-down path
169 | used_backbone_levels = len(laterals)
170 | for i in range(used_backbone_levels - 1, 0, -1):
171 | # In some cases, fixing `scale factor` (e.g. 2) is preferred, but
172 | # it cannot co-exist with `size` in `F.interpolate`.
173 | if 'scale_factor' in self.upsample_cfg:
174 | laterals[i - 1] += F.interpolate(laterals[i],
175 | **self.upsample_cfg)
176 | else:
177 | prev_shape = laterals[i - 1].shape[2:]
178 | laterals[i - 1] += F.interpolate(
179 | laterals[i], size=prev_shape, **self.upsample_cfg)
180 |
181 | # build outputs
182 | # part 1: from original levels
183 | outs = [
184 | self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
185 | ]
186 | # part 2: add extra levels
187 | if self.num_outs > len(outs):
188 | # use max pool to get more levels on top of outputs
189 | # (e.g., Faster R-CNN, Mask R-CNN)
190 | if not self.add_extra_convs:
191 | for i in range(self.num_outs - used_backbone_levels):
192 | outs.append(F.max_pool2d(outs[-1], 1, stride=2))
193 | # add conv layers on top of original feature maps (RetinaNet)
194 | else:
195 | if self.add_extra_convs == 'on_input':
196 | extra_source = inputs[self.backbone_end_level - 1]
197 | elif self.add_extra_convs == 'on_lateral':
198 | extra_source = laterals[-1]
199 | elif self.add_extra_convs == 'on_output':
200 | extra_source = outs[-1]
201 | else:
202 | raise NotImplementedError
203 | outs.append(self.fpn_convs[used_backbone_levels](extra_source))
204 | for i in range(used_backbone_levels + 1, self.num_outs):
205 | if self.relu_before_extra_convs:
206 | outs.append(self.fpn_convs[i](F.relu(outs[-1])))
207 | else:
208 | outs.append(self.fpn_convs[i](outs[-1]))
209 | return tuple(outs)
210 |
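A neck config sketch for this module: in_channels/out_channels follow the RedNet-50 values used in the segmentation config fpn_red50_neck.py later in this dump, while num_outs=5 is the usual detection setting and is an assumption here rather than a value read from this repo's det configs.

    neck=dict(
        type='FPN_involution',
        in_channels=[256, 512, 1024, 2048],  # RedNet-50 stage outputs
        out_channels=256,
        num_outs=5)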
--------------------------------------------------------------------------------
/det/mmdet/models/roi_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_roi_head import BaseRoIHead
2 | from .bbox_heads import (BBoxHead, ConvFCBBoxHead, DoubleConvFCBBoxHead,
3 | SCNetBBoxHead, Shared2FCBBoxHead,
4 | Shared4Conv1FCBBoxHead)
5 | from .cascade_roi_head import CascadeRoIHead
6 | from .double_roi_head import DoubleHeadRoIHead
7 | from .dynamic_roi_head import DynamicRoIHead
8 | from .grid_roi_head import GridRoIHead
9 | from .htc_roi_head import HybridTaskCascadeRoIHead
10 | from .mask_heads import (CoarseMaskHead, FCNMaskHead, FeatureRelayHead,
11 | FusedSemanticHead, GlobalContextHead, GridHead,
12 | HTCMaskHead, MaskIoUHead, MaskPointHead,
13 | SCNetMaskHead, SCNetSemanticHead, FCNMaskHead_involution)
14 | from .mask_scoring_roi_head import MaskScoringRoIHead
15 | from .pisa_roi_head import PISARoIHead
16 | from .point_rend_roi_head import PointRendRoIHead
17 | from .roi_extractors import SingleRoIExtractor
18 | from .scnet_roi_head import SCNetRoIHead
19 | from .shared_heads import ResLayer
20 | from .sparse_roi_head import SparseRoIHead
21 | from .standard_roi_head import StandardRoIHead
22 | from .trident_roi_head import TridentRoIHead
23 |
24 | __all__ = [
25 | 'BaseRoIHead', 'CascadeRoIHead', 'DoubleHeadRoIHead', 'MaskScoringRoIHead',
26 | 'HybridTaskCascadeRoIHead', 'GridRoIHead', 'ResLayer', 'BBoxHead',
27 | 'ConvFCBBoxHead', 'Shared2FCBBoxHead', 'StandardRoIHead',
28 | 'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'FCNMaskHead',
29 | 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 'MaskIoUHead',
30 | 'SingleRoIExtractor', 'PISARoIHead', 'PointRendRoIHead', 'MaskPointHead',
31 | 'CoarseMaskHead', 'DynamicRoIHead', 'SparseRoIHead', 'TridentRoIHead',
32 | 'SCNetRoIHead', 'SCNetMaskHead', 'SCNetSemanticHead', 'SCNetBBoxHead',
33 | 'FeatureRelayHead', 'GlobalContextHead',
34 | 'FCNMaskHead_involution'
35 | ]
36 |
--------------------------------------------------------------------------------
/det/mmdet/models/roi_heads/mask_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .coarse_mask_head import CoarseMaskHead
2 | from .fcn_mask_head import FCNMaskHead
3 | from .fcn_mask_head_involution import FCNMaskHead_involution
4 | from .feature_relay_head import FeatureRelayHead
5 | from .fused_semantic_head import FusedSemanticHead
6 | from .global_context_head import GlobalContextHead
7 | from .grid_head import GridHead
8 | from .htc_mask_head import HTCMaskHead
9 | from .mask_point_head import MaskPointHead
10 | from .maskiou_head import MaskIoUHead
11 | from .scnet_mask_head import SCNetMaskHead
12 | from .scnet_semantic_head import SCNetSemanticHead
13 |
14 | __all__ = [
15 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead',
16 | 'MaskIoUHead', 'CoarseMaskHead', 'MaskPointHead', 'SCNetMaskHead',
17 | 'SCNetSemanticHead', 'GlobalContextHead', 'FeatureRelayHead',
18 | 'FCNMaskHead_involution'
19 | ]
20 |
--------------------------------------------------------------------------------
/det/mmdet/models/roi_heads/mask_heads/fcn_mask_head_involution.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from mmcv.cnn import Conv2d, ConvModule, build_upsample_layer
6 | from mmcv.ops.carafe import CARAFEPack
7 | from mmcv.runner import auto_fp16, force_fp32
8 | from torch.nn.modules.utils import _pair
9 |
10 | from mmdet.core import mask_target
11 | from mmdet.models.builder import HEADS, build_loss
12 | from mmdet.models.utils.involution_cuda import involution
13 |
14 | BYTES_PER_FLOAT = 4
15 | # TODO: This memory limit may be too much or too little. It would be better to
16 | # determine it based on available resources.
17 | GPU_MEM_LIMIT = 1024**3 # 1 GB memory limit
18 |
19 |
20 | @HEADS.register_module()
21 | class FCNMaskHead_involution(nn.Module):
22 |
23 | def __init__(self,
24 | num_convs=4,
25 | roi_feat_size=14,
26 | in_channels=256,
27 | conv_kernel_size=3,
28 | conv_out_channels=256,
29 | num_classes=80,
30 | class_agnostic=False,
31 | upsample_cfg=dict(type='deconv', scale_factor=2),
32 | conv_cfg=None,
33 | norm_cfg=None,
34 | loss_mask=dict(
35 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)):
36 | super(FCNMaskHead_involution, self).__init__()
37 | self.upsample_cfg = upsample_cfg.copy()
38 | if self.upsample_cfg['type'] not in [
39 | None, 'deconv', 'nearest', 'bilinear', 'carafe'
40 | ]:
41 | raise ValueError(
42 | f'Invalid upsample method {self.upsample_cfg["type"]}, '
43 | 'accepted methods are "deconv", "nearest", "bilinear", '
44 | '"carafe"')
45 | self.num_convs = num_convs
46 | # WARN: roi_feat_size is reserved and not used
47 | self.roi_feat_size = _pair(roi_feat_size)
48 | self.in_channels = in_channels
49 | self.conv_kernel_size = conv_kernel_size
50 | self.conv_out_channels = conv_out_channels
51 | self.upsample_method = self.upsample_cfg.get('type')
52 | self.scale_factor = self.upsample_cfg.pop('scale_factor', None)
53 | self.num_classes = num_classes
54 | self.class_agnostic = class_agnostic
55 | self.conv_cfg = conv_cfg
56 | self.norm_cfg = norm_cfg
57 | self.fp16_enabled = False
58 | self.loss_mask = build_loss(loss_mask)
59 |
60 | self.convs = nn.ModuleList()
61 | for i in range(self.num_convs):
62 | in_channels = (
63 | self.in_channels if i == 0 else self.conv_out_channels)
64 | padding = (self.conv_kernel_size - 1) // 2
65 | self.convs.append(nn.Sequential(
66 | involution(in_channels, 7, 1),
67 | nn.ReLU(inplace=True)))
68 | # ConvModule(
69 | # in_channels,
70 | # self.conv_out_channels,
71 | # self.conv_kernel_size,
72 | # padding=padding,
73 | # conv_cfg=conv_cfg,
74 | # norm_cfg=norm_cfg))
75 | upsample_in_channels = (
76 | self.conv_out_channels if self.num_convs > 0 else in_channels)
77 | upsample_cfg_ = self.upsample_cfg.copy()
78 | if self.upsample_method is None:
79 | self.upsample = None
80 | elif self.upsample_method == 'deconv':
81 | upsample_cfg_.update(
82 | in_channels=upsample_in_channels,
83 | out_channels=self.conv_out_channels,
84 | kernel_size=self.scale_factor,
85 | stride=self.scale_factor)
86 | self.upsample = build_upsample_layer(upsample_cfg_)
87 | elif self.upsample_method == 'carafe':
88 | upsample_cfg_.update(
89 | channels=upsample_in_channels, scale_factor=self.scale_factor)
90 | self.upsample = build_upsample_layer(upsample_cfg_)
91 | else:
92 | # suppress warnings
93 | align_corners = (None
94 | if self.upsample_method == 'nearest' else False)
95 | upsample_cfg_.update(
96 | scale_factor=self.scale_factor,
97 | mode=self.upsample_method,
98 | align_corners=align_corners)
99 | self.upsample = build_upsample_layer(upsample_cfg_)
100 |
101 | out_channels = 1 if self.class_agnostic else self.num_classes
102 | logits_in_channel = (
103 | self.conv_out_channels
104 | if self.upsample_method == 'deconv' else upsample_in_channels)
105 | self.conv_logits = Conv2d(logits_in_channel, out_channels, 1)
106 | self.relu = nn.ReLU(inplace=True)
107 | self.debug_imgs = None
108 |
109 | def init_weights(self):
110 | for m in [self.upsample, self.conv_logits]:
111 | if m is None:
112 | continue
113 | elif isinstance(m, CARAFEPack):
114 | m.init_weights()
115 | else:
116 | nn.init.kaiming_normal_(
117 | m.weight, mode='fan_out', nonlinearity='relu')
118 | nn.init.constant_(m.bias, 0)
119 |
120 | @auto_fp16()
121 | def forward(self, x):
122 | for conv in self.convs:
123 | x = conv(x)
124 | if self.upsample is not None:
125 | x = self.upsample(x)
126 | if self.upsample_method == 'deconv':
127 | x = self.relu(x)
128 | mask_pred = self.conv_logits(x)
129 | return mask_pred
130 |
131 | def get_targets(self, sampling_results, gt_masks, rcnn_train_cfg):
132 | pos_proposals = [res.pos_bboxes for res in sampling_results]
133 | pos_assigned_gt_inds = [
134 | res.pos_assigned_gt_inds for res in sampling_results
135 | ]
136 | mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds,
137 | gt_masks, rcnn_train_cfg)
138 | return mask_targets
139 |
140 | @force_fp32(apply_to=('mask_pred', ))
141 | def loss(self, mask_pred, mask_targets, labels):
142 | """
143 | Example:
144 | >>> from mmdet.models.roi_heads.mask_heads.fcn_mask_head import * # NOQA
145 | >>> N = 7 # N = number of extracted ROIs
146 | >>> C, H, W = 11, 32, 32
147 | >>> # Create example instance of FCN Mask Head.
148 | >>> # There are lots of variations depending on the configuration
149 | >>> self = FCNMaskHead(num_classes=C, num_convs=1)
150 | >>> inputs = torch.rand(N, self.in_channels, H, W)
151 | >>> mask_pred = self.forward(inputs)
152 | >>> sf = self.scale_factor
153 | >>> labels = torch.randint(0, C, size=(N,))
154 | >>> # With the default properties the mask targets should indicate
155 | >>> # a (potentially soft) single-class label
156 | >>> mask_targets = torch.rand(N, H * sf, W * sf)
157 | >>> loss = self.loss(mask_pred, mask_targets, labels)
158 | >>> print('loss = {!r}'.format(loss))
159 | """
160 | loss = dict()
161 | if mask_pred.size(0) == 0:
162 | loss_mask = mask_pred.sum()
163 | else:
164 | if self.class_agnostic:
165 | loss_mask = self.loss_mask(mask_pred, mask_targets,
166 | torch.zeros_like(labels))
167 | else:
168 | loss_mask = self.loss_mask(mask_pred, mask_targets, labels)
169 | loss['loss_mask'] = loss_mask
170 | return loss
171 |
172 | def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
173 | ori_shape, scale_factor, rescale):
174 | """Get segmentation masks from mask_pred and bboxes.
175 |
176 | Args:
177 | mask_pred (Tensor or ndarray): shape (n, #class, h, w).
178 | For single-scale testing, mask_pred is the direct output of
179 | model, whose type is Tensor, while for multi-scale testing,
180 | it will be converted to numpy array outside of this method.
181 | det_bboxes (Tensor): shape (n, 4/5)
182 | det_labels (Tensor): shape (n, )
183 | rcnn_test_cfg (dict): rcnn testing config
184 | ori_shape (Tuple): original image height and width, shape (2,)
185 |             scale_factor (float | Tensor): If ``rescale is True``, box
186 | coordinates are divided by this scale factor to fit
187 | ``ori_shape``.
188 | rescale (bool): If True, the resulting masks will be rescaled to
189 | ``ori_shape``.
190 |
191 | Returns:
192 | list[list]: encoded masks. The c-th item in the outer list
193 | corresponds to the c-th class. Given the c-th outer list, the
194 | i-th item in that inner list is the mask for the i-th box with
195 | class label c.
196 |
197 | Example:
198 | >>> import mmcv
199 | >>> from mmdet.models.roi_heads.mask_heads.fcn_mask_head import * # NOQA
200 | >>> N = 7 # N = number of extracted ROIs
201 | >>> C, H, W = 11, 32, 32
202 | >>> # Create example instance of FCN Mask Head.
203 | >>> self = FCNMaskHead(num_classes=C, num_convs=0)
204 | >>> inputs = torch.rand(N, self.in_channels, H, W)
205 | >>> mask_pred = self.forward(inputs)
206 | >>> # Each input is associated with some bounding box
207 | >>> det_bboxes = torch.Tensor([[1, 1, 42, 42 ]] * N)
208 | >>> det_labels = torch.randint(0, C, size=(N,))
209 | >>> rcnn_test_cfg = mmcv.Config({'mask_thr_binary': 0, })
210 | >>> ori_shape = (H * 4, W * 4)
211 | >>> scale_factor = torch.FloatTensor((1, 1))
212 | >>> rescale = False
213 | >>> # Encoded masks are a list for each category.
214 | >>> encoded_masks = self.get_seg_masks(
215 | >>> mask_pred, det_bboxes, det_labels, rcnn_test_cfg, ori_shape,
216 | >>> scale_factor, rescale
217 | >>> )
218 | >>> assert len(encoded_masks) == C
219 | >>> assert sum(list(map(len, encoded_masks))) == N
220 | """
221 | if isinstance(mask_pred, torch.Tensor):
222 | mask_pred = mask_pred.sigmoid()
223 | else:
224 | mask_pred = det_bboxes.new_tensor(mask_pred)
225 |
226 | device = mask_pred.device
227 | cls_segms = [[] for _ in range(self.num_classes)
228 | ] # BG is not included in num_classes
229 | bboxes = det_bboxes[:, :4]
230 | labels = det_labels
231 |
232 | if rescale:
233 | img_h, img_w = ori_shape[:2]
234 | else:
235 | if isinstance(scale_factor, float):
236 | img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
237 | img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
238 | else:
239 | w_scale, h_scale = scale_factor[0], scale_factor[1]
240 | img_h = np.round(ori_shape[0] * h_scale.item()).astype(
241 | np.int32)
242 | img_w = np.round(ori_shape[1] * w_scale.item()).astype(
243 | np.int32)
244 | scale_factor = 1.0
245 |
246 | if not isinstance(scale_factor, (float, torch.Tensor)):
247 | scale_factor = bboxes.new_tensor(scale_factor)
248 | bboxes = bboxes / scale_factor
249 |
250 | if torch.onnx.is_in_onnx_export():
251 | # TODO: Remove after F.grid_sample is supported.
252 | from torchvision.models.detection.roi_heads \
253 | import paste_masks_in_image
254 | masks = paste_masks_in_image(mask_pred, bboxes, ori_shape[:2])
255 | thr = rcnn_test_cfg.get('mask_thr_binary', 0)
256 | if thr > 0:
257 | masks = masks >= thr
258 | return masks
259 |
260 | N = len(mask_pred)
261 |         # The actual implementation splits the input into chunks
262 |         # and pastes them chunk by chunk.
263 | if device.type == 'cpu':
264 | # CPU is most efficient when they are pasted one by one with
265 | # skip_empty=True, so that it performs minimal number of
266 | # operations.
267 | num_chunks = N
268 | else:
269 | # GPU benefits from parallelism for larger chunks,
270 | # but may have memory issue
271 | num_chunks = int(
272 | np.ceil(N * img_h * img_w * BYTES_PER_FLOAT / GPU_MEM_LIMIT))
273 | assert (num_chunks <=
274 | N), 'Default GPU_MEM_LIMIT is too small; try increasing it'
275 | chunks = torch.chunk(torch.arange(N, device=device), num_chunks)
276 |
277 | threshold = rcnn_test_cfg.mask_thr_binary
278 | im_mask = torch.zeros(
279 | N,
280 | img_h,
281 | img_w,
282 | device=device,
283 | dtype=torch.bool if threshold >= 0 else torch.uint8)
284 |
285 | if not self.class_agnostic:
286 | mask_pred = mask_pred[range(N), labels][:, None]
287 |
288 | for inds in chunks:
289 | masks_chunk, spatial_inds = _do_paste_mask(
290 | mask_pred[inds],
291 | bboxes[inds],
292 | img_h,
293 | img_w,
294 | skip_empty=device.type == 'cpu')
295 |
296 | if threshold >= 0:
297 | masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool)
298 | else:
299 | # for visualization and debugging
300 | masks_chunk = (masks_chunk * 255).to(dtype=torch.uint8)
301 |
302 | im_mask[(inds, ) + spatial_inds] = masks_chunk
303 |
304 | for i in range(N):
305 | cls_segms[labels[i]].append(im_mask[i].detach().cpu().numpy())
306 | return cls_segms
307 |
308 |
309 | def _do_paste_mask(masks, boxes, img_h, img_w, skip_empty=True):
310 |     """Paste instance masks according to boxes.
311 |
312 | This implementation is modified from
313 | https://github.com/facebookresearch/detectron2/
314 |
315 | Args:
316 | masks (Tensor): N, 1, H, W
317 | boxes (Tensor): N, 4
318 | img_h (int): Height of the image to be pasted.
319 | img_w (int): Width of the image to be pasted.
320 | skip_empty (bool): Only paste masks within the region that
321 |             tightly bounds all boxes, and return the results for this region only.
322 | An important optimization for CPU.
323 |
324 | Returns:
325 | tuple: (Tensor, tuple). The first item is mask tensor, the second one
326 | is the slice object.
327 | If skip_empty == False, the whole image will be pasted. It will
328 | return a mask of shape (N, img_h, img_w) and an empty tuple.
329 | If skip_empty == True, only area around the mask will be pasted.
330 | A mask of shape (N, h', w') and its start and end coordinates
331 | in the original image will be returned.
332 | """
333 | # On GPU, paste all masks together (up to chunk size)
334 | # by using the entire image to sample the masks
335 | # Compared to pasting them one by one,
336 | # this has more operations but is faster on COCO-scale dataset.
337 | device = masks.device
338 | if skip_empty:
339 | x0_int, y0_int = torch.clamp(
340 | boxes.min(dim=0).values.floor()[:2] - 1,
341 | min=0).to(dtype=torch.int32)
342 | x1_int = torch.clamp(
343 | boxes[:, 2].max().ceil() + 1, max=img_w).to(dtype=torch.int32)
344 | y1_int = torch.clamp(
345 | boxes[:, 3].max().ceil() + 1, max=img_h).to(dtype=torch.int32)
346 | else:
347 | x0_int, y0_int = 0, 0
348 | x1_int, y1_int = img_w, img_h
349 | x0, y0, x1, y1 = torch.split(boxes, 1, dim=1) # each is Nx1
350 |
351 | N = masks.shape[0]
352 |
353 | img_y = torch.arange(
354 | y0_int, y1_int, device=device, dtype=torch.float32) + 0.5
355 | img_x = torch.arange(
356 | x0_int, x1_int, device=device, dtype=torch.float32) + 0.5
357 | img_y = (img_y - y0) / (y1 - y0) * 2 - 1
358 | img_x = (img_x - x0) / (x1 - x0) * 2 - 1
359 | # img_x, img_y have shapes (N, w), (N, h)
360 | if torch.isinf(img_x).any():
361 | inds = torch.where(torch.isinf(img_x))
362 | img_x[inds] = 0
363 | if torch.isinf(img_y).any():
364 | inds = torch.where(torch.isinf(img_y))
365 | img_y[inds] = 0
366 |
367 | gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1))
368 | gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1))
369 | grid = torch.stack([gx, gy], dim=3)
370 |
371 | if torch.onnx.is_in_onnx_export():
372 | raise RuntimeError(
373 | 'Exporting F.grid_sample from Pytorch to ONNX is not supported.')
374 | img_masks = F.grid_sample(
375 | masks.to(dtype=torch.float32), grid, align_corners=False)
376 |
377 | if skip_empty:
378 | return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int))
379 | else:
380 | return img_masks[:, 0], ()
381 |
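A hedged sketch of how this head could be referenced in a Mask R-CNN roi_head config; the values are just the class defaults above (num_convs=4, 256 channels, 80 classes), not text copied from this repo's mask_rcnn_red50_neck_fpn_head.py.

    mask_head=dict(
        type='FCNMaskHead_involution',
        num_convs=4,                 # each conv here is involution(channels, 7, 1) + ReLU
        in_channels=256,
        conv_out_channels=256,       # the involution keeps channels, so this must equal in_channels
        num_classes=80,
        loss_mask=dict(type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))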
--------------------------------------------------------------------------------
/det/mmdet/models/utils/involution_cuda.py:
--------------------------------------------------------------------------------
1 | from torch.autograd import Function
2 | import torch
3 | from torch.nn.modules.utils import _pair
4 | import torch.nn.functional as F
5 | import torch.nn as nn
6 | from mmcv.cnn import ConvModule
7 |
8 |
9 | from collections import namedtuple
10 | import cupy
11 | from string import Template
12 |
13 |
14 | Stream = namedtuple('Stream', ['ptr'])
15 |
16 |
17 | def Dtype(t):
18 | if isinstance(t, torch.cuda.FloatTensor):
19 | return 'float'
20 | elif isinstance(t, torch.cuda.DoubleTensor):
21 | return 'double'
22 |
23 |
24 | @cupy._util.memoize(for_each_device=True)
25 | def load_kernel(kernel_name, code, **kwargs):
26 | code = Template(code).substitute(**kwargs)
27 | kernel_code = cupy.cuda.compile_with_cache(code)
28 | return kernel_code.get_function(kernel_name)
29 |
30 |
31 | CUDA_NUM_THREADS = 1024
32 |
33 | kernel_loop = '''
34 | #define CUDA_KERNEL_LOOP(i, n) \
35 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
36 | i < (n); \
37 | i += blockDim.x * gridDim.x)
38 | '''
39 |
40 |
41 | def GET_BLOCKS(N):
42 | return (N + CUDA_NUM_THREADS - 1) // CUDA_NUM_THREADS
43 |
44 |
45 | _involution_kernel = kernel_loop + '''
46 | extern "C"
47 | __global__ void involution_forward_kernel(
48 | const ${Dtype}* bottom_data, const ${Dtype}* weight_data, ${Dtype}* top_data) {
49 | CUDA_KERNEL_LOOP(index, ${nthreads}) {
50 | const int n = index / ${channels} / ${top_height} / ${top_width};
51 | const int c = (index / ${top_height} / ${top_width}) % ${channels};
52 | const int h = (index / ${top_width}) % ${top_height};
53 | const int w = index % ${top_width};
54 | const int g = c / (${channels} / ${groups});
55 | ${Dtype} value = 0;
56 | #pragma unroll
57 | for (int kh = 0; kh < ${kernel_h}; ++kh) {
58 | #pragma unroll
59 | for (int kw = 0; kw < ${kernel_w}; ++kw) {
60 | const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h};
61 | const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w};
62 | if ((h_in >= 0) && (h_in < ${bottom_height})
63 | && (w_in >= 0) && (w_in < ${bottom_width})) {
64 | const int offset = ((n * ${channels} + c) * ${bottom_height} + h_in)
65 | * ${bottom_width} + w_in;
66 | const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h)
67 | * ${top_width} + w;
68 | value += weight_data[offset_weight] * bottom_data[offset];
69 | }
70 | }
71 | }
72 | top_data[index] = value;
73 | }
74 | }
75 | '''
76 |
77 |
78 | _involution_kernel_backward_grad_input = kernel_loop + '''
79 | extern "C"
80 | __global__ void involution_backward_grad_input_kernel(
81 | const ${Dtype}* const top_diff, const ${Dtype}* const weight_data, ${Dtype}* const bottom_diff) {
82 | CUDA_KERNEL_LOOP(index, ${nthreads}) {
83 | const int n = index / ${channels} / ${bottom_height} / ${bottom_width};
84 | const int c = (index / ${bottom_height} / ${bottom_width}) % ${channels};
85 | const int h = (index / ${bottom_width}) % ${bottom_height};
86 | const int w = index % ${bottom_width};
87 | const int g = c / (${channels} / ${groups});
88 | ${Dtype} value = 0;
89 | #pragma unroll
90 | for (int kh = 0; kh < ${kernel_h}; ++kh) {
91 | #pragma unroll
92 | for (int kw = 0; kw < ${kernel_w}; ++kw) {
93 | const int h_out_s = h + ${pad_h} - kh * ${dilation_h};
94 | const int w_out_s = w + ${pad_w} - kw * ${dilation_w};
95 | if (((h_out_s % ${stride_h}) == 0) && ((w_out_s % ${stride_w}) == 0)) {
96 | const int h_out = h_out_s / ${stride_h};
97 | const int w_out = w_out_s / ${stride_w};
98 | if ((h_out >= 0) && (h_out < ${top_height})
99 | && (w_out >= 0) && (w_out < ${top_width})) {
100 | const int offset = ((n * ${channels} + c) * ${top_height} + h_out)
101 | * ${top_width} + w_out;
102 | const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h_out)
103 | * ${top_width} + w_out;
104 | value += weight_data[offset_weight] * top_diff[offset];
105 | }
106 | }
107 | }
108 | }
109 | bottom_diff[index] = value;
110 | }
111 | }
112 | '''
113 |
114 |
115 | _involution_kernel_backward_grad_weight = kernel_loop + '''
116 | extern "C"
117 | __global__ void involution_backward_grad_weight_kernel(
118 | const ${Dtype}* const top_diff, const ${Dtype}* const bottom_data, ${Dtype}* const buffer_data) {
119 | CUDA_KERNEL_LOOP(index, ${nthreads}) {
120 | const int h = (index / ${top_width}) % ${top_height};
121 | const int w = index % ${top_width};
122 | const int kh = (index / ${kernel_w} / ${top_height} / ${top_width})
123 | % ${kernel_h};
124 | const int kw = (index / ${top_height} / ${top_width}) % ${kernel_w};
125 | const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h};
126 | const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w};
127 | if ((h_in >= 0) && (h_in < ${bottom_height})
128 | && (w_in >= 0) && (w_in < ${bottom_width})) {
129 | const int g = (index / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${groups};
130 | const int n = (index / ${groups} / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${num};
131 | ${Dtype} value = 0;
132 | #pragma unroll
133 | for (int c = g * (${channels} / ${groups}); c < (g + 1) * (${channels} / ${groups}); ++c) {
134 | const int top_offset = ((n * ${channels} + c) * ${top_height} + h)
135 | * ${top_width} + w;
136 | const int bottom_offset = ((n * ${channels} + c) * ${bottom_height} + h_in)
137 | * ${bottom_width} + w_in;
138 | value += top_diff[top_offset] * bottom_data[bottom_offset];
139 | }
140 | buffer_data[index] = value;
141 | } else {
142 | buffer_data[index] = 0;
143 | }
144 | }
145 | }
146 | '''
147 |
148 |
149 | class _involution(Function):
150 | @staticmethod
151 | def forward(ctx, input, weight, stride, padding, dilation):
152 | assert input.dim() == 4 and input.is_cuda
153 | assert weight.dim() == 6 and weight.is_cuda
154 | batch_size, channels, height, width = input.size()
155 | kernel_h, kernel_w = weight.size()[2:4]
156 | output_h = int((height + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) / stride[0] + 1)
157 | output_w = int((width + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) / stride[1] + 1)
158 |
159 | output = input.new(batch_size, channels, output_h, output_w)
160 | n = output.numel()
161 |
162 | with torch.cuda.device_of(input):
163 | f = load_kernel('involution_forward_kernel', _involution_kernel, Dtype=Dtype(input), nthreads=n,
164 | num=batch_size, channels=channels, groups=weight.size()[1],
165 | bottom_height=height, bottom_width=width,
166 | top_height=output_h, top_width=output_w,
167 | kernel_h=kernel_h, kernel_w=kernel_w,
168 | stride_h=stride[0], stride_w=stride[1],
169 | dilation_h=dilation[0], dilation_w=dilation[1],
170 | pad_h=padding[0], pad_w=padding[1])
171 | f(block=(CUDA_NUM_THREADS,1,1),
172 | grid=(GET_BLOCKS(n),1,1),
173 | args=[input.data_ptr(), weight.data_ptr(), output.data_ptr()],
174 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
175 |
176 | ctx.save_for_backward(input, weight)
177 | ctx.stride, ctx.padding, ctx.dilation = stride, padding, dilation
178 | return output
179 |
180 | @staticmethod
181 | def backward(ctx, grad_output):
182 | assert grad_output.is_cuda and grad_output.is_contiguous()
183 | input, weight = ctx.saved_tensors
184 | stride, padding, dilation = ctx.stride, ctx.padding, ctx.dilation
185 |
186 | batch_size, channels, height, width = input.size()
187 | kernel_h, kernel_w = weight.size()[2:4]
188 | output_h, output_w = grad_output.size()[2:]
189 |
190 | grad_input, grad_weight = None, None
191 |
192 | opt = dict(Dtype=Dtype(grad_output),
193 | num=batch_size, channels=channels, groups=weight.size()[1],
194 | bottom_height=height, bottom_width=width,
195 | top_height=output_h, top_width=output_w,
196 | kernel_h=kernel_h, kernel_w=kernel_w,
197 | stride_h=stride[0], stride_w=stride[1],
198 | dilation_h=dilation[0], dilation_w=dilation[1],
199 | pad_h=padding[0], pad_w=padding[1])
200 |
201 | with torch.cuda.device_of(input):
202 | if ctx.needs_input_grad[0]:
203 | grad_input = input.new(input.size())
204 |
205 | n = grad_input.numel()
206 | opt['nthreads'] = n
207 |
208 | f = load_kernel('involution_backward_grad_input_kernel',
209 | _involution_kernel_backward_grad_input, **opt)
210 | f(block=(CUDA_NUM_THREADS,1,1),
211 | grid=(GET_BLOCKS(n),1,1),
212 | args=[grad_output.data_ptr(), weight.data_ptr(), grad_input.data_ptr()],
213 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
214 |
215 | if ctx.needs_input_grad[1]:
216 | grad_weight = weight.new(weight.size())
217 |
218 | n = grad_weight.numel()
219 | opt['nthreads'] = n
220 |
221 | f = load_kernel('involution_backward_grad_weight_kernel',
222 | _involution_kernel_backward_grad_weight, **opt)
223 | f(block=(CUDA_NUM_THREADS,1,1),
224 | grid=(GET_BLOCKS(n),1,1),
225 | args=[grad_output.data_ptr(), input.data_ptr(), grad_weight.data_ptr()],
226 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
227 |
228 | return grad_input, grad_weight, None, None, None
229 |
230 |
231 | def _involution_cuda(input, weight, bias=None, stride=1, padding=0, dilation=1):
232 |     """Functional interface to the involution CUDA kernels defined above.
233 |     """
234 | assert input.size(0) == weight.size(0)
235 | assert input.size(-2)//stride == weight.size(-2)
236 | assert input.size(-1)//stride == weight.size(-1)
237 | if input.is_cuda:
238 | out = _involution.apply(input, weight, _pair(stride), _pair(padding), _pair(dilation))
239 | if bias is not None:
240 | out += bias.view(1,-1,1,1)
241 | else:
242 | raise NotImplementedError
243 | return out
244 |
245 |
246 | class involution(nn.Module):
247 |
248 | def __init__(self,
249 | channels,
250 | kernel_size,
251 | stride):
252 | super(involution, self).__init__()
253 | self.kernel_size = kernel_size
254 | self.stride = stride
255 | self.channels = channels
256 | reduction_ratio = 4
257 | self.group_channels = 16
258 | self.groups = self.channels // self.group_channels
259 | self.conv1 = ConvModule(
260 | in_channels=channels,
261 | out_channels=channels // reduction_ratio,
262 | kernel_size=1,
263 | conv_cfg=None,
264 | norm_cfg=dict(type='BN'),
265 | act_cfg=dict(type='ReLU'))
266 | self.conv2 = ConvModule(
267 | in_channels=channels // reduction_ratio,
268 | out_channels=kernel_size**2 * self.groups,
269 | kernel_size=1,
270 | stride=1,
271 | conv_cfg=None,
272 | norm_cfg=None,
273 | act_cfg=None)
274 | if stride > 1:
275 | self.avgpool = nn.AvgPool2d(stride, stride)
276 |
277 | def forward(self, x):
278 | weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
279 | b, c, h, w = weight.shape
280 | weight = weight.view(b, self.groups, self.kernel_size, self.kernel_size, h, w)
281 | out = _involution_cuda(x, weight, stride=self.stride, padding=(self.kernel_size-1)//2)
282 | return out
283 |
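A minimal usage sketch of the CUDA-backed module above, assuming a CUDA device and a working cupy installation (the kernels are compiled through cupy on first use).

    import torch
    from mmdet.models.utils.involution_cuda import involution

    inv = involution(channels=64, kernel_size=7, stride=1).cuda()
    x = torch.randn(2, 64, 32, 32, device='cuda')
    y = inv(x)                  # the 7x7 kernels are generated from x itself by conv1/conv2
    assert y.shape == x.shape   # stride 1 preserves the spatial resolution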
--------------------------------------------------------------------------------
/det/mmdet/models/utils/involution_naive.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from mmcv.cnn import ConvModule
3 |
4 |
5 | class involution(nn.Module):
6 |
7 | def __init__(self,
8 | channels,
9 | kernel_size,
10 | stride):
11 | super(involution, self).__init__()
12 | self.kernel_size = kernel_size
13 | self.stride = stride
14 | self.channels = channels
15 | reduction_ratio = 4
16 | self.group_channels = 16
17 | self.groups = self.channels // self.group_channels
18 | self.conv1 = ConvModule(
19 | in_channels=channels,
20 | out_channels=channels // reduction_ratio,
21 | kernel_size=1,
22 | conv_cfg=None,
23 | norm_cfg=dict(type='BN'),
24 | act_cfg=dict(type='ReLU'))
25 | self.conv2 = ConvModule(
26 | in_channels=channels // reduction_ratio,
27 | out_channels=kernel_size**2 * self.groups,
28 | kernel_size=1,
29 | stride=1,
30 | conv_cfg=None,
31 | norm_cfg=None,
32 | act_cfg=None)
33 | if stride > 1:
34 | self.avgpool = nn.AvgPool2d(stride, stride)
35 | self.unfold = nn.Unfold(kernel_size, 1, (kernel_size-1)//2, stride)
36 |
37 | def forward(self, x):
38 | weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
39 | b, c, h, w = weight.shape
40 | weight = weight.view(b, self.groups, self.kernel_size**2, h, w).unsqueeze(2)
41 | out = self.unfold(x).view(b, self.groups, self.group_channels, self.kernel_size**2, h, w)
42 | out = (weight * out).sum(dim=3).view(b, self.channels, h, w)
43 | return out
44 |
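A matching CPU sketch for this unfold-based fallback (no cupy/CUDA required); with stride > 1 the kernels are predicted at the pooled resolution and the output is downsampled accordingly.

    import torch
    from mmdet.models.utils.involution_naive import involution

    inv = involution(channels=64, kernel_size=7, stride=2)
    x = torch.randn(2, 64, 32, 32)
    y = inv(x)
    assert y.shape == (2, 64, 16, 16)  # spatial size halves with stride 2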
--------------------------------------------------------------------------------
/fig/complexity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d-li14/involution/21c3158fcbb4ecda8ed4626fcae8b01be511a598/fig/complexity.png
--------------------------------------------------------------------------------
/fig/involution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d-li14/involution/21c3158fcbb4ecda8ed4626fcae8b01be511a598/fig/involution.png
--------------------------------------------------------------------------------
/fig/parameter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/d-li14/involution/21c3158fcbb4ecda8ed4626fcae8b01be511a598/fig/parameter.png
--------------------------------------------------------------------------------
/seg/configs/_base_/models/fpn_red50.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
3 | model = dict(
4 | type='EncoderDecoder',
5 | #pretrained='open-mmlab://resnet50_v1c',
6 | pretrained='/path/to/rednet50.pth',
7 | backbone=dict(
8 | type='RedNet',
9 | depth=50,
10 | num_stages=4,
11 | out_indices=(0, 1, 2, 3),
12 | #dilations=(1, 1, 1, 1),
13 | #strides=(1, 2, 2, 2),
14 | norm_cfg=norm_cfg,
15 | norm_eval=False,
16 | style='pytorch'),
17 | #contract_dilation=True),
18 | neck=dict(
19 | type='FPN',
20 | in_channels=[256, 512, 1024, 2048],
21 | out_channels=256,
22 | num_outs=4),
23 | decode_head=dict(
24 | type='FPNHead',
25 | in_channels=[256, 256, 256, 256],
26 | in_index=[0, 1, 2, 3],
27 | feature_strides=[4, 8, 16, 32],
28 | channels=128,
29 | dropout_ratio=0.1,
30 | num_classes=19,
31 | norm_cfg=norm_cfg,
32 | align_corners=False,
33 | loss_decode=dict(
34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
35 | # model training and testing settings
36 | train_cfg=dict(),
37 | test_cfg=dict(mode='whole'))
38 |
--------------------------------------------------------------------------------
/seg/configs/_base_/models/fpn_red50_neck.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
3 | model = dict(
4 | type='EncoderDecoder',
5 | #pretrained='open-mmlab://resnet50_v1c',
6 | pretrained='/path/to/rednet50.pth',
7 | backbone=dict(
8 | type='RedNet',
9 | depth=50,
10 | num_stages=4,
11 | out_indices=(0, 1, 2, 3),
12 | #dilations=(1, 1, 1, 1),
13 | #strides=(1, 2, 2, 2),
14 | norm_cfg=norm_cfg,
15 | norm_eval=False,
16 | style='pytorch'),
17 | #contract_dilation=True),
18 | neck=dict(
19 | type='FPN_involution',
20 | in_channels=[256, 512, 1024, 2048],
21 | out_channels=256,
22 | num_outs=4),
23 | decode_head=dict(
24 | type='FPNHead',
25 | in_channels=[256, 256, 256, 256],
26 | in_index=[0, 1, 2, 3],
27 | feature_strides=[4, 8, 16, 32],
28 | channels=128,
29 | dropout_ratio=0.1,
30 | num_classes=19,
31 | norm_cfg=norm_cfg,
32 | align_corners=False,
33 | loss_decode=dict(
34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
35 | # model training and testing settings
36 | train_cfg=dict(),
37 | test_cfg=dict(mode='whole'))
38 |
--------------------------------------------------------------------------------
/seg/configs/_base_/models/upernet_red50.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | norm_cfg = dict(type='SyncBN', requires_grad=True)
3 | model = dict(
4 | type='EncoderDecoder',
5 | #pretrained='open-mmlab://resnet50_v1c',
6 | pretrained='/path/to/rednet50.pth',
7 | backbone=dict(
8 | type='RedNet',
9 | depth=50,
10 | num_stages=4,
11 | out_indices=(0, 1, 2, 3),
12 | #dilations=(1, 1, 1, 1),
13 | #strides=(1, 2, 2, 2),
14 | norm_cfg=norm_cfg,
15 | norm_eval=False,
16 | style='pytorch'),
17 | #contract_dilation=True),
18 | decode_head=dict(
19 | type='UPerHead',
20 | in_channels=[256, 512, 1024, 2048],
21 | in_index=[0, 1, 2, 3],
22 | pool_scales=(1, 2, 3, 6),
23 | channels=512,
24 | dropout_ratio=0.1,
25 | num_classes=19,
26 | norm_cfg=norm_cfg,
27 | align_corners=False,
28 | loss_decode=dict(
29 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
30 | auxiliary_head=dict(
31 | type='FCNHead',
32 | in_channels=1024,
33 | in_index=2,
34 | channels=256,
35 | num_convs=1,
36 | concat_input=False,
37 | dropout_ratio=0.1,
38 | num_classes=19,
39 | norm_cfg=norm_cfg,
40 | align_corners=False,
41 | loss_decode=dict(
42 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
43 | # model training and testing settings
44 | train_cfg=dict(),
45 | test_cfg=dict(mode='whole'))
46 |
--------------------------------------------------------------------------------
/seg/configs/involution/fpn_red50_512x1024_80k_cityscapes.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/fpn_red50.py', '../_base_/datasets/cityscapes.py',
3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
4 | ]
5 |
--------------------------------------------------------------------------------
/seg/configs/involution/fpn_red50_neck_512x1024_80k_cityscapes.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/fpn_red50_neck.py', '../_base_/datasets/cityscapes.py',
3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
4 | ]
5 |
--------------------------------------------------------------------------------
/seg/configs/involution/upernet_red50_512x1024_80k_cityscapes.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../_base_/models/upernet_red50.py', '../_base_/datasets/cityscapes.py',
3 | '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
4 | ]
5 |
--------------------------------------------------------------------------------
/seg/mmseg/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .cgnet import CGNet
2 | from .fast_scnn import FastSCNN
3 | from .hrnet import HRNet
4 | from .mobilenet_v2 import MobileNetV2
5 | from .mobilenet_v3 import MobileNetV3
6 | from .resnest import ResNeSt
7 | from .resnet import ResNet, ResNetV1c, ResNetV1d
8 | from .resnext import ResNeXt
9 | from .unet import UNet
10 | from .rednet import RedNet
11 |
12 | __all__ = [
13 | 'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN',
14 | 'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3',
15 | 'RedNet'
16 | ]
17 |
--------------------------------------------------------------------------------
/seg/mmseg/models/backbones/base_backbone.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from abc import ABCMeta, abstractmethod
3 |
4 | import torch.nn as nn
5 | from mmcv.runner import load_checkpoint
6 |
7 |
8 | class BaseBackbone(nn.Module, metaclass=ABCMeta):
9 | """Base backbone.
10 |
11 | This class defines the basic functions of a backbone.
12 | Any backbone that inherits this class should at least
13 | define its own `forward` function.
14 |
15 | """
16 |
17 | def __init__(self):
18 | super(BaseBackbone, self).__init__()
19 |
20 | def init_weights(self, pretrained=None):
21 | """Init backbone weights
22 |
23 | Args:
24 | pretrained (str | None): If pretrained is a string, then it
25 | initializes backbone weights by loading the pretrained
26 | checkpoint. If pretrained is None, then it follows default
27 | initializer or customized initializer in subclasses.
28 | """
29 | if isinstance(pretrained, str):
30 | logger = logging.getLogger()
31 | load_checkpoint(self, pretrained, strict=False, logger=logger)
32 | elif pretrained is None:
33 | # use default initializer or customized initializer in subclasses
34 | pass
35 | else:
36 | raise TypeError('pretrained must be a str or None.'
37 | f' But received {type(pretrained)}.')
38 |
39 | @abstractmethod
40 | def forward(self, x):
41 | """Forward computation
42 |
43 | Args:
44 | x (tensor | tuple[tensor]): x could be a Torch.tensor or a tuple of
45 | Torch.tensor, containing input data for forward computation.
46 | """
47 | pass
48 |
49 | def train(self, mode=True):
50 | """Set module status before forward computation
51 |
52 | Args:
53 | mode (bool): Whether it is train_mode or test_mode
54 | """
55 | super(BaseBackbone, self).train(mode)
56 |
--------------------------------------------------------------------------------
/seg/mmseg/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | from .fpn import FPN
2 | from .fpn_involution import FPN_involution
3 |
4 | __all__ = ['FPN', 'FPN_involution']
5 |
--------------------------------------------------------------------------------
/seg/mmseg/models/necks/fpn_involution.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | from mmcv.cnn import ConvModule, xavier_init
4 |
5 | from ..builder import NECKS
6 | from ..utils.involution_cuda import involution
7 |
8 |
9 | @NECKS.register_module()
10 | class FPN_involution(nn.Module):
11 |     """Feature Pyramid Network with involution layers replacing the 3x3 FPN convs.
12 |
13 | This is an implementation of - Feature Pyramid Networks for Object
14 | Detection (https://arxiv.org/abs/1612.03144)
15 |
16 | Args:
17 | in_channels (List[int]): Number of input channels per scale.
18 | out_channels (int): Number of output channels (used at each scale)
19 | num_outs (int): Number of output scales.
20 | start_level (int): Index of the start input backbone level used to
21 | build the feature pyramid. Default: 0.
22 | end_level (int): Index of the end input backbone level (exclusive) to
23 | build the feature pyramid. Default: -1, which means the last level.
24 | add_extra_convs (bool | str): If bool, it decides whether to add conv
25 | layers on top of the original feature maps. Default to False.
26 | If True, its actual mode is specified by `extra_convs_on_inputs`.
27 | If str, it specifies the source feature map of the extra convs.
28 | Only the following options are allowed
29 |
30 | - 'on_input': Last feat map of neck inputs (i.e. backbone feature).
31 | - 'on_lateral': Last feature map after lateral convs.
32 | - 'on_output': The last output feature map after fpn convs.
33 | extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs
34 | on the original feature from the backbone. If True,
35 | it is equivalent to `add_extra_convs='on_input'`. If False, it is
36 | equivalent to set `add_extra_convs='on_output'`. Default to True.
37 | relu_before_extra_convs (bool): Whether to apply relu before the extra
38 | conv. Default: False.
39 | no_norm_on_lateral (bool): Whether to apply norm on lateral.
40 | Default: False.
41 | conv_cfg (dict): Config dict for convolution layer. Default: None.
42 | norm_cfg (dict): Config dict for normalization layer. Default: None.
43 | act_cfg (str): Config dict for activation layer in ConvModule.
44 | Default: None.
45 | upsample_cfg (dict): Config dict for interpolate layer.
46 | Default: `dict(mode='nearest')`
47 |
48 | Example:
49 | >>> import torch
50 | >>> in_channels = [2, 3, 5, 7]
51 | >>> scales = [340, 170, 84, 43]
52 | >>> inputs = [torch.rand(1, c, s, s)
53 | ... for c, s in zip(in_channels, scales)]
54 | >>> self = FPN(in_channels, 11, len(in_channels)).eval()
55 | >>> outputs = self.forward(inputs)
56 | >>> for i in range(len(outputs)):
57 | ... print(f'outputs[{i}].shape = {outputs[i].shape}')
58 | outputs[0].shape = torch.Size([1, 11, 340, 340])
59 | outputs[1].shape = torch.Size([1, 11, 170, 170])
60 | outputs[2].shape = torch.Size([1, 11, 84, 84])
61 | outputs[3].shape = torch.Size([1, 11, 43, 43])
62 | """
63 |
64 | def __init__(self,
65 | in_channels,
66 | out_channels,
67 | num_outs,
68 | start_level=0,
69 | end_level=-1,
70 | add_extra_convs=False,
71 | extra_convs_on_inputs=False,
72 | relu_before_extra_convs=False,
73 | no_norm_on_lateral=False,
74 | conv_cfg=None,
75 | norm_cfg=None,
76 | act_cfg=None,
77 | upsample_cfg=dict(mode='nearest')):
78 | super(FPN_involution, self).__init__()
79 | assert isinstance(in_channels, list)
80 | self.in_channels = in_channels
81 | self.out_channels = out_channels
82 | self.num_ins = len(in_channels)
83 | self.num_outs = num_outs
84 | self.relu_before_extra_convs = relu_before_extra_convs
85 | self.no_norm_on_lateral = no_norm_on_lateral
86 | self.fp16_enabled = False
87 | self.upsample_cfg = upsample_cfg.copy()
88 |
89 | if end_level == -1:
90 | self.backbone_end_level = self.num_ins
91 | assert num_outs >= self.num_ins - start_level
92 | else:
93 | # if end_level < inputs, no extra level is allowed
94 | self.backbone_end_level = end_level
95 | assert end_level <= len(in_channels)
96 | assert num_outs == end_level - start_level
97 | self.start_level = start_level
98 | self.end_level = end_level
99 | self.add_extra_convs = add_extra_convs
100 | assert isinstance(add_extra_convs, (str, bool))
101 | if isinstance(add_extra_convs, str):
102 | # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'
103 | assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
104 | elif add_extra_convs: # True
105 | if extra_convs_on_inputs:
106 | # For compatibility with previous release
107 | # TODO: deprecate `extra_convs_on_inputs`
108 | self.add_extra_convs = 'on_input'
109 | else:
110 | self.add_extra_convs = 'on_output'
111 |
112 | self.lateral_convs = nn.ModuleList()
113 | self.fpn_convs = nn.ModuleList()
114 |
115 | for i in range(self.start_level, self.backbone_end_level):
116 | l_conv = ConvModule(
117 | in_channels[i],
118 | out_channels,
119 | 1,
120 | conv_cfg=conv_cfg,
121 | norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
122 | act_cfg=act_cfg,
123 | inplace=False)
124 | fpn_conv = involution(out_channels, 7, 1)  # 7x7 involution in place of the original 3x3 fpn ConvModule (kept commented out below)
125 | #ConvModule(
126 | #out_channels,
127 | #out_channels,
128 | #3,
129 | #padding=1,
130 | #conv_cfg=conv_cfg,
131 | #norm_cfg=norm_cfg,
132 | #act_cfg=act_cfg,
133 | #inplace=False)
134 |
135 | self.lateral_convs.append(l_conv)
136 | self.fpn_convs.append(fpn_conv)
137 |
138 | # add extra conv layers (e.g., RetinaNet)
139 | extra_levels = num_outs - self.backbone_end_level + self.start_level
140 | if self.add_extra_convs and extra_levels >= 1:
141 | for i in range(extra_levels):
142 | if i == 0 and self.add_extra_convs == 'on_input':
143 | in_channels = self.in_channels[self.backbone_end_level - 1]
144 | else:
145 | in_channels = out_channels
146 | extra_fpn_conv = ConvModule(
147 | in_channels,
148 | out_channels,
149 | 3,
150 | stride=2,
151 | padding=1,
152 | conv_cfg=conv_cfg,
153 | norm_cfg=norm_cfg,
154 | act_cfg=act_cfg,
155 | inplace=False)
156 | self.fpn_convs.append(extra_fpn_conv)
157 |
158 | # init_weights: Xavier-uniform initialization for all Conv2d layers
159 | def init_weights(self):
160 | for m in self.modules():
161 | if isinstance(m, nn.Conv2d):
162 | xavier_init(m, distribution='uniform')
163 |
164 | def forward(self, inputs):
165 | assert len(inputs) == len(self.in_channels)
166 |
167 | # build laterals
168 | laterals = [
169 | lateral_conv(inputs[i + self.start_level])
170 | for i, lateral_conv in enumerate(self.lateral_convs)
171 | ]
172 |
173 | # build top-down path
174 | used_backbone_levels = len(laterals)
175 | for i in range(used_backbone_levels - 1, 0, -1):
176 | # In some cases, fixing `scale factor` (e.g. 2) is preferred, but
177 | # it cannot co-exist with `size` in `F.interpolate`.
178 | if 'scale_factor' in self.upsample_cfg:
179 | laterals[i - 1] += F.interpolate(laterals[i],
180 | **self.upsample_cfg)
181 | else:
182 | prev_shape = laterals[i - 1].shape[2:]
183 | laterals[i - 1] += F.interpolate(
184 | laterals[i], size=prev_shape, **self.upsample_cfg)
185 |
186 | # build outputs
187 | # part 1: from original levels
188 | outs = [
189 | self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
190 | ]
191 | # part 2: add extra levels
192 | if self.num_outs > len(outs):
193 | # use max pool to get more levels on top of outputs
194 | # (e.g., Faster R-CNN, Mask R-CNN)
195 | if not self.add_extra_convs:
196 | for i in range(self.num_outs - used_backbone_levels):
197 | outs.append(F.max_pool2d(outs[-1], 1, stride=2))
198 | # add conv layers on top of original feature maps (RetinaNet)
199 | else:
200 | if self.add_extra_convs == 'on_input':
201 | extra_source = inputs[self.backbone_end_level - 1]
202 | elif self.add_extra_convs == 'on_lateral':
203 | extra_source = laterals[-1]
204 | elif self.add_extra_convs == 'on_output':
205 | extra_source = outs[-1]
206 | else:
207 | raise NotImplementedError
208 | outs.append(self.fpn_convs[used_backbone_levels](extra_source))
209 | for i in range(used_backbone_levels + 1, self.num_outs):
210 | if self.relu_before_extra_convs:
211 | outs.append(self.fpn_convs[i](F.relu(outs[-1])))
212 | else:
213 | outs.append(self.fpn_convs[i](outs[-1]))
214 | return tuple(outs)
215 |
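216 | # Usage sketch: a minimal forward pass through FPN_involution. The channel and
217 | # spatial sizes below are assumed example values (roughly ResNet/RedNet-50 stage
218 | # outputs); a CUDA device is needed because the fpn convs use the CuPy involution.
219 | #
220 | #   import torch
221 | #   neck = FPN_involution(in_channels=[256, 512, 1024, 2048],
222 | #                         out_channels=256, num_outs=4).cuda()
223 | #   neck.init_weights()
224 | #   feats = [torch.rand(1, c, 64 // 2**i, 64 // 2**i).cuda()
225 | #            for i, c in enumerate([256, 512, 1024, 2048])]
226 | #   outs = neck(feats)  # tuple of 4 maps, each with 256 channels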
--------------------------------------------------------------------------------
/seg/mmseg/models/utils/involution_cuda.py:
--------------------------------------------------------------------------------
1 | from torch.autograd import Function
2 | import torch
3 | from torch.nn.modules.utils import _pair
4 | import torch.nn.functional as F
5 | import torch.nn as nn
6 | from mmcv.cnn import ConvModule
7 |
8 |
9 | from collections import namedtuple
10 | import cupy
11 | from string import Template
12 |
13 |
14 | Stream = namedtuple('Stream', ['ptr'])
15 |
16 |
17 | def Dtype(t):
18 | if isinstance(t, torch.cuda.FloatTensor):
19 | return 'float'
20 | elif isinstance(t, torch.cuda.DoubleTensor):
21 | return 'double'
22 |
23 |
24 | @cupy._util.memoize(for_each_device=True)
25 | def load_kernel(kernel_name, code, **kwargs):
26 | code = Template(code).substitute(**kwargs)
27 | kernel_code = cupy.cuda.compile_with_cache(code)
28 | return kernel_code.get_function(kernel_name)
29 |
30 |
31 | CUDA_NUM_THREADS = 1024
32 |
33 | kernel_loop = '''
34 | #define CUDA_KERNEL_LOOP(i, n) \
35 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
36 | i < (n); \
37 | i += blockDim.x * gridDim.x)
38 | '''
39 |
40 |
41 | def GET_BLOCKS(N):
42 | return (N + CUDA_NUM_THREADS - 1) // CUDA_NUM_THREADS
43 |
44 |
45 | _involution_kernel = kernel_loop + '''
46 | extern "C"
47 | __global__ void involution_forward_kernel(
48 | const ${Dtype}* bottom_data, const ${Dtype}* weight_data, ${Dtype}* top_data) {
49 | CUDA_KERNEL_LOOP(index, ${nthreads}) {
50 | const int n = index / ${channels} / ${top_height} / ${top_width};
51 | const int c = (index / ${top_height} / ${top_width}) % ${channels};
52 | const int h = (index / ${top_width}) % ${top_height};
53 | const int w = index % ${top_width};
54 | const int g = c / (${channels} / ${groups});
55 | ${Dtype} value = 0;
56 | #pragma unroll
57 | for (int kh = 0; kh < ${kernel_h}; ++kh) {
58 | #pragma unroll
59 | for (int kw = 0; kw < ${kernel_w}; ++kw) {
60 | const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h};
61 | const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w};
62 | if ((h_in >= 0) && (h_in < ${bottom_height})
63 | && (w_in >= 0) && (w_in < ${bottom_width})) {
64 | const int offset = ((n * ${channels} + c) * ${bottom_height} + h_in)
65 | * ${bottom_width} + w_in;
66 | const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h)
67 | * ${top_width} + w;
68 | value += weight_data[offset_weight] * bottom_data[offset];
69 | }
70 | }
71 | }
72 | top_data[index] = value;
73 | }
74 | }
75 | '''
76 |
77 |
78 | _involution_kernel_backward_grad_input = kernel_loop + '''
79 | extern "C"
80 | __global__ void involution_backward_grad_input_kernel(
81 | const ${Dtype}* const top_diff, const ${Dtype}* const weight_data, ${Dtype}* const bottom_diff) {
82 | CUDA_KERNEL_LOOP(index, ${nthreads}) {
83 | const int n = index / ${channels} / ${bottom_height} / ${bottom_width};
84 | const int c = (index / ${bottom_height} / ${bottom_width}) % ${channels};
85 | const int h = (index / ${bottom_width}) % ${bottom_height};
86 | const int w = index % ${bottom_width};
87 | const int g = c / (${channels} / ${groups});
88 | ${Dtype} value = 0;
89 | #pragma unroll
90 | for (int kh = 0; kh < ${kernel_h}; ++kh) {
91 | #pragma unroll
92 | for (int kw = 0; kw < ${kernel_w}; ++kw) {
93 | const int h_out_s = h + ${pad_h} - kh * ${dilation_h};
94 | const int w_out_s = w + ${pad_w} - kw * ${dilation_w};
95 | if (((h_out_s % ${stride_h}) == 0) && ((w_out_s % ${stride_w}) == 0)) {
96 | const int h_out = h_out_s / ${stride_h};
97 | const int w_out = w_out_s / ${stride_w};
98 | if ((h_out >= 0) && (h_out < ${top_height})
99 | && (w_out >= 0) && (w_out < ${top_width})) {
100 | const int offset = ((n * ${channels} + c) * ${top_height} + h_out)
101 | * ${top_width} + w_out;
102 | const int offset_weight = ((((n * ${groups} + g) * ${kernel_h} + kh) * ${kernel_w} + kw) * ${top_height} + h_out)
103 | * ${top_width} + w_out;
104 | value += weight_data[offset_weight] * top_diff[offset];
105 | }
106 | }
107 | }
108 | }
109 | bottom_diff[index] = value;
110 | }
111 | }
112 | '''
113 |
114 |
115 | _involution_kernel_backward_grad_weight = kernel_loop + '''
116 | extern "C"
117 | __global__ void involution_backward_grad_weight_kernel(
118 | const ${Dtype}* const top_diff, const ${Dtype}* const bottom_data, ${Dtype}* const buffer_data) {
119 | CUDA_KERNEL_LOOP(index, ${nthreads}) {
120 | const int h = (index / ${top_width}) % ${top_height};
121 | const int w = index % ${top_width};
122 | const int kh = (index / ${kernel_w} / ${top_height} / ${top_width})
123 | % ${kernel_h};
124 | const int kw = (index / ${top_height} / ${top_width}) % ${kernel_w};
125 | const int h_in = -${pad_h} + h * ${stride_h} + kh * ${dilation_h};
126 | const int w_in = -${pad_w} + w * ${stride_w} + kw * ${dilation_w};
127 | if ((h_in >= 0) && (h_in < ${bottom_height})
128 | && (w_in >= 0) && (w_in < ${bottom_width})) {
129 | const int g = (index / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${groups};
130 | const int n = (index / ${groups} / ${kernel_h} / ${kernel_w} / ${top_height} / ${top_width}) % ${num};
131 | ${Dtype} value = 0;
132 | #pragma unroll
133 | for (int c = g * (${channels} / ${groups}); c < (g + 1) * (${channels} / ${groups}); ++c) {
134 | const int top_offset = ((n * ${channels} + c) * ${top_height} + h)
135 | * ${top_width} + w;
136 | const int bottom_offset = ((n * ${channels} + c) * ${bottom_height} + h_in)
137 | * ${bottom_width} + w_in;
138 | value += top_diff[top_offset] * bottom_data[bottom_offset];
139 | }
140 | buffer_data[index] = value;
141 | } else {
142 | buffer_data[index] = 0;
143 | }
144 | }
145 | }
146 | '''
147 |
148 |
149 | class _involution(Function):
150 | @staticmethod
151 | def forward(ctx, input, weight, stride, padding, dilation):
152 | assert input.dim() == 4 and input.is_cuda
153 | assert weight.dim() == 6 and weight.is_cuda
154 | batch_size, channels, height, width = input.size()
155 | kernel_h, kernel_w = weight.size()[2:4]
156 | output_h = int((height + 2 * padding[0] - (dilation[0] * (kernel_h - 1) + 1)) / stride[0] + 1)
157 | output_w = int((width + 2 * padding[1] - (dilation[1] * (kernel_w - 1) + 1)) / stride[1] + 1)
158 |
159 | output = input.new(batch_size, channels, output_h, output_w)
160 | n = output.numel()
161 |
162 | with torch.cuda.device_of(input):
163 | f = load_kernel('involution_forward_kernel', _involution_kernel, Dtype=Dtype(input), nthreads=n,
164 | num=batch_size, channels=channels, groups=weight.size()[1],
165 | bottom_height=height, bottom_width=width,
166 | top_height=output_h, top_width=output_w,
167 | kernel_h=kernel_h, kernel_w=kernel_w,
168 | stride_h=stride[0], stride_w=stride[1],
169 | dilation_h=dilation[0], dilation_w=dilation[1],
170 | pad_h=padding[0], pad_w=padding[1])
171 | f(block=(CUDA_NUM_THREADS,1,1),
172 | grid=(GET_BLOCKS(n),1,1),
173 | args=[input.data_ptr(), weight.data_ptr(), output.data_ptr()],
174 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
175 |
176 | ctx.save_for_backward(input, weight)
177 | ctx.stride, ctx.padding, ctx.dilation = stride, padding, dilation
178 | return output
179 |
180 | @staticmethod
181 | def backward(ctx, grad_output):
182 | assert grad_output.is_cuda and grad_output.is_contiguous()
183 | input, weight = ctx.saved_tensors
184 | stride, padding, dilation = ctx.stride, ctx.padding, ctx.dilation
185 |
186 | batch_size, channels, height, width = input.size()
187 | kernel_h, kernel_w = weight.size()[2:4]
188 | output_h, output_w = grad_output.size()[2:]
189 |
190 | grad_input, grad_weight = None, None
191 |
192 | opt = dict(Dtype=Dtype(grad_output),
193 | num=batch_size, channels=channels, groups=weight.size()[1],
194 | bottom_height=height, bottom_width=width,
195 | top_height=output_h, top_width=output_w,
196 | kernel_h=kernel_h, kernel_w=kernel_w,
197 | stride_h=stride[0], stride_w=stride[1],
198 | dilation_h=dilation[0], dilation_w=dilation[1],
199 | pad_h=padding[0], pad_w=padding[1])
200 |
201 | with torch.cuda.device_of(input):
202 | if ctx.needs_input_grad[0]:
203 | grad_input = input.new(input.size())
204 |
205 | n = grad_input.numel()
206 | opt['nthreads'] = n
207 |
208 | f = load_kernel('involution_backward_grad_input_kernel',
209 | _involution_kernel_backward_grad_input, **opt)
210 | f(block=(CUDA_NUM_THREADS,1,1),
211 | grid=(GET_BLOCKS(n),1,1),
212 | args=[grad_output.data_ptr(), weight.data_ptr(), grad_input.data_ptr()],
213 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
214 |
215 | if ctx.needs_input_grad[1]:
216 | grad_weight = weight.new(weight.size())
217 |
218 | n = grad_weight.numel()
219 | opt['nthreads'] = n
220 |
221 | f = load_kernel('involution_backward_grad_weight_kernel',
222 | _involution_kernel_backward_grad_weight, **opt)
223 | f(block=(CUDA_NUM_THREADS,1,1),
224 | grid=(GET_BLOCKS(n),1,1),
225 | args=[grad_output.data_ptr(), input.data_ptr(), grad_weight.data_ptr()],
226 | stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
227 |
228 | return grad_input, grad_weight, None, None, None
229 |
230 |
231 | def _involution_cuda(input, weight, bias=None, stride=1, padding=0, dilation=1):
232 | """ involution kernel
233 | """
234 | assert input.size(0) == weight.size(0)
235 | assert input.size(-2)//stride == weight.size(-2)
236 | assert input.size(-1)//stride == weight.size(-1)
237 | if input.is_cuda:
238 | out = _involution.apply(input, weight, _pair(stride), _pair(padding), _pair(dilation))
239 | if bias is not None:
240 | out += bias.view(1,-1,1,1)
241 | else:
242 | raise NotImplementedError
243 | return out
244 |
245 |
246 | class involution(nn.Module):
247 |
248 | def __init__(self,
249 | channels,
250 | kernel_size,
251 | stride):
252 | super(involution, self).__init__()
253 | self.kernel_size = kernel_size
254 | self.stride = stride
255 | self.channels = channels
256 | reduction_ratio = 4
257 | self.group_channels = 16
258 | self.groups = self.channels // self.group_channels
259 | self.conv1 = ConvModule(
260 | in_channels=channels,
261 | out_channels=channels // reduction_ratio,
262 | kernel_size=1,
263 | conv_cfg=None,
264 | norm_cfg=dict(type='BN'),
265 | act_cfg=dict(type='ReLU'))
266 | self.conv2 = ConvModule(
267 | in_channels=channels // reduction_ratio,
268 | out_channels=kernel_size**2 * self.groups,
269 | kernel_size=1,
270 | stride=1,
271 | conv_cfg=None,
272 | norm_cfg=None,
273 | act_cfg=None)
274 | if stride > 1:
275 | self.avgpool = nn.AvgPool2d(stride, stride)
276 |
277 | def forward(self, x):
278 | weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
279 | b, c, h, w = weight.shape
280 | weight = weight.view(b, self.groups, self.kernel_size, self.kernel_size, h, w)
281 | out = _involution_cuda(x, weight, stride=self.stride, padding=(self.kernel_size-1)//2)
282 | return out
283 |
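284 | # Shape sketch: for input (B, C, H, W) and stride s, conv1/conv2 predict kernels
285 | # of shape (B, K*K*groups, H/s, W/s), reshaped to (B, groups, K, K, H/s, W/s) and
286 | # applied per position by the CUDA kernel above. The numbers below are assumed
287 | # example values only.
288 | #
289 | #   import torch
290 | #   m = involution(channels=64, kernel_size=7, stride=1).cuda()
291 | #   y = m(torch.randn(2, 64, 32, 32, device='cuda'))  # y.shape == (2, 64, 32, 32)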
--------------------------------------------------------------------------------
/seg/mmseg/models/utils/involution_naive.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from mmcv.cnn import ConvModule
3 |
4 |
5 | class involution(nn.Module):
6 |
7 | def __init__(self,
8 | channels,
9 | kernel_size,
10 | stride):
11 | super(involution, self).__init__()
12 | self.kernel_size = kernel_size
13 | self.stride = stride
14 | self.channels = channels
15 | reduction_ratio = 4
16 | self.group_channels = 16
17 | self.groups = self.channels // self.group_channels
18 | self.conv1 = ConvModule(
19 | in_channels=channels,
20 | out_channels=channels // reduction_ratio,
21 | kernel_size=1,
22 | conv_cfg=None,
23 | norm_cfg=dict(type='BN'),
24 | act_cfg=dict(type='ReLU'))
25 | self.conv2 = ConvModule(
26 | in_channels=channels // reduction_ratio,
27 | out_channels=kernel_size**2 * self.groups,
28 | kernel_size=1,
29 | stride=1,
30 | conv_cfg=None,
31 | norm_cfg=None,
32 | act_cfg=None)
33 | if stride > 1:
34 | self.avgpool = nn.AvgPool2d(stride, stride)
35 | self.unfold = nn.Unfold(kernel_size, 1, (kernel_size-1)//2, stride)
36 |
37 | def forward(self, x):
38 | weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
39 | b, c, h, w = weight.shape
40 | weight = weight.view(b, self.groups, self.kernel_size**2, h, w).unsqueeze(2)
41 | out = self.unfold(x).view(b, self.groups, self.group_channels, self.kernel_size**2, h, w)
42 | out = (weight * out).sum(dim=3).view(b, self.channels, h, w)
43 | return out
44 |
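45 | # Note: this unfold-based module computes the same per-position multiply-accumulate
46 | # as the CuPy version in involution_cuda.py, but also runs on CPU. The sizes in the
47 | # sketch below are assumed example values only.
48 | #
49 | #   import torch
50 | #   m = involution(channels=64, kernel_size=7, stride=1)
51 | #   y = m(torch.rand(1, 64, 28, 28))  # y.shape == (1, 64, 28, 28)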
--------------------------------------------------------------------------------